Python login and Taobao crawling code example
- 2020-06-15 09:38:47
- OfStack
This article shares example code for logging in to Taobao with Python (Selenium) and crawling product information. It is a useful reference if you want to understand the approach.
#!/usr/bin/env python
# -*- coding:utf-8 -*-
from selenium import webdriver
import time
import datetime
import traceback
import logging
import os
from selenium.webdriver.common.action_chains import ActionChains
import codecs
# The login
def login(driver, site, username=u"yourusername", password=u"yourpsd"):
    """Open `site` and run the Taobao login flow in the given browser.

    driver   -- a Selenium WebDriver instance
    site     -- URL to open before logging in
    username -- account name typed into the login form (the default is a
                placeholder; pass your real account name)
    password -- account password (placeholder default)

    The generous sleeps give the page (and any captcha) time to settle.
    """
    driver.get(site)
    time.sleep(5)
    try:
        # Click the login link.
        driver.find_element_by_class_name("h").click()
        time.sleep(5)
        # Fill in account name and password.
        driver.find_element_by_id("TPL_username_1").send_keys(username)
        time.sleep(5)
        driver.find_element_by_id("TPL_password_1").send_keys(password)
        time.sleep(5)
        # Submit the login form; the long wait leaves room for Taobao's
        # slider captcha / risk-control checks.
        driver.find_element_by_id("J_SubmitStatic").click()
        time.sleep(30)
    except Exception:
        # Report the real cause instead of swallowing it silently
        # (the original bare `except:` hid every failure).
        traceback.print_exc()
        print(u"failure")
def crawlmarket(driver, filename, site):
    """Crawl item links (anchors with class J_ItemLink) from a Taobao
    `markets` page.

    Writes the harvested link texts, one per line, to `filename` (UTF-8).
    If nothing was scraped, appends a "filename,site" record to error.txt
    instead. Quits the driver before returning.
    """
    driver.get(site)
    driver.maximize_window()
    time.sleep(10)
    driver.refresh()
    time.sleep(10)
    test = driver.find_elements_by_xpath("//a[@class='J_ItemLink']")
    # Nothing retrieved: the page probably demanded a login. Log in and
    # RE-FETCH the elements -- the original kept iterating the stale empty
    # list, so logging in never actually produced any results.
    if not test:
        login(driver, site)
        time.sleep(30)
        test = driver.find_elements_by_xpath("//a[@class='J_ItemLink']")
    resultstrall = ""
    for element in test:
        text = element.text
        if text != "":
            resultstr = text.strip() + '\n'
            print(resultstr)
            resultstrall += resultstr
    if resultstrall != "":
        # Scrape succeeded: dump everything to the output file.
        # `with` guarantees the handle is closed even if write() raises.
        with codecs.open(filename, 'w', 'utf-8') as f:
            f.write(resultstrall)
    else:
        # Scrape failed: record the filename/site pair. The trailing
        # newline keeps one record per line in error.txt (the original
        # appended records with no separator, running them together).
        strinfo = filename + "," + site
        print(strinfo)
        with codecs.open("error.txt", 'a', 'utf-8') as ferror:
            ferror.write(strinfo + "\n")
    driver.quit()
def crawltaobaosousuo(driver, filename, site):
    """Crawl search-result links (anchors with class J_ClickStat) from a
    Taobao search page.

    Writes the harvested link texts, one per line, to `filename` (UTF-8);
    if nothing was scraped, appends a "filename,site" record to error.txt.
    Quits the driver before returning.
    """
    driver.get(site)
    driver.maximize_window()
    time.sleep(10)
    # Load the page a second time and refresh -- presumably a crude
    # workaround for lazily-loaded / anti-bot search results; kept as-is.
    driver.get(site)
    time.sleep(30)
    driver.refresh()
    test = driver.find_elements_by_xpath("//a[@class='J_ClickStat']")
    resultstrall = ""
    for element in test:
        text = element.text
        if text != "":
            resultstr = text.strip() + '\n'
            print(resultstr)
            resultstrall += resultstr
    if resultstrall != "":
        # Scrape succeeded: write the results; `with` closes the handle
        # even on error.
        with codecs.open(filename, 'w', 'utf-8') as f:
            f.write(resultstrall)
    else:
        # Scrape failed: log the filename/site pair, one record per line
        # (the original appended without a newline separator).
        strinfo = filename + "," + site
        print(strinfo)
        with codecs.open("error.txt", 'a', 'utf-8') as ferror:
            ferror.write(strinfo + "\n")
    driver.quit()
def jiexi(driver):
    """Read "1.txt" (one "filename,url" record per line) and dispatch each
    URL to the matching crawler: URLs containing "markets" go to
    crawlmarket, everything else to crawltaobaosousuo.

    NOTE(review): both crawlers call driver.quit() when they finish, so
    with a single shared driver only the first record can succeed --
    confirm whether 1.txt is expected to hold more than one line.
    """
    # `with` closes the input file even if a crawler raises
    # (the original never closed it).
    with open("1.txt", "r") as f:
        for rawline in f:
            # strip() removes the trailing newline that the original left
            # attached to the URL passed to driver.get().
            line = rawline.strip()
            if not line:
                continue  # ignore blank lines
            time.sleep(60)  # throttle between records
            info = line.split(",")
            href = info[1]
            filename = info[0].decode("utf-8")
            print(filename)
            if "markets" in href:
                crawlmarket(driver, filename, href)
            else:
                crawltaobaosousuo(driver, filename, href)
if __name__ == '__main__':
    # Launch a Firefox browser and process every record in 1.txt.
    driver = webdriver.Firefox()
    try:
        jiexi(driver)
    finally:
        # Close the browser even if crawling raises; the crawlers may
        # already have quit the session, so swallow a second-quit error.
        try:
            driver.quit()
        except Exception:
            pass
Summary
One possible improvement is worth discussing: you can scrape just part of the Taobao page content and adapt the selectors to your own needs, but be aware that heavy crawling may trigger Taobao's risk control. Personally, I think it is better not to log in at all.
That is the entire content of this article on the Python login and Taobao crawling code example — I hope it helps you. If you are interested, you can read other related topics on this site. If anything is missing or incorrect, please let me know. Thank you for your support!