本篇文章主要介绍了python3百度指数抓取,小编觉得挺不错的,现在分享给大家,也给大家做个参考。一起跟随小编过来看看吧。
百度指数抓取,再用图像识别得到指数
前言:
土福曾说,百度指数很难抓,在淘宝上面是20块1个关键字:
哥那么叼的人怎么会被他吓到,于是乎花了零零碎碎加起来大约2天半搞定,在此鄙视一下土福
安装的库很多:
谷歌图像识别 tesseract-ocr;pip3 install pillow;pip3 install pyocr;selenium 2.45;Chrome 47.0.2526.106 m 或 Firefox 32.0.1;chromedriver.exe
进入百度指数需要登陆,登陆的账号密码写在文本文件 account.txt 里面(第一行是账号,第二行是密码):
万能登陆代码如下:
# Open the browser and log in to Baidu.
_LOGIN_URL = "https://passport.baidu.com/v2/?login&tpl=mn&u=http%3A%2F%2Fwww.baidu.com%2F"
_ACCOUNT_FILE = "../baidu/account.txt"


def _read_account():
    """Return the stripped lines of the account file (username, password, ...).

    The first line is used as the username and the second as the password.
    If the file cannot be read, or holds fewer than two lines, the user is
    told to fix ``account.txt`` and the program exits via ``SystemExit``.
    """
    import sys

    try:
        # ``with`` guarantees the handle is closed even if reading fails.
        with open(_ACCOUNT_FILE) as account_file:
            account = [line.strip() for line in account_file]
    except (OSError, ValueError) as err:
        # ValueError covers UnicodeDecodeError raised while decoding the file.
        print(err)
        account = []

    # Fewer than two lines would otherwise surface as an IndexError when the
    # password is looked up; treat it like any other unusable account file.
    if len(account) < 2:
        input("请正确在account.txt里面写入账号密码")
        sys.exit()
    return account


def _submit_credentials():
    """Fill the Baidu login form from the account file and submit it.

    Relies on the module-level ``browser`` created by ``openbrowser``.
    """
    account = _read_account()

    # Clear both input boxes before typing so stale text is not kept.
    browser.find_element_by_id("TANGRAM__PSP_3__userName").clear()
    browser.find_element_by_id("TANGRAM__PSP_3__password").clear()

    browser.find_element_by_id("TANGRAM__PSP_3__userName").send_keys(account[0])
    browser.find_element_by_id("TANGRAM__PSP_3__password").send_keys(account[1])

    # Click the login button.
    browser.find_element_by_id("TANGRAM__PSP_3__submit").click()


def openbrowser():
    """Open Chrome, log in to Baidu, and wait until the user confirms login.

    Creates the module-level ``browser`` (a Chrome WebDriver), loads the Baidu
    passport login page and submits the credentials stored in
    ``../baidu/account.txt``. Whether the login worked is decided by the
    operator, who watches the browser window and answers on the console:

    * ``y`` / ``Y``: login succeeded, the function returns.
    * ``n`` / ``N``: the operator then chooses ``0`` (wrong credentials: the
      account file is re-read and the form re-submitted) or ``1`` (a captcha
      appeared: the operator solves it in the browser and presses Enter).
    * anything else: a hint is printed.

    After every unsuccessful round the login-status question is asked again,
    so a credential retry can be confirmed directly.

    Raises:
        SystemExit: if the account file is missing, unreadable, or does not
            contain both a username line and a password line.
    """
    global browser

    # Swap in webdriver.Firefox() here to drive Firefox instead of Chrome.
    browser = webdriver.Chrome()
    browser.get(_LOGIN_URL)

    _submit_credentials()
    print("等待网址加载完毕...")

    while True:
        select = input("请观察浏览器网站是否已经登陆(y/n):")
        if select in ("y", "Y"):
            print("登陆成功!")
            print("准备打开新的窗口...")
            break

        if select in ("n", "N"):
            selectno = input("账号密码错误请按0,验证码出现请按1...")
            if selectno == "0":
                # Wrong username/password: re-read the file and try again.
                _submit_credentials()
            elif selectno == "1":
                # Captcha shown: the operator has to solve it by hand.
                input("请在浏览器中输入验证码并登陆...")
        else:
            print("请输入“y”或者“n”!")