import requests
import sys
import time
import pandas as pd
import numpy as np
import datetime
word_url = 'http://index.baidu.com/api/SearchApi/thumbnail?area=0&word={}'
# word_url1 = f'http://index.baidu.com/api/SearchApi/thumbnail?area=0&word=[[%7B%22name%22:%22{}%22,%22wordType%22:1%7D]]'
cookieS = 'BIDUPSID=6C34DA33F329ACF74270250DDA77C712; PSTM=1589523676; BAIDUID=BB5A781560A929325CCF14881D661AB4:FG=1; BDUSS=9GWlVwV2UyaFRqWjhqYlVOU1ZWSnNEOFBEbFpqbkg1fkoxOFlMY2FOcUFPT2RlRVFBQUFBJCQAAAAAAAAAAAEAAADUnXqaY2doaGhjZ2hoAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAICrv16Aq79eV0; BDORZ=FFFB88E999055A3F8A630C64834BD6D0; H_PS_PSSID=; Hm_lvt_d101ea4d2a5c67dab98251f0b5de24dc=1591187994,1591610816; bdindexid=rfj4nvkpb8il9sl3ii6sm40tv2; Hm_lpvt_d101ea4d2a5c67dab98251f0b5de24dc=1591610824; delPer=0; PSINO=2; BDRCVFR[1kRcOFa5hin]=mk3SLVN4HKm; RT="sl=0&ss=kb6da8yx&tt=0&bcn=https%3A%2F%2Ffclog.baidu.com%2Flog%2Fweirwood%3Ftype%3Dperf&z=1&dm=baidu.com&si=9bzana5mhzs&ld=9qm&ul=9xgx"'
def decrypt(t, e):
    """Decode the encrypted index string `e` with the permutation key `t` (ptbk)."""
    n = list(t)
    a = {}
    result = []
    ln = int(len(n) / 2)
    start = n[ln:]  # second half of the key: plaintext characters
    end = n[:ln]    # first half of the key: ciphertext characters
    for j, k in zip(start, end):
        a.update({k: j})
    for j in e:
        result.append(a.get(j))
    return ''.join(result)
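# Worked example with a hypothetical key/data pair (not real Baidu values):
# the first half of the key maps position-by-position onto the second half,
# so with t = 'abc.123,' the mapping is 'a'->'1', 'b'->'2', 'c'->'3', '.'->','
# and decrypt('abc.123,', 'ab.c') returns '12,3'.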
def get_index_home(keyword):
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36',
        'Cookie': cookieS
    }
    word_url = f'http://index.baidu.com/api/SearchApi/thumbnail?area=0&word=[[%7B%22name%22:%22{keyword}%22,%22wordType%22:1%7D]]'
    resp = requests.get(word_url, headers=headers)
    j = resp.json()
    print(j)
    uniqid = j.get('data').get('uniqid')
    return get_ptbk(uniqid)
def get_ptbk(uniqid):
    url = 'http://index.baidu.com/Interface/ptbk?uniqid={}'
    ptbk_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36',
        'Cookie': cookieS
    }
    resp = requests.get(url.format(uniqid), headers=ptbk_headers)
    if resp.status_code != 200:
        print('Failed to fetch the ptbk key for this uniqid')
        return None
    return resp.json().get('data')
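# How the two helpers fit together (the uniqid value below is hypothetical):
#   ptbk = get_ptbk('abc123')         # permutation key for one API response
#   plain = decrypt(ptbk, all_data)   # comma-separated daily index values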
def get_index_data(keyword, start='2012-01-01', end='2012-12-31'):
    url = f'http://index.baidu.com/api/SugApi/sug?inputword[]={keyword}&area=0&startDate={start}&endDate={end}'  # suggestion endpoint (unused below)
    word_param = f'[[%7B"name":"{keyword}","wordType":1%7D]]'
    url1 = f'http://index.baidu.com/api/SearchApi/index?area=0&word={word_param}&startDate={start}&endDate={end}'
    print(url1)
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36',
        'Cookie': cookieS
    }
    resp = requests.get(url1, headers=headers)
    if resp.status_code != 200:
        print('Failed to fetch the index data')
        return None
    # print(resp.json())
    data = resp.json().get('data').get('userIndexes')[0]
    uniqid = resp.json().get('data').get('uniqid')
    # print(data)
    # print(uniqid)
    ptbk = get_ptbk(uniqid)
    # print(ptbk)
    # while ptbk is None or ptbk == '':
    #     ptbk = get_index_home(uniqid)
    all_data = data.get('all').get('data')
    result = decrypt(ptbk, all_data)
    result = result.split(',')
    # print(result)
    return result
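# Hypothetical convenience helper (not part of the original script): put the
# decrypted daily values into a pandas DataFrame, assuming the API returns one
# value per day of the requested range. The name to_dataframe is illustrative.
def to_dataframe(keyword, start='2012-01-01', end='2012-12-31'):
    values = get_index_data(keyword, start=start, end=end)
    if not values:
        return pd.DataFrame(columns=['date', 'index'])
    dates = pd.date_range(start=start, periods=len(values))
    return pd.DataFrame({'date': dates,
                         'index': [int(v) if v else 0 for v in values]})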
def demo():
    data = get_index_data(keyword='酷安')
    print(data)
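# Minimal entry point; assumes the cookieS string above is still a valid
# logged-in cookie for index.baidu.com (it expires and must be replaced).
if __name__ == '__main__':
    demo()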