热门推荐
python爬关键词百度指数_Python爬虫 - 抓取百度指数
2024-10-31 19:54

import requests

# python爬关键词百度指数_Python爬虫 - 抓取百度指数

import sys

import time

import pandas as pd

import numpy as np

import datetime

# URL template for the thumbnail search endpoint; `{}` is the placeholder for
# the search word. get_index_home() assigns its own local `word_url`, so this
# module-level value is not read by the functions visible in this file.
word_url = 'http://index.baidu.com/api/SearchApi/thumbnail?area=0&word={}'

# word_url1 = f'http://index.baidu.com/api/SearchApi/thumbnail?area=0&word=[[%7B%22name%22:%22{}%22,%22wordType%22:1%7D]]'

# Raw cookie string sent as the 'cookie' request header.
# NOTE(review): this looks like a hard-coded browser session (it includes a
# BDUSS value) -- presumably it expires and should be loaded from configuration
# or the environment rather than committed to source; confirm.
cookieS = 'BIDUPSID=6C34DA33F329ACF74270250DDA77C712; PSTM=1589523676; BAIDUID=BB5A781560A929325CCF14881D661AB4:FG=1; BDUSS=9GWlVwV2UyaFRqWjhqYlVOU1ZWSnNEOFBEbFpqbkg1fkoxOFlMY2FOcUFPT2RlRVFBQUFBJCQAAAAAAAAAAAEAAADUnXqaY2doaGhjZ2hoAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAICrv16Aq79eV0; BDORZ=FFFB88E999055A3F8A630C64834BD6D0; H_PS_PSSID=; Hm_lvt_d101ea4d2a5c67dab98251f0b5de24dc=1591187994,1591610816; bdindexid=rfj4nvkpb8il9sl3ii6sm40tv2; Hm_lpvt_d101ea4d2a5c67dab98251f0b5de24dc=1591610824; delPer=0; PSINO=2; BDRCVFR[1kRcOFa5hin]=mk3SLVN4HKm; RT="sl=0&ss=kb6da8yx&tt=0&bcn=https%3A%2F%2Ffclog.baidu.com%2Flog%2Fweirwood%3Ftype%3Dperf&z=1&dm=baidu.com&si=9bzana5mhzs&ld=9qm&ul=9xgx"'

def decrypt(t, e):
    """Decode an obfuscated string with a substitution key.

    The key ``t`` is split at ``len(t) // 2``. Each character in the first
    half maps to the character at the same position in the second half, and
    every character of ``e`` is replaced through that mapping.

    Args:
        t: Substitution key (a string or other iterable of characters).
        e: Encoded text to translate.

    Returns:
        The decoded string; empty when ``e`` is empty.

    Raises:
        ValueError: If ``e`` contains a character that is not present in the
            first half of ``t``.
    """
    key_chars = list(t)
    half = len(key_chars) // 2
    # zip() stops at the shorter half, so with an odd-length key the last
    # character of the second half is simply unused.
    mapping = dict(zip(key_chars[:half], key_chars[half:]))

    decoded = []
    for ch in e:
        if ch not in mapping:
            # Fail with a readable message instead of letting ''.join()
            # choke on a None entry.
            raise ValueError(f'character {ch!r} is not covered by the key')
        decoded.append(mapping[ch])
    return ''.join(decoded)

def get_index_home(keyword):
    """Query the thumbnail search endpoint for ``keyword`` and fetch its key.

    Sends a GET request to the ``SearchApi/thumbnail`` URL, prints the parsed
    JSON response, reads ``data.uniqid`` from it and passes that value to
    ``get_ptbk``.

    Args:
        keyword: Search word interpolated into the request URL.

    Returns:
        Whatever ``get_ptbk`` returns for the ``uniqid`` in the response.
    """
    # NOTE(review): only the 'cookie' entry of this dict survived in the
    # original text; the User-Agent below is the one used elsewhere in this
    # file. Confirm against the original script if other headers are needed.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36',
        'cookie': cookieS,
    }

    word_url = f'http://index.baidu.com/api/SearchApi/thumbnail?area=0&word=[[%7B%22name%22:%22{keyword}%22,%22wordType%22:1%7D]]'

    resp = requests.get(word_url, headers=headers)
    j = resp.json()
    print(j)

    uniqid = j.get('data').get('uniqid')
    return get_ptbk(uniqid)

def get_ptbk(uniqid):
    """Fetch the ``data`` field of the ``Interface/ptbk`` response.

    Args:
        uniqid: Identifier interpolated into the ``uniqid`` query parameter.

    Returns:
        The value stored under ``'data'`` in the JSON response (``None`` when
        the key is absent).

    Raises:
        RuntimeError: If the HTTP status code is not 200.
    """
    url = 'http://index.baidu.com/Interface/ptbk?uniqid={}'

    # NOTE(review): the contents of this dict were lost from the original
    # text; it is rebuilt from the cookie and User-Agent used elsewhere in
    # this file. Confirm against the original script.
    ptbk_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36',
        'cookie': cookieS,
    }

    resp = requests.get(url.format(uniqid), headers=ptbk_headers)

    # The failure message used to print unconditionally; report it only when
    # the request actually failed, and stop instead of parsing an error page.
    # NOTE(review): the original guard and its follow-up action were lost;
    # raising here is a reconstruction choice.
    if resp.status_code != 200:
        print('获取uniqid失败')
        raise RuntimeError(f'ptbk request failed with HTTP status {resp.status_code}')

    return resp.json().get('data')

def get_index_data(keyword, start='2012-01-01', end='2012-12-31'):
    """Download and decode the index series for ``keyword``.

    Requests the ``SearchApi/index`` endpoint for the given word and date
    range, takes the first entry of ``data.userIndexes`` and the
    ``data.uniqid`` value from the response, fetches the substitution key
    via ``get_ptbk`` and decodes the ``all.data`` string with ``decrypt``.

    Args:
        keyword: Search word to query.
        start: Value for the ``startDate`` query parameter.
        end: Value for the ``endDate`` query parameter.

    Returns:
        A list of strings obtained by splitting the decoded series on commas.

    Raises:
        RuntimeError: If the HTTP status code is not 200.
    """
    word_param = f'[[%7B"name":"{keyword}","wordType":1%7D]]'
    url1 = f'http://index.baidu.com/api/SearchApi/index?area=0&word={word_param}&startDate={start}&endDate={end}'
    print(url1 + " ")

    # NOTE(review): only the User-Agent entry of this dict survived in the
    # original text; the cookie is added because the other request in this
    # file sends it. Confirm against the original script.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36',
        'cookie': cookieS,
    }

    resp = requests.get(url1, headers=headers)

    # The failure message used to print unconditionally; report it only when
    # the request actually failed, and stop instead of parsing an error page.
    # NOTE(review): the original guard and its follow-up action were lost;
    # raising here is a reconstruction choice.
    if resp.status_code != 200:
        print('获取指数失败')
        raise RuntimeError(f'index request failed with HTTP status {resp.status_code}')

    # Parse the body once and reuse it for both lookups.
    payload = resp.json().get('data')
    data = payload.get('userIndexes')[0]
    uniqid = payload.get('uniqid')

    ptbk = get_ptbk(uniqid)

    all_data = data.get('all').get('data')
    result = decrypt(ptbk, all_data)
    return result.split(',')

def demo():
    """Fetch the index data for the keyword '酷安' with the default dates and print it."""
    data = get_index_data(keyword='酷安')
    print(data)

以上就是本篇文章【python爬关键词百度指数_Python爬虫 - 抓取百度指数】的全部内容了，欢迎阅览！文章地址：http://dfvalve.xrbh.cn/quote/670.html
     行业      资讯      企业新闻      行情      企业黄页      同类资讯      网站地图      返回首页 迅博思语资讯移动站 http://keant.xrbh.cn/ , 查看更多