序言:
首先感谢两篇文章的作者:《【项目小结】爬虫学习进阶:获取百度指数历史数据》(CSDN博客)
Python爬虫|百度指数爬虫项目(简易版)_百度指数爬虫 python 代码-CSDN博客
附上代码:
import json
import os
import time

import pandas as pd
import requests
from openpyxl import Workbook

# FIX: `cookie` was referenced in the request headers but never defined
# (NameError). It must come from a logged-in Baidu Index browser session.
cookie = ""  # TODO: paste your Baidu Index cookie here before running

# FIX: the original defined `timeout=3000` but never passed it to requests.
REQUEST_TIMEOUT = 30  # seconds per HTTP request


def load_company_names(path='企业选择.xlsx', sheet_name='读取列'):
    """Read the stock-name column from the selection workbook.

    FIX: the original indexed `name_column[i]` with `i` undefined — the
    enclosing loop was lost when the code was pasted. Returns a list of
    stripped stock names.
    """
    df = pd.read_excel(path, sheet_name=sheet_name)
    return [str(name).strip() for name in df['股票名称']]


def get_index_data(keys, year):
    """Fetch Baidu Index JSON for keyword `keys` over calendar year `year`.

    The raw response is cached as res/<keys>_<year>.json so it can be parsed
    later by data_search(). Returns the path of the cached file (or None if
    the server rejected the request outright).
    """
    words = [[{"name": keys, "wordType": 1}]]
    # FIX: the original had replace("'", """) — a garbled quote character.
    # The API expects JSON-style double quotes with no spaces.
    words = str(words).replace(" ", "").replace("'", '"')
    startDate = f"{year}-01-01"
    endDate = f"{year}-12-31"
    url = (f'http://index.baidu.com/api/SearchApi/index'
           f'?area=0&word={words}&startDate={startDate}&endDate={endDate}')
    headers = {
        "Connection": "keep-alive",
        "Accept": "application/json, text/plain, */*",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36",
        "Sec-Fetch-Site": "same-origin",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Dest": "empty",
        # NOTE(review): Cipher-Text is a session-bound anti-bot token copied
        # from a browser capture — it likely expires; refresh it if requests
        # start failing.
        "Cipher-Text": "1698156005330_1698238860769_ZPrC2QTaXriysBT+5sgXcnbTX3/lW65av4zgu9uR1usPy82bArEg4m9deebXm7/O5g6QWhRxEd9/r/hqHad2WnVFVVWybHPFg3YZUUCKMTIYFeSUIn23C6HdTT1SI8mxsG5mhO4X9nnD6NGI8hF8L5/G+a5cxq+b21PADOpt/XB5eu/pWxNdwfa12krVNuYI1E8uHQ7TFIYjCzLX9MoJzPU6prjkgJtbi3v0X7WGKDJw9hwnd5Op4muW0vWKMuo7pbxUNfEW8wPRmSQjIgW0z5p7GjNpsg98rc3FtHpuhG5JFU0kZ6tHgU8+j6ekZW7+JljdyHUMwEoBOh131bGl+oIHR8vw8Ijtg8UXr0xZqcZbMEagEBzWiiKkEAfibCui59hltAgW5LG8IOtBDqp8RJkbK+IL5GcFkNaXaZfNMpI=",
        "Referer": "https://index.baidu.com/v2/main/index.html",
        "Accept-Language": "zh-CN,zh;q=0.9",
        'cookie': cookie,
    }
    res = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    res_json = res.json()
    if res_json["message"] == "bad request":
        # Keep the original behavior: warn but still cache the response.
        print("抓取关键词:" + keys + " 失败,请检查cookie或者关键词是否存在")
    if res_json["message"] == "request block":
        # FIX: os._exit(0) skipped all cleanup; SystemExit is the clean form.
        raise SystemExit("request blocked by Baidu — refresh cookie/Cipher-Text")
    # Cache the raw JSON for later decoding / parsing.
    os.makedirs('res', exist_ok=True)
    file_path = os.path.join('res', f"{keys}_{year}.json")
    with open(file_path, 'w', encoding='utf-8') as json_file:
        json.dump(res_json, json_file, ensure_ascii=False, indent=4)
    return file_path


def data_search(mc, years):
    """Return the yearly average search index for keyword `mc` in year `years`.

    Reads the JSON previously cached by get_index_data(). Raises KeyError /
    FileNotFoundError if the fetch failed or the file is missing.
    """
    # FIX: the original used the literal path f"res\{mc}_..." — an invalid
    # escape sequence and non-portable; os.path.join is correct everywhere.
    file_path = os.path.join('res', f"{mc}_{years}.json")
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)
    return data['data']['generalRatio'][0]['all']['avg']


if __name__ == "__main__":
    # FIX: all of this ran at import time in the original (and used an
    # undefined loop index `i`); guard it so the module is importable.
    qiye_name = load_company_names()

    wb = Workbook()
    ws = wb.active
    # Column 1: one company name per row, starting at row 2.
    for row, name in enumerate(qiye_name, start=2):
        ws.cell(row=row, column=1, value=name)
    # Row 1: year headers 2012..2022.
    for column in range(11):
        ws.cell(row=1, column=column + 2, value=2012 + column)

    for i, name in enumerate(qiye_name):
        for j in range(11):
            get_index_data(name, 2012 + j)
            print(i, j)
            time.sleep(3)  # throttle to avoid the anti-bot block
            # Once res/ holds data, uncomment to fill and save the sheet:
            # ws.cell(row=i + 2, column=j + 2, value=data_search(name, 2012 + j))
    # wb.save("企业指数.xlsx")