# 爬取微博评论数据 — scrape Weibo comment data
# -*- coding: utf-8 -*-
import requests #用于发送请求并且拿到源代码
from bs4 import BeautifulSoup #用于解析数据
'''
1.找到数据源地址并且分析链接
2.发送请求并且拿到数据
3.在拿到的数据中解析出需要的数据
4.存储数据
'''
# HTTP request headers sent with every search request. They mimic a desktop
# Firefox page load so the request looks like it comes from a browser rather
# than from a script.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:125.0) Gecko/20100101 Firefox/125.0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
    "Accept-Language": "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2",
    # "br" is deliberately NOT advertised: requests/urllib3 only decode Brotli
    # when an optional brotli package is installed. Without it a Brotli-encoded
    # reply would reach response.text as undecoded bytes and parse to nothing.
    "Accept-Encoding": "gzip, deflate",
    "Connection": "keep-alive",
    "Upgrade-Insecure-Requests": "1",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "cross-site",
}
# Cookie jar sent alongside the headers on every request.
# NOTE(review): these look like values captured from a logged-in browser
# session; presumably they expire and should not live in source control —
# confirm and consider loading them from outside the script.
cookies = {
    "SUB": "_2A25LJAI1DeRhGeNG6VQS8yHXVoWBv9rDV8PUNbmtAbLUfTkW9NS3qSAB5yafaDOCMSBsSAAzS_ZARZ1aBI",
    "SUBP": "0033WrSXqPxfM72_qlqsxnLSjV6mMYf5JpX5KzhUgL.Fo-Reoq0e0BpeKM2dJLoIEUq-XQLxK-LB-qL1KzLxK-L1hqLBo5LxKBLBo.L12zLxK.L1-zLB-2t",
    "SINAGLOBAL": "675.476.1705404058457",
    "ULV": "1713402436:2:1:4927205619754.432.1713402429966:1712029972225",
    "UOR": ",,www.baidu.com",
    "_s_tentry": "weibo.com",
    "Apache": "49272056197.1713402429966",
    "ALF": "02_1714469",
    "PC_TOKEN": "614a985b",
}
# Search keywords; result pages 1-3 are fetched for each one.
li = ['美团', '饿了么', '淘宝']

# Attribute filter matching the elements whose text is written to the output
# file. Built once because it does not change between pages.
att = {
    'node-type': 'feed_list_content'
}

# The context manager closes x.txt even if a request or the parser raises,
# so text already written is flushed instead of being left in an open handle.
with open('x.txt', 'w', encoding='utf-8') as f:
    for word in li:
        for ix in range(1, 4):
            # The site checks whether a request comes from a browser, so the
            # browser-like headers and cookies are sent with every request.
            # The query goes through `params` so requests URL-encodes the
            # keyword; the timeout keeps one stalled connection from hanging
            # the whole run.
            try:
                response = requests.get(
                    'https://s.weibo.com/weibo',
                    params={'q': word, 'page': ix},
                    headers=headers,
                    cookies=cookies,
                    timeout=10,
                )
                response.raise_for_status()
            except requests.RequestException as err:
                # Report the failed page and carry on with the remaining ones
                # rather than parsing an error page or aborting the run.
                print(f'skipped {word} page {ix}: {err}')
                continue

            # .text is the decoded page source; lxml is the parser backend.
            html = BeautifulSoup(response.text, 'lxml')
            bs = html.find_all(attrs=att)

            # One matched element per output line.
            for i in bs:
                f.write(i.text + '\n')

print('完成')
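# Page-footer residue from the web page this script was copied from:
# "更多推荐" (more recommendations) / "所有评论(0)" (all comments (0))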