图片混淆反爬
·
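'''
Image-obfuscation anti-scraping: the target text is rendered as an image,
so it has to be recognised from the image (OCR).
If the recognition result is inaccurate, the OCR library needs to be upgraded.
6.1
'''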
from common.tools import Tool
import requests
from scrapy.selector import Selector
from urllib import parse
class ImgSpider:
    """Download the demo recruitment page and OCR the text hidden in an image.

    The page embeds a piece of text as an ``<img class="pn">`` element
    instead of plain text; ``run`` resolves that image's URL and hands it to
    the project's OCR helper.
    """

    def __init__(self):
        # Project-level helper object; used below for request headers and OCR.
        self.tool = Tool.get_tool()

    def run(self, timeout=10):
        """Fetch the page, locate the obfuscated image and print its OCR text.

        Args:
            timeout: Seconds to wait for the HTTP response before giving up.
                Without a timeout ``requests`` may block indefinitely.
        """
        url = 'http://www.porters.vip/confusion/recruit.html#'
        # NOTE(review): get_user_agent presumably returns the headers dict
        # with a User-Agent filled in - confirm against common.tools.
        headers = self.tool.get_user_agent({})
        r = requests.get(url=url, headers=headers, timeout=timeout)
        if r.status_code != 200:
            print('Unexpected HTTP status: {}'.format(r.status_code))
            return

        img = Selector(r).xpath('//img[@class="pn"]/@src').extract_first()
        if not img:
            # extract_first() yields None when nothing matches; urljoin would
            # then return the page URL itself, which is not an image.
            print('No <img class="pn"> element found on the page')
            return

        # The src attribute may be relative; resolve it against the final URL.
        img = parse.urljoin(r.url, img)
        # Alternative sample image for manual testing:
        # img = 'https://vip.gxrc.com/Public/Phone/3678B85A-6C0B-42D6-B5C7-3D6D89450001'
        # NOTE(review): tessseract_img (sic) appears to take the image URL and
        # return the recognised text - confirm against common.tools.
        text = self.tool.tessseract_img(img)
        print('图片字体为:{}'.format(text))
if __name__ == '__main__':
    # Script entry point: build the spider and run it once.
    spider = ImgSpider()
    spider.run()
——参考《Python3 反爬虫原理与绕过实战》
更多推荐
已为社区贡献6条内容
所有评论(0)