0
点赞
收藏
分享

微信扫一扫

猫眼TOP100爬虫代码「持续更新」

小禹说财 2022-03-27 阅读 43


你好,悦创。

爬虫网站链接:https://ssr1.scrape.center/page/1,视频回放请联系 AI悦创

代码:

# Fields to extract for each movie:
# - title
# - region
# - score
# - cover image
# - release date/time
import re
import requests
from requests.exceptions import RequestException
class MaoYan(object):
    """Regex-based scraper for the movie list at ssr1.scrape.center.

    The workflow is ``main`` -> ``requests`` (download the page) ->
    ``parse`` -> ``pattern`` (extract one tuple per movie card).

    Note: the ``requests`` *method* shares its name with the imported
    ``requests`` *module*. Inside the method body the bare name still
    resolves to the module, because class attributes are not in scope
    for method bodies.
    """

    # One match per movie card; ``findall`` yields a 9-tuple per card.
    # Capture groups, in order:
    #   1. href of the first <a> in the card
    #   2. src of the <img> whose tag ends with ``cover">``
    #   3. text of the <h2> heading
    #   4. text of the <span> inside the first ``type="button"`` button
    #      that follows "categories"
    #   5-7. text of the next three <span> elements
    #   8. text of the following <span>
    #   9. text of the <p> whose tag contains "score"
    # NOTE(review): groups 5-8 are presumably region / separator /
    # duration / release date on the live page - confirm against the site.
    # Compiled once at class creation instead of on every call.
    _MOVIE_RE = re.compile(
        r'<div.*?data-v-7f856186.*?el-row.*?<a.*?href="(.*?)".*?'
        r'<img.*?src="(.*?)".*?cover">'
        r'.*?data-v-7f856186.*?h2.*?data-v-7f856186.*?>(.*?)</h2>'
        r'.*?categories.*?button.*?type="button".*?span>(.*?)</span>'
        r'.*?div.*?data-v-7f856186.*?span.*?>(.*?)</span>'
        r'.*?<span.*?>(.*?)</span>'
        r'.*?<span.*?>(.*?)</span>'
        r'.*?data-v-7f856186.*?<span.*?>(.*?)</span>'
        r'.*?data-v-7f856186.*?p.*?score.*?>(.*?)</p>',
        re.S | re.I,
    )

    def __init__(self, timeout=10):
        """Create a crawler.

        Args:
            timeout: Seconds to wait for the server before the request is
                aborted. Passed straight to ``requests.get``.
        """
        self.headers = {
            "User-Agent": (
                "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/98.0.4758.102 Safari/537.36"
            ),
        }
        self.timeout = timeout

    def requests(self, url):
        """Download *url* and return its body.

        Returns:
            The response text when the status code is 200, the literal
            string ``"None"`` for any other status code, or the caught
            ``RequestException`` instance when the request itself fails.
        """
        try:
            # Without a timeout a stalled connection would block forever;
            # a timeout error is a RequestException and is handled below.
            response = requests.get(
                url, headers=self.headers, timeout=self.timeout
            )
            if response.status_code == 200:
                return response.text
            return "None"
        except RequestException as e:
            return e

    def pattern(self, string):
        """Return every movie-card match found in the HTML *string*.

        Returns:
            A list of 9-tuples of strings (see ``_MOVIE_RE`` for the group
            order); an empty list when nothing matches.
        """
        return self._MOVIE_RE.findall(string)

    def parse(self, content):
        """Extract the movie rows from *content*, print them and their count.

        Args:
            content: Whatever ``requests`` returned. Anything that is not a
                string (i.e. the exception object of a failed request) is
                reported and treated as "no results" instead of being fed
                to the regex engine, which would raise ``TypeError``.

        Returns:
            The list of extracted tuples (empty on failure).
        """
        if not isinstance(content, str):
            print("Request failed: {!r}".format(content))
            return []
        lst = self.pattern(content)
        print(lst)
        print(len(lst))
        return lst

    def main(self):
        """Fetch the first listing page and parse it; return the rows."""
        url = "https://ssr1.scrape.center/page/1"
        html = self.requests(url)
        return self.parse(html)

# Script entry point: run the crawler only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    crawler = MaoYan()
    crawler.main()






举报

相关推荐

0 条评论