直接上代码:
import re
import requests
# 创建定长数字,作为一会名字
def creat_fivenum(num, n=5):
    """Return ``num`` as a decimal string left-padded with zeros to ``n`` digits.

    Used to build fixed-width, sortable file names (e.g. ``7`` -> ``"00007"``).

    :param num: integer to format.
    :param n: minimum width of the result; defaults to 5.
    :return: the zero-padded string. A value that already has ``n`` or more
        digits is returned without padding.
    """
    # str.zfill replaces the hand-rolled digit-counting loop, which ignored
    # ``n`` (the width was hard-coded to 5) and returned None for values of
    # 10**9 and above because the loop ran out of iterations.
    return str(num).zfill(n)
# 爬取糗事百科
# https://www.qiushibaike.com/imgrank/
# 分析一下照片所在的区域
# <div class="thumb">
# <a href="/article/124066439" target="_blank">
# <img src="//pic.qiushibaike.com/system/pictures/12406/124066439/medium/NKSSOW6NS7WM1L6J.jpg" alt="糗事#124066439" class="illustration" width="100%" height="auto">
# </a>
# </div>
def downQiushiImg(endpage=13, save_dir="D://糗事百科image", timeout=10):
    """Download the images listed on the Qiushibaike image-rank pages.

    Listing pages ``1..endpage`` under
    ``https://www.qiushibaike.com/imgrank/page/`` are fetched in order, the
    ``src`` of every thumbnail ``<img>`` is extracted with a regular
    expression, and each image is written to ``save_dir`` under a zero-padded
    sequential name produced by ``creat_fivenum`` (``00001.png``, ...).

    The function passes a ``headers`` name to ``requests.get``; that name is
    not defined here and must exist at module level.

    :param endpage: number of listing pages to crawl, starting from page 1.
    :param save_dir: directory the images are written to; created if missing.
    :param timeout: seconds to wait on each HTTP request before giving up.
    :return: None. Each image URL and the final count are printed.
    """
    import os  # local import: only needed to create the output directory

    # Create the target directory up front so the first open() cannot fail
    # with FileNotFoundError.
    os.makedirs(save_dir, exist_ok=True)

    # re.S lets "." match newlines: the <img> tag is on a different line
    # than the enclosing <div class="thumb">. Compiled once, reused per page.
    thumb_re = re.compile(
        '<div class="thumb">.*?<img src="(.*?)" alt.*?</div>', re.S
    )

    saved = 0  # number of images successfully written so far
    for page in range(1, endpage + 1):
        page_url = f"https://www.qiushibaike.com/imgrank/page/{page}/"
        try:
            page_res = requests.get(page_url, headers=headers, timeout=timeout)
            page_res.raise_for_status()
        except requests.RequestException as err:
            # Best effort: one bad listing page should not abort the crawl.
            print(f"skip page {page_url}: {err}")
            continue

        for src in thumb_re.findall(page_res.text):
            # The page uses protocol-relative URLs ("//pic.../x.jpg").
            img_url = "http:" + src
            print(img_url)
            try:
                img_res = requests.get(img_url, headers=headers, timeout=timeout)
                img_res.raise_for_status()
            except requests.RequestException as err:
                # Do not write an error response to disk as if it were an image.
                print(f"skip image {img_url}: {err}")
                continue

            # NOTE(review): files are saved with a .png suffix regardless of
            # the extension in the source URL; kept as-is so existing output
            # names do not change.
            target = f"{save_dir}/{creat_fivenum(saved + 1)}.png"
            with open(target, "wb") as f:
                f.write(img_res.content)
            saved += 1

    print(f"爬取完毕,一共{saved}个照片")
# Script entry point: run the crawl with the function's default arguments.
downQiushiImg()
View Code
注意:再次说明,如果正则匹配不到内容,多半是因为没有指定 re.S(它让 "." 也能匹配换行符,从而可以跨行匹配)。
-----------------------------------------------------------------------------------------------------------------------------------------