0
点赞
收藏
分享

微信扫一扫

python图片下载爬虫案例

跟着Damon写代码 2022-03-21 阅读 96
python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import urllib.request
import re

class Get_Html(object):
    """Fetch a web page and download the JPEG-style image URLs found in it.

    Attributes:
        url: Address of the page that is scanned for image links.
        head: Value sent as the ``user-agent`` header on every request.
    """

    # Absolute http(s) URL up to and including its first ".jp<word chars>"
    # suffix (".jpg", ".jpeg", ...). The URL body excludes whitespace, quotes
    # and angle brackets so a match cannot run past the end of an HTML
    # attribute, and the dot is escaped so it must be a literal ".".
    _IMAGE_URL_RE = re.compile(rb"https?://[^\s\"'<>]+?\.jp\w+")

    def __init__(self, url, head):
        """Store the page URL and the user-agent string used for requests."""
        self.url = url
        self.head = head

    def _build_request(self, url):
        """Return a ``Request`` for *url* carrying the configured user-agent."""
        request = urllib.request.Request(url)
        request.add_header("user-agent", self.head)
        return request

    def _fetch(self, url):
        """Return the raw body of *url* as bytes, closing the response."""
        with urllib.request.urlopen(self._build_request(url)) as response:
            return response.read()

    def get_default(self):
        """Download ``self.url`` and return the undecoded response body.

        The request, response and body are also kept on the instance as
        ``self.request``, ``self.response`` and ``self.result``.

        Returns:
            The response body as ``bytes``.
        """
        self.request = self._build_request(self.url)
        # The context manager closes the response once the body is read.
        with urllib.request.urlopen(self.request) as self.response:
            self.result = self.response.read()
        return self.result

    def get_list(self):
        """Return the image URLs found in the page at ``self.url``.

        The raw ``bytes`` matches are kept in ``self.image_list`` and the
        decoded strings in ``self.image_addr_list``. Each raw match is
        printed as it is processed.

        Returns:
            A list of ``str`` URLs in the order they appear in the page.
        """
        self.image_list = self._IMAGE_URL_RE.findall(self.get_default())
        self.image_addr_list = []
        for raw_url in self.image_list:
            # Matches are bytes; convert them to str before storing them.
            self.image_addr_list.append(str(raw_url, encoding="utf8"))
            print(raw_url)
        return self.image_addr_list

    def get_images(self):
        """Download every image from the page into ``1.jpeg``, ``2.jpeg``, ...

        Files are written to the current working directory and numbered in
        the order the URLs appear in the page.
        """
        for num, image_url in enumerate(self.get_list(), start=1):
            # Fetch through a helper instead of rebinding self.url, so the
            # page URL is still intact afterwards and the method can be
            # called again. Download before opening the file so a failed
            # request does not leave an empty file behind.
            data = self._fetch(image_url)
            with open(str(num) + ".jpeg", "wb") as file:
                file.write(data)
              
  
if __name__ == '__main__':
    # Script entry point: scan the wallpaper page and save its images into
    # the current directory, identifying as a desktop Chrome browser.
    page_url = "https://9yin.woniu.com/media/wallpapers/"
    user_agent = (
        "Mozilla/5.0  "
        "(Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/86.0.4240.198 Safari/537.36"
    )
    html = Get_Html(page_url, user_agent)
    html.get_images()
举报

相关推荐

0 条评论