爬取图书

DT_M

关注

阅读 231

2022-07-12

# -*- coding: utf-8 -*-
import scrapy


class BooksSpider(scrapy.Spider):
    """Spider that walks the books.toscrape.com catalogue page by page.

    Every listing page produces one item per ``article.product_pod`` element;
    afterwards the pager's "next" link, when present, is followed with the
    same callback.
    """

    name = 'books'
    # Must cover the host used in ``start_urls``. The earlier value
    # 'www.books.toscrape.com' does not match 'books.toscrape.com' (it is
    # neither that domain nor a subdomain of it), so the follow-up pagination
    # requests were rejected by Scrapy's offsite filtering.
    allowed_domains = ['books.toscrape.com']
    start_urls = ['http://books.toscrape.com/']

    def parse(self, response):
        """Yield book items from a listing page, then the next-page request.

        Args:
            response: The downloaded listing page.

        Yields:
            A dict with keys ``'name'`` and ``'price'`` for each product on
            the page, followed by a ``scrapy.Request`` for the next listing
            page when the pager exposes one.
        """
        for book in response.css("article.product_pod"):
            yield {
                # extract() returns a list of all matches, so both values are
                # lists of strings; kept that way so existing consumers of
                # the items see the same shape.
                'name': book.xpath("./h3/a/@title").extract(),
                "price": book.css('p.price_color::text').extract(),
            }

        # The href is relative to the current page, hence urljoin().
        next_url = response.css('ul.pager li.next a::attr(href)').extract_first()
        if next_url:
            yield scrapy.Request(response.urljoin(next_url), callback=self.parse)


精彩评论(0)

0 0 举报