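# JD.com lazy-loads its product images. Lazy loading works like this: each
# <img> tag first carries the image URL in a custom attribute; the page watches
# the browser's scroll position and, once the page has been scrolled far
# enough, copies the URL from the custom attribute into the image's `src`
# attribute. Afterwards, as an anti-scraping measure (useless in practice,
# since the scheme is trivial to guess), the custom attribute's value is
# replaced with some other value.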
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
def JD_SOU(SOU_KEY, max_pages=None):
    """Search JD.com for a keyword in headless Chrome and print each result.

    Opens https://www.jd.com/, types ``SOU_KEY`` into the search box, submits
    it, and prints the details of every product found on each result page,
    following the "下一页" (next page) link until it runs out.

    Args:
        SOU_KEY: The search keyword; it is converted to ``str`` before typing.
        max_pages: Optional upper bound on the number of result pages to
            visit. ``None`` (the default) means no limit.

    Returns:
        None. Results are written to stdout.

    Any exception raised while driving the browser is printed rather than
    propagated, and the browser session is always shut down afterwards.
    """

    def build_headless_options():
        """Return Chrome options for a headless session with images disabled."""
        chrome_options = Options()
        chrome_options.add_argument('window-size=1920x3000')
        chrome_options.add_argument('--disable-gpu')
        chrome_options.add_argument('--hide-scrollbars')
        chrome_options.add_argument('blink-settings=imagesEnabled=false')
        chrome_options.add_argument('--headless')
        return chrome_options

    def resolve_image_url(img):
        """Return the image URL of ``img``, falling back to the lazy-load attribute."""
        src = img.get_attribute('src')
        if src:
            return src
        # Image not loaded yet: the URL still sits in the custom attribute.
        lazy = img.get_attribute('data-lazy-img')
        if not lazy:
            # Neither attribute is set; report an empty URL instead of
            # failing on 'https:' + None and losing the whole product.
            return ''
        # The original code always prepended the scheme; only do so when the
        # value does not already carry one.
        return lazy if lazy.startswith('http') else 'https:' + lazy

    report = (
        "\n"
        "商品主页:%s\n"
        "商品价格:%s\n"
        "商品标题:%s\n"
        "商家主页:%s\n"
        "商家名称:%s\n"
        "评论数量:%s\n"
        "评论地址:%s\n"
        "商品图片:%s\n"
        "优惠标签:%s\n"
    )

    def print_page(browser):
        """Print every product entry found on the currently loaded result page."""
        items = browser.find_elements(By.CSS_SELECTOR, '.gl-warp li')
        print(len(items))
        for item in items:
            try:
                name_link = item.find_element(By.CSS_SELECTOR, '.p-name a')
                shop_link = item.find_element(By.CSS_SELECTOR, '.p-shop a')
                commit_link = item.find_element(
                    By.CSS_SELECTOR, '.p-commit strong a')
                image = item.find_element(By.CSS_SELECTOR, '.p-img img')

                url = name_link.get_attribute('href')
                price = item.find_element(
                    By.CSS_SELECTOR, '.p-price strong i').text
                title = item.find_element(By.CSS_SELECTOR, '.p-name em').text
                shop_home = shop_link.get_attribute('href')
                shop_name = shop_link.text
                commit_num = commit_link.text
                commit_url = commit_link.get_attribute('href')
                img_url = resolve_image_url(image)
                yh = item.find_element(
                    By.CSS_SELECTOR, '.p-icons').text.split('\n')
            except Exception:
                # Best effort: an <li> that lacks any of the expected
                # sub-elements (or goes stale mid-read) is skipped.
                continue
            # Argument order matches the labels: shop home page first, then
            # shop name (the two were previously swapped).
            print(report % (url, price, title, shop_home, shop_name,
                            commit_num, commit_url, img_url, yh))

    def crawl(browser):
        """Print the current page, then keep following the next-page link."""
        visited = 0
        while True:
            print_page(browser)
            visited += 1
            if max_pages is not None and visited >= max_pages:
                break
            # find_elements returns an empty list instead of raising, so the
            # last page ends the loop cleanly.
            next_links = browser.find_elements(By.PARTIAL_LINK_TEXT, '下一页')
            if not next_links:
                break
            next_page = next_links[0]
            # NOTE(review): presumably JD marks the last page's link with a
            # "disabled" class; confirm against the live markup. Without this
            # guard a dead link would be clicked forever.
            if 'disabled' in (next_page.get_attribute('class') or ''):
                break
            next_page.click()
            # Short pause to let the next page start rendering.
            time.sleep(0.5)

    browser = webdriver.Chrome(options=build_headless_options())
    browser.implicitly_wait(10)
    try:
        browser.get('https://www.jd.com/')
        s_input = browser.find_element(By.ID, 'key')
        s_input.send_keys(str(SOU_KEY))
        s_input.send_keys(Keys.ENTER)
        crawl(browser)
    except Exception as e:
        # Top-level boundary: report the failure instead of raising.
        print(e)
    finally:
        # quit() ends the whole WebDriver session (all windows plus the
        # driver process); close() would only close the current window.
        browser.quit()
# Run the demo search only when executed as a script, so that importing this
# module does not launch a browser.
if __name__ == "__main__":
    JD_SOU('小黑子立牌')