查看页面源码
 

 
案例
 
"""
1. 提取页面源代码
2. 解析页面源代码,提取数据
"""
import requests
from pyquery import PyQuery
# Output file, opened for writing (truncating any previous content) as soon as
# the module is loaded; parse_page_source() writes every extracted row to it.
f = open("qingchezhijia.csv", mode="w", encoding="utf-8")
def get_page_source(url, timeout=10):
    """Download a page and return its body decoded as GBK.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        The response body as text, decoded with the ``gbk`` codec.

    Raises:
        requests.RequestException: On connection failure or timeout.
        requests.HTTPError: When the server answers with a 4xx/5xx status.
    """
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    resp = requests.get(url, timeout=timeout)
    # Fail loudly instead of handing an error page to the parser.
    resp.raise_for_status()
    # The body is decoded as GBK regardless of what the response headers say.
    resp.encoding = "gbk"
    return resp.text
def parse_page_source(html):
    """Extract one record per ``.mt-10`` element and write it to ``f``.

    For every element matching ``.mt-10`` (presumably one owner review each;
    verify against the page markup) the fields are located by their position
    among the ``dl`` children and written as a single labelled line to the
    module-level file handle ``f``.

    Args:
        html: Page source to parse.
    """
    # Stand-in for the dealer ("购车经销商") entry. It is inserted when the
    # third ``dl`` is not the dealer entry, so that the positional selectors
    # below (4th, 5th, 6th ``dl``) address the same fields either way.
    dealer_placeholder = """<dl class="choose-dl">
                        <dt>购车经销商</dt>
                        <dd>
                            <a href="###" class="js-dearname" data-val="81115,51982" data-evalid="4033271" target="_blank">
                                 
                            </a>
                        </dd>
                    </dl>"""

    def first_text(node, selector):
        """Return the text of the first element under ``node`` matching ``selector``."""
        return node(selector).eq(0).text()

    document = PyQuery(html)
    for block in document(".mt-10").items():
        dealer_dt = block("div >dl:nth-child(3)>dt:contains(购车经销商)")
        if not dealer_dt:
            # The markup is parsed anew for each insertion.
            block("div >dl:nth-child(2)").after(PyQuery(dealer_placeholder))

        # Model name: drop line breaks and spaces inside the text.
        model = first_text(block, "div>dl:nth-child(1)>dd")
        model = model.replace("\n", "").replace(" ", "")

        bought_at = first_text(block, "div>dl:nth-child(2)>dd")
        bought_on = first_text(block, "div>dl:nth-child(4)>dd")

        # Strip the unit suffixes so only the values remain.
        paid = first_text(block, "div>dl:nth-child(5)>dd").replace("万元", "")
        fuel = first_text(block, "div>dl:nth-child(6)>dd >p:nth-child(1)")
        fuel = fuel.replace("升/百公里", "")
        mileage = first_text(block, "div>dl:nth-child(6)>dd >p:nth-child(2)")
        mileage = mileage.replace("公里", "")

        # Remaining entries, split on whitespace into a list of tokens.
        extras = block("div>div>dl>dd").text().split()

        f.write(
            f"购买车型:{model},购买地点:{bought_at},购买时间:{bought_on},"
            f"购车购买价:{paid},油耗:{fuel},目前行驶:{mileage},其它:{extras}\n"
        )
        
def main(url="https://k.autohome.com.cn/146/"):
    """Fetch one page, parse it and make sure the rows reach the output file.

    Args:
        url: Page to scrape. Defaults to the address this script was
            originally written for.
    """
    try:
        parse_page_source(get_page_source(url))
    finally:
        # ``f`` is a module-level handle that is never closed, so written rows
        # can sit in its buffer; flush so they are on disk when main() returns
        # (or fails part-way). The handle is left open because it is owned at
        # module level.
        f.flush()
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
 
运行结果:
 
