from bs4 import BeautifulSoup
# Articles are reported only when their rating is strictly greater than this.
MIN_RATE = 3


def is_highly_rated(rate_text, threshold=MIN_RATE):
    """Return True when *rate_text* is a number strictly above *threshold*.

    Args:
        rate_text: Rating as extracted from the page (a string such as "4.5").
        threshold: Exclusive lower bound for the rating.

    Returns:
        bool: False for text that cannot be parsed as a number (for example
        an empty rating), so one malformed entry does not abort the run.
    """
    try:
        return float(rate_text) > threshold
    except ValueError:
        return False


def first_match(tag, selector):
    """Return the first element under *tag* matching *selector*, or None."""
    found = tag.select(selector)
    return found[0] if found else None


def main():
    """Print title, description, rating and image URL of well-rated articles.

    Reads ``new_index.html`` from the current directory and prints one line
    for every list item whose rating is greater than ``MIN_RATE``. List items
    lacking any of the four fields are skipped.
    """
    # Binary mode hands raw bytes to BeautifulSoup so it can detect the
    # document's encoding itself instead of relying on the locale default.
    with open('new_index.html', 'rb') as page:
        soup = BeautifulSoup(page, 'lxml')
    # The document is fully parsed at this point, so the file is already closed.

    for li in soup.select('body > div.main-content > ul > li'):
        image = first_match(li, 'li > img')
        title = first_match(li, 'li > div.article-info > h3 > a')
        desc = first_match(li, 'li > div.article-info > p.description')
        rate = first_match(li, 'li > div.rate > span')
        # Skip incomplete entries rather than crash with IndexError.
        if any(part is None for part in (image, title, desc, rate)):
            continue

        rate_text = rate.get_text()
        if is_highly_rated(rate_text):
            print(title.get_text(), desc.get_text(), rate_text, image.get('src'))


if __name__ == '__main__':
    main()
# The triple-quoted string below is a bare expression statement, so nothing
# inside it is executed. It holds a disabled alternative implementation:
# each field is selected across the whole document, the five result lists
# are combined positionally with zip() into dicts collected in `info`, and
# the title and categories of every entry with a rating above 3 are printed.
# NOTE(review): zip() pairs results by position, so fields would be
# mismatched if some list item lacks one of the selected elements.
# NOTE(review): the inner `#print(date)` presumably means `data` - confirm.
'''
from bs4 import BeautifulSoup
info =[]
with open('new_index.html') as wb_date:
Soup = BeautifulSoup(wb_date,'lxml')
images=Soup.select('body > div.main-content > ul > li > img')
titles = Soup.select('body > div.main-content > ul > li > div.article-info > h3 > a')
descs = Soup.select('body > div.main-content > ul > li > div.article-info > p.description')
rates = Soup.select('body > div.main-content > ul > li > div.rate > span')
cates =Soup.select('body > div.main-content > ul > li > div.article-info > p.meta-info ')
#print(images,title,descs,rates,cates,sep ='\n------------------\n')
for title,desc,rate,cate,image in zip(titles,descs,rates,cates,images):
data = {
'title': title.get_text(),
'desc': desc.get_text(),
'rate': rate.get_text(),
'cate': list(cate.stripped_strings),
'image': image.get('src')
}
#print(date)
info.append(data)
for i in info:
if float(i['rate'])>3:
print(i['title'],i['cate'])
'''