Python爬虫初试

阅读 29

2024-06-20

edu_src.py

import requests, time
from bs4 import BeautifulSoup
for i in range(1, 20):
    url = f'https://src.sjtu.edu.cn/rank/firm/0/?page={i}'
    print(f"正在获取第{i}页数据")
    s = requests.get(url).text
    # print(s)
    soup = BeautifulSoup(s, 'html.parser')
    edu1 = soup.find_all('tr')
    # print(edu1)
    for edu in edu1:
        try:
            edu_name = edu.a.text
            # print(edu_name)
            with open('edu_name.txt', 'a+',encoding='utf-8') as f:
                f.write(edu_name + '\n')
        except:
            pass
    print(f"{i}页已经写入!!!")

精彩评论(0)

0 0 举报