0
点赞
收藏
分享

微信扫一扫

抓取天气爬虫

# data14_analysis.py
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import math
def tem_curve(data):
    """Plot the forecast high/low temperature curves.

    Reads the '最高气温' and '最低气温' columns, fills NaN entries with the
    preceding value, then draws both series against 1-based day numbers
    together with dashed average lines and text labels on the overall
    maximum and minimum.

    Args:
        data: Column-indexable table; main() passes the DataFrame loaded
            from weather14.csv.
    """
    tem_low = list(data['最低气温'])
    tem_high = list(data['最高气温'])
    days = len(tem_high)

    # Replace missing readings with the previous entry.  For index 0 the
    # "previous" entry is index -1, i.e. the last element of the list.
    for i in range(days):
        if math.isnan(tem_low[i]):
            tem_low[i] = tem_low[i - 1]
        if math.isnan(tem_high[i]):
            tem_high[i] = tem_high[i - 1]

    tem_high_ave = sum(tem_high) / days
    tem_low_ave = sum(tem_low) / days

    # Day numbers start at 1, so list index k is plotted at x == k + 1.
    x = range(1, days + 1)
    tem_max = max(tem_high)
    tem_max_day = x[tem_high.index(tem_max)]
    tem_min = min(tem_low)
    tem_min_day = x[tem_low.index(tem_min)]

    plt.figure(1)
    plt.plot(x, tem_high, color='red', label='高温')
    plt.scatter(x, tem_high, color='red')
    plt.plot(x, tem_low, color='blue', label='低温')
    plt.scatter(x, tem_low, color='blue')

    # Dashed horizontal lines at the average high and average low.
    plt.plot([1, days + 1], [tem_high_ave, tem_high_ave], c='black', linestyle='--')
    plt.plot([1, days + 1], [tem_low_ave, tem_low_ave], c='black', linestyle='--')
    plt.legend()
    # Anchor the labels on the plotted x position (not the raw list index)
    # so they sit next to the point they describe.
    plt.text(tem_max_day + 0.15, tem_max + 0.15, str(tem_max), ha='center', va='bottom', fontsize=10.5)
    plt.text(tem_min_day + 0.15, tem_min + 0.15, str(tem_min), ha='center', va='bottom', fontsize=10.5)
    plt.xticks(x)
    plt.title('未来14天高温低温变化曲线图')
    plt.xlabel('未来天数/天')
    plt.ylabel('摄氏度/℃')
    plt.show()


def change_wind(wind):
    """Replace Chinese wind-direction names in *wind* with angles in degrees.

    The list is modified in place and also returned.  Entries that are not
    one of the eight recognised direction names are left untouched.

    Args:
        wind: Mutable list of direction names; any length is accepted.

    Returns:
        The same list object, with recognised names replaced by integers.
    """
    angle_of = {
        "北风": 90,
        "南风": 270,
        "西风": 180,
        "东风": 360,
        "东北风": 45,
        "西北风": 135,
        "西南风": 225,
        "东南风": 315,
    }
    for i, name in enumerate(wind):
        if name in angle_of:
            wind[i] = angle_of[name]
    return wind


def wind_radar(data):
    """Draw a polar bar chart of the average wind level per direction.

    Each row contributes its '风级' value to the directions named in both
    '风向1' and '风向2'.  Directions are converted to angles by
    change_wind(); for each of the eight 45-degree steps the mean level is
    computed (0 when no row matches), printed, and drawn as one bar.

    Args:
        data: Column-indexable table; main() passes the DataFrame loaded
            from weather14.csv.
    """
    wind1 = change_wind(list(data['风向1']))
    wind2 = change_wind(list(data['风向2']))
    wind_speed = list(data['风级'])

    degs = np.arange(45, 361, 45)
    temp = []
    for deg in degs:
        speed = []
        for first, second, level in zip(wind1, wind2, wind_speed):
            if first == deg:
                speed.append(level)
            if second == deg:
                speed.append(level)
        temp.append(sum(speed) / len(speed) if speed else 0)
    print(temp)

    N = 8
    # One bar centre per direction, starting at pi/8.  Built from integer
    # indices so the array always has exactly N elements.
    theta = np.pi / 8 + np.arange(N) * (2 * np.pi / N)
    radii = np.array(temp)
    plt.axes(polar=True)
    # Bars are shaded relative to the largest average; fall back to 1 when
    # every average is 0 so the scaling never divides by zero.
    scale = max(temp) or 1
    colors = [(1 - x / scale, 1 - x / scale, 0.6) for x in radii]
    plt.bar(theta, radii, width=(2 * np.pi / N), bottom=0.0, color=colors)
    plt.title('未来14天风级图', x=0.2, fontsize=20)
    plt.show()
def weather_pie(data):
    """Draw a pie chart of how often each weather description occurs.

    Counts the values in the '天气' column, prints the resulting mapping,
    and plots one wedge per distinct description.

    Args:
        data: Column-indexable table; main() passes the DataFrame loaded
            from weather14.csv.
    """
    weather = list(data['天气'])
    dic_wea = {}
    for kind in weather:
        dic_wea[kind] = dic_wea.get(kind, 0) + 1
    print(dic_wea)

    explode = [0.01] * len(dic_wea)
    color = ['lightskyblue', 'silver', 'yellow', 'salmon', 'grey', 'lime', 'gold', 'red', 'green', 'pink']
    # Hand matplotlib real lists: dict views are not sequences and may not
    # be convertible to a numeric array by the plotting call.
    plt.pie(
        list(dic_wea.values()),
        explode=explode,
        labels=list(dic_wea.keys()),
        autopct='%1.1f%%',
        colors=color,
    )
    plt.title('未来14天气候分布饼图')
    plt.show()
def main():
    """Load weather14.csv and show the temperature, wind and weather charts."""
    # Font and minus-sign settings applied before any figure is created
    # (presumably so the Chinese labels and negative ticks render correctly).
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False

    forecast = pd.read_csv('weather14.csv', encoding='gb2312')
    print(forecast)

    # Each chart function blocks on plt.show(), so they appear one by one.
    for draw in (tem_curve, wind_radar, weather_pie):
        draw(forecast)


if __name__ == '__main__':
    main()
 

# data1_analysis.py
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import math
def tem_curve(data):
    """Plot the hourly temperature curve with its average and extremes.

    Reads the '小时' and '温度' columns, fills NaN temperatures with the
    preceding entry, and plots temperature against hour of day (0-23) with
    a dashed average line and labels on the maximum and minimum.

    Args:
        data: Column-indexable table; main() passes the DataFrame loaded
            from weather1.csv.
    """
    hour = list(data['小时'])
    tem = list(data['温度'])
    # Replace missing readings with the previous entry.  For index 0 the
    # "previous" entry is index -1, i.e. the last element of the list.
    for i in range(len(tem)):
        if math.isnan(tem[i]):
            tem[i] = tem[i - 1]

    tem_ave = sum(tem) / len(tem)
    tem_max = max(tem)
    tem_max_hour = hour[tem.index(tem_max)]
    tem_min = min(tem)
    tem_min_hour = hour[tem.index(tem_min)]

    # Re-order the readings by hour of day; hours absent from the data are
    # skipped instead of raising ValueError in hour.index().
    x = [h for h in range(24) if h in hour]
    y = [tem[hour.index(h)] for h in x]

    plt.figure(1)
    plt.plot(x, y, color='red', label='温度')
    plt.scatter(x, y, color='red')
    plt.plot([0, 24], [tem_ave, tem_ave], c='blue', linestyle='--', label='平均温度')
    plt.text(tem_max_hour + 0.15, tem_max + 0.15, str(tem_max), ha='center', va='bottom', fontsize=10.5)
    plt.text(tem_min_hour + 0.15, tem_min + 0.15, str(tem_min), ha='center', va='bottom', fontsize=10.5)
    plt.xticks(x)
    plt.legend()
    plt.title('一天温度变化曲线图')
    plt.xlabel('时间/h')
    plt.ylabel('摄氏度/℃')
    plt.show()


def hum_curve(data):
    """Plot the hourly relative-humidity curve with its average and extremes.

    Reads the '小时' and '相对湿度' columns, fills NaN values with the
    preceding entry, and plots humidity against hour of day (0-23) with a
    dashed average line and labels on the maximum and minimum.

    Args:
        data: Column-indexable table; main() passes the DataFrame loaded
            from weather1.csv.
    """
    hour = list(data['小时'])
    hum = list(data['相对湿度'])
    # Replace missing readings with the previous entry.  For index 0 the
    # "previous" entry is index -1, i.e. the last element of the list.
    for i in range(len(hum)):
        if math.isnan(hum[i]):
            hum[i] = hum[i - 1]

    hum_ave = sum(hum) / len(hum)
    hum_max = max(hum)
    hum_max_hour = hour[hum.index(hum_max)]
    # The minimum must be computed before it is looked up and labelled.
    hum_min = min(hum)
    hum_min_hour = hour[hum.index(hum_min)]

    # Re-order the readings by hour of day; hours absent from the data are
    # skipped instead of raising ValueError in hour.index().
    x = [h for h in range(24) if h in hour]
    y = [hum[hour.index(h)] for h in x]

    plt.figure(2)
    plt.plot(x, y, color='blue', label='相对湿度')
    plt.scatter(x, y, color='blue')
    # Dashed line at the average relative humidity.
    plt.plot([0, 24], [hum_ave, hum_ave], c='red', linestyle='--', label='平均相对湿度')
    # Label the highest and lowest relative humidity.
    plt.text(hum_max_hour + 0.15, hum_max + 0.15, str(hum_max), ha='center', va='bottom', fontsize=10.5)
    plt.text(hum_min_hour + 0.15, hum_min + 0.15, str(hum_min), ha='center', va='bottom', fontsize=10.5)
    plt.xticks(x)
    plt.legend()
    plt.title('一天相对湿度变化曲线图')
    plt.xlabel('时间/h')
    plt.ylabel('百分比/%')
    plt.show()


def air_curve(data):
    """Plot hourly air-quality bars coloured by level, with the average.

    Reads the '小时' and '空气质量' columns, fills NaN values with the
    preceding entry, and draws one bar per hour of day (0-23).  Bar colour
    is chosen from six levels by value; a dashed line marks the average
    and the maximum and minimum are labelled.

    Args:
        data: Column-indexable table; main() passes the DataFrame loaded
            from weather1.csv.
    """
    hour = list(data['小时'])
    air = list(data['空气质量'])
    print(type(air[0]))  # diagnostic output kept from the original script
    # Replace missing readings with the previous entry.  For index 0 the
    # "previous" entry is index -1, i.e. the last element of the list.
    for i in range(len(air)):
        if math.isnan(air[i]):
            air[i] = air[i - 1]

    air_ave = sum(air) / len(air)
    air_max = max(air)
    air_max_hour = hour[air.index(air_max)]
    air_min = min(air)
    air_min_hour = hour[air.index(air_min)]

    # Re-order the readings by hour of day; hours absent from the data are
    # skipped instead of raising ValueError in hour.index().
    x = [h for h in range(24) if h in hour]
    y = [air[hour.index(h)] for h in x]

    plt.figure(3)

    # (inclusive upper bound, colour) for levels 1-5; anything above the
    # last bound is level 6.
    levels = (
        (50, 'lightgreen'),
        (100, 'wheat'),
        (150, 'orange'),
        (200, 'orangered'),
        (300, 'darkviolet'),
    )
    for h, value in zip(x, y):
        if math.isnan(value):
            # A value that is still NaN matches no level; draw no bar.
            continue
        bar_color = next((c for bound, c in levels if value <= bound), 'maroon')
        plt.bar(h, value, color=bar_color, width=0.7)

    plt.plot([0, 24], [air_ave, air_ave], c='black', linestyle='--')
    # Label the highest and lowest air-quality values.
    plt.text(air_max_hour + 0.15, air_max + 0.15, str(air_max), ha='center', va='bottom', fontsize=10.5)
    plt.text(air_min_hour + 0.15, air_min + 0.15, str(air_min), ha='center', va='bottom', fontsize=10.5)
    plt.xticks(x)
    plt.title('一天空气质量变化曲线图')
    plt.xlabel('时间/h')
    plt.ylabel('空气质量指数AQI')
    plt.show()


def wind_radar(data):
    """Draw a polar bar chart of the average wind level per direction.

    Reads the '风力方向' and '风级' columns, converts the eight recognised
    direction names to angles in degrees, and for each 45-degree step
    computes the mean wind level (0 when no row matches).  The averages
    are printed and drawn as one bar per direction.

    Args:
        data: Column-indexable table; main() passes the DataFrame loaded
            from weather1.csv.
    """
    angle_of = {
        "北风": 90,
        "南风": 270,
        "西风": 180,
        "东风": 360,
        "东北风": 45,
        "西北风": 135,
        "西南风": 225,
        "东南风": 315,
    }
    # Unrecognised names are kept as-is and simply never match an angle.
    wind = [angle_of.get(name, name) for name in data['风力方向']]
    wind_speed = list(data['风级'])

    degs = np.arange(45, 361, 45)
    temp = []
    for deg in degs:
        speed = [level for direction, level in zip(wind, wind_speed) if direction == deg]
        temp.append(sum(speed) / len(speed) if speed else 0)
    print(temp)

    N = 8
    # One bar centre per direction, starting at pi/8.  Built from integer
    # indices so the array always has exactly N elements.
    theta = np.pi / 8 + np.arange(N) * (2 * np.pi / N)
    radii = np.array(temp)
    plt.axes(polar=True)
    # Bars are shaded relative to the largest average; fall back to 1 when
    # every average is 0 so the scaling never divides by zero.
    scale = max(temp) or 1
    colors = [(1 - x / scale, 1 - x / scale, 0.6) for x in radii]
    plt.bar(theta, radii, width=(2 * np.pi / N), bottom=0.0, color=colors)
    plt.title('一天风级图', x=0.2, fontsize=20)
    plt.show()
def calc_corr(a, b):
    """Return the Pearson correlation coefficient of two numeric series.

    Args:
        a: Sized iterable of numbers.
        b: Sized iterable of numbers, the same length as *a*.

    Returns:
        The correlation coefficient as a float, or NaN when it is
        undefined (empty input, or either series has zero variance).

    Raises:
        ValueError: If *a* and *b* differ in length.
    """
    if len(a) != len(b):
        # zip() would silently truncate the covariance while the variances
        # still used the full series, producing a meaningless number.
        raise ValueError("calc_corr requires series of equal length")
    if len(a) == 0:
        return float('nan')

    a_avg = sum(a) / len(a)
    b_avg = sum(b) / len(b)
    cov_ab = sum((x - a_avg) * (y - b_avg) for x, y in zip(a, b))
    sq = math.sqrt(sum((x - a_avg) ** 2 for x in a) * sum((y - b_avg) ** 2 for y in b))
    if sq == 0:
        # A constant series has no variance, so the ratio is undefined.
        return float('nan')
    return cov_ab / sq
def corr_tem_hum(data):
    """Scatter-plot temperature against relative humidity with their correlation.

    Reads the '温度' and '相对湿度' columns, draws the scatter plot,
    annotates it with the coefficient from calc_corr(), and prints the
    same text after the figure is closed.

    Args:
        data: Column-indexable table; main() passes the DataFrame loaded
            from weather1.csv.
    """
    tem = data['温度']
    hum = data['相对湿度']
    # Compute the coefficient once and reuse it for the plot and the log.
    message = "相关系数为:" + str(calc_corr(tem, hum))

    plt.scatter(tem, hum, color='blue')
    plt.title("温湿度相关性分析图")
    plt.xlabel("温度/℃")
    plt.ylabel("相对湿度/%")
    # The annotation is placed at data coordinates (20, 40).
    plt.text(20, 40, message, fontdict={'size': '10', 'color': 'red'})
    plt.show()
    print(message)
def main():
    """Load weather1.csv and show every hourly chart in sequence."""
    # Font and minus-sign settings applied before any figure is created
    # (presumably so the Chinese labels and negative ticks render correctly).
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False

    hourly = pd.read_csv('weather1.csv', encoding='gb2312')
    print(hourly)

    # Each chart function blocks on plt.show(), so they appear one by one.
    for draw in (tem_curve, hum_curve, air_curve, wind_radar, corr_tem_hum):
        draw(hourly)


if __name__ == '__main__':
    main()
 

# weather.py
import requests
from bs4 import BeautifulSoup
import csv
import json
def getHTMLtext(url):
    """Fetch *url* and return the response body as text.

    Args:
        url: Address to request; the request times out after 30 seconds.

    Returns:
        The decoded page text, or a single space (" ") when the request
        fails or the server answers with an error status.
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
    except requests.RequestException:
        # Only network/HTTP failures are treated as "access error";
        # KeyboardInterrupt and programming errors still propagate.
        print("访问错误")
        return " "
    else:
        # Decode with the encoding detected from the content rather than
        # the one declared in the response headers.
        r.encoding = r.apparent_encoding
        print("成功访问")
        return r.text
def get_content(html):
    """Extract the hourly observations and the short-range daily forecast.

    Args:
        html: Page source of the 7-day forecast page.

    Returns:
        A tuple ``(final_day, final)``:

        * ``final_day`` - up to 24 rows
          ``[hour, temperature, wind direction, wind level, precipitation,
          relative humidity, air quality]`` taken from the JSON embedded
          in the page.
        * ``final`` - one row per forecast day for list items 1 to 6 (the
          first item is skipped):
          ``[date, weather, low, high, wind1, wind2, wind level]``.
          ``high`` is None when the page shows no maximum temperature.
    """
    final = []  # daily forecast rows
    bs = BeautifulSoup(html, "html.parser")
    body = bs.body
    data = body.find('div', {'id': '7d'})

    # --- hourly data for the current day --------------------------------
    data2 = body.find_all('div', {'class': 'left-div'})
    text = data2[2].find('script').string
    # The script has the form "<name> = {...};" - keep only the JSON part.
    text = text[text.index('=') + 1:-2]
    jd = json.loads(text)
    dayone = jd['od']['od2']
    # od21 hour, od22 temperature, od24 wind direction, od25 wind level,
    # od26 precipitation, od27 relative humidity, od28 air quality.
    fields = ('od21', 'od22', 'od24', 'od25', 'od26', 'od27', 'od28')
    final_day = [[entry[key] for key in fields] for entry in dayone[:24]]

    # --- daily forecast ---------------------------------------------------
    ul = data.find('ul')
    li = ul.find_all('li')
    for day in li[1:7]:  # skip the first item, take at most six days
        temp = []
        date = day.find('h1').string
        temp.append(date[0:date.index('日')])  # day-of-month number only
        inf = day.find_all('p')
        temp.append(inf[0].string)  # first <p> holds the weather text

        tem_low = inf[1].find('i').string
        temp.append(tem_low[:-1])  # drop the trailing unit character

        # The forecast may not contain a maximum temperature at all.
        high_tag = inf[1].find('span')
        tem_high = high_tag.string if high_tag is not None else None
        if tem_high is not None and tem_high.endswith('℃'):
            tem_high = tem_high[:-1]
        temp.append(tem_high)

        titles = [span['title'] for span in inf[2].find_all('span')]
        if len(titles) == 1:
            # Keep the row at two wind columns, mirroring how get_content2
            # repeats a direction that does not change.
            titles = titles * 2
        temp.extend(titles)

        wind_scale = inf[2].find('i').string
        index1 = wind_scale.index('级')
        # Uses the single character immediately before '级' as the level.
        temp.append(int(wind_scale[index1 - 1:index1]))
        final.append(temp)
    return final_day, final


# print(final)
def get_content2(html):
    """Extract the long-range daily forecast rows from the 15-day page.

    Args:
        html: Page source of the 15-day forecast page.

    Returns:
        A list with at most eight rows
        ``[date, weather, low, high, wind1, wind2, wind level]``, one per
        list item inside the element with id '15d'.
    """
    bs = BeautifulSoup(html, "html.parser")
    body = bs.body
    data = body.find('div', {'id': '15d'})
    ul = data.find('ul')
    li = ul.find_all('li')

    final = []
    # Slicing enforces the eight-day limit; the original counter was never
    # incremented, so its limit had no effect.
    for day in li[:8]:
        temp = []
        date = day.find('span', {'class': 'time'}).string
        # Keep the text between '(' and the last two characters.
        temp.append(date[date.index('(') + 1:-2])
        temp.append(day.find('span', {'class': 'wea'}).string)

        # Text has the form "<high>/<low>" followed by one unit character.
        tem = day.find('span', {'class': 'tem'}).text
        slash = tem.index('/')
        temp.append(tem[slash + 1:-1])  # low temperature
        temp.append(tem[:slash - 1])    # high temperature

        wind = day.find('span', {'class': 'wind'}).string
        if '转' in wind:
            # "A转B": direction changes from A to B during the day.
            turn = wind.index('转')
            temp.append(wind[:turn])
            temp.append(wind[turn + 1:])
        else:
            # Single direction: repeat it so every row has two wind columns.
            temp.append(wind)
            temp.append(wind)

        wind_scale = day.find('span', {'class': 'wind1'}).string
        index1 = wind_scale.index('级')
        # Uses the single character immediately before '级' as the level.
        temp.append(int(wind_scale[index1 - 1:index1]))

        final.append(temp)
    return final
def write_to_csv(file_name, data, day=14):
    """Write *data* to *file_name* as a CSV file with a header row.

    The file is overwritten on every call so it always holds exactly one
    header followed by the rows of the latest run.  It is encoded as
    gb2312 to match the ``pd.read_csv(..., encoding='gb2312')`` calls in
    the analysis scripts; characters outside that encoding are dropped.

    Args:
        file_name: Path of the CSV file to create or replace.
        data: Iterable of rows (each row an iterable of cell values).
        day: 14 selects the daily-forecast header; any other value selects
            the hourly-observation header.
    """
    if day == 14:
        header = ['日期', '天气', '最低气温', '最高气温', '风向1', '风向2', '风级']
    else:
        header = ['小时', '温度', '风力方向', '风级', '降水量', '相对湿度', '空气质量']
    with open(file_name, 'w', encoding='gb2312', errors='ignore', newline='') as f:
        f_csv = csv.writer(f)
        f_csv.writerow(header)
        f_csv.writerows(data)
def main():
    """Scrape both forecast pages and save the hourly and 14-day CSV files."""
    print("Weather test")
    # Both URLs carry the same station code; the original comment names the
    # city as Zhuhai.
    page_7d = getHTMLtext('http://www.weather.com.cn/weather/101280701.shtml')
    hourly_rows, near_days = get_content(page_7d)
    page_15d = getHTMLtext('http://www.weather.com.cn/weather15d/101280701.shtml')
    far_days = get_content2(page_15d)

    # The daily rows of the first page are followed by those of the second.
    write_to_csv('weather14.csv', near_days + far_days, 14)
    write_to_csv('weather1.csv', hourly_rows, 1)


if __name__ == '__main__':
    main()

举报

相关推荐

0 条评论