# data14_analysis.py
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import math
def tem_curve(data):
date = list( data['日期'] )
tem_low = list( data['最低气温'] )
tem_high = list( data['最高气温'] )
for i in range( 0, 14 ):
if math.isnan( tem_low[i] ) == True:
tem_low[i] = tem_low[i - 1]
if math.isnan( tem_high[i] ) == True:
tem_high[i] = tem_high[i - 1]
tem_high_ave = sum( tem_high ) / 14
tem_low_ave = sum( tem_low ) / 14
tem_max = max( tem_high )
tem_max_date = tem_high.index( tem_max )
tem_min = min( tem_low )
tem_min_date = tem_low.index( tem_min )
x = range( 1, 15 )
plt.figure( 1 )
plt.plot( x, tem_high, color='red', label='高温' )
plt.scatter( x, tem_high, color='red' )
plt.plot( x, tem_low, color='blue', label='低温' )
plt.scatter( x, tem_low, color='blue' )
plt.plot( [1, 15], [tem_high_ave, tem_high_ave], c='black', linestyle='--' )
plt.plot( [1, 15], [tem_low_ave, tem_low_ave], c='black', linestyle='--' )
plt.legend()
plt.text( tem_max_date + 0.15, tem_max + 0.15, str( tem_max ), ha='center', va='bottom', fontsize=10.5 )
plt.text( tem_min_date + 0.15, tem_min + 0.15, str( tem_min ), ha='center', va='bottom', fontsize=10.5 )
plt.xticks( x )
plt.title( '未来14天高温低温变化曲线图' )
plt.xlabel( '未来天数/天' )
plt.ylabel( '摄氏度/℃' )
plt.show()
def change_wind(wind):
"""改变风向"""
for i in range( 0, 14 ):
if wind[i] == "北风":
wind[i] = 90
elif wind[i] == "南风":
wind[i] = 270
elif wind[i] == "西风":
wind[i] = 180
elif wind[i] == "东风":
wind[i] = 360
elif wind[i] == "东北风":
wind[i] = 45
elif wind[i] == "西北风":
wind[i] = 135
elif wind[i] == "西南风":
wind[i] = 225
elif wind[i] == "东南风":
wind[i] = 315
return wind
def wind_radar(data):
"""风向雷达图"""
wind1 = list( data['风向1'] )
wind2 = list( data['风向2'] )
wind_speed = list( data['风级'] )
wind1 = change_wind( wind1 )
wind2 = change_wind( wind2 )
degs = np.arange( 45, 361, 45 )
temp = []
for deg in degs:
speed = []
for i in range( 0, 14 ):
if wind1[i] == deg:
speed.append( wind_speed[i] )
if wind2[i] == deg:
speed.append( wind_speed[i] )
if len( speed ) == 0:
temp.append( 0 )
else:
temp.append( sum( speed ) / len( speed ) )
print( temp )
N = 8
theta = np.arange( 0. + np.pi / 8, 2 * np.pi + np.pi / 8, 2 * np.pi / 8 )
radii = np.array( temp )
plt.axes( polar=True )
colors = [(1 - x / max( temp ), 1 - x / max( temp ), 0.6) for x in radii]
plt.bar( theta, radii, width=(2 * np.pi / N), bottom=0.0, color=colors )
plt.title( '未来14天风级图', x=0.2, fontsize=20 )
plt.show()
def weather_pie(data):
"""绘制天气饼图"""
weather = list( data['天气'] )
dic_wea = {}
for i in range( 0, 14 ):
if weather[i] in dic_wea.keys():
dic_wea[weather[i]] += 1
else:
dic_wea[weather[i]] = 1
print( dic_wea )
explode = [0.01] * len( dic_wea.keys() )
color = ['lightskyblue', 'silver', 'yellow', 'salmon', 'grey', 'lime', 'gold', 'red', 'green', 'pink']
plt.pie( dic_wea.values(), explode=explode, labels=dic_wea.keys(), autopct='%1.1f%%', colors=color )
plt.title( '未来14天气候分布饼图' )
plt.show()
def main():
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
data14 = pd.read_csv( 'weather14.csv', encoding='gb2312' )
print( data14 )
tem_curve( data14 )
wind_radar( data14 )
weather_pie( data14 )
if __name__ == '__main__':
main()
# data1_analysis.py
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import math
def tem_curve(data):
hour = list( data['小时'] )
tem = list( data['温度'] )
for i in range( 0, 24 ):
if math.isnan( tem[i] ) == True:
tem[i] = tem[i - 1]
tem_ave = sum( tem ) / 24
tem_max = max( tem )
tem_max_hour = hour[tem.index( tem_max )]
tem_min = min( tem )
tem_min_hour = hour[tem.index( tem_min )]
x = []
y = []
for i in range( 0, 24 ):
x.append( i )
y.append( tem[hour.index( i )] )
plt.figure( 1 )
plt.plot( x, y, color='red', label='温度' )
plt.scatter( x, y, color='red' )
plt.plot( [0, 24], [tem_ave, tem_ave], c='blue', linestyle='--', label='平均温度' )
plt.text( tem_max_hour + 0.15, tem_max + 0.15, str( tem_max ), ha='center', va='bottom', fontsize=10.5 )
plt.text( tem_min_hour + 0.15, tem_min + 0.15, str( tem_min ), ha='center', va='bottom', fontsize=10.5 )
plt.xticks( x )
plt.legend()
plt.title( '一天温度变化曲线图' )
plt.xlabel( '时间/h' )
plt.ylabel( '摄氏度/℃' )
plt.show()
def hum_curve(data):
"""相对湿度曲线绘制"""
hour = list( data['小时'] )
hum = list( data['相对湿度'] )
for i in range( 0, 24 ):
if math.isnan( hum[i] ) == True:
hum[i] = hum[i - 1]
hum_ave = sum( hum ) / 24
hum_max = max( hum )
hum_max_hour = hour[hum.index( hum_max )]
hum_min_hour = hour[hum.index( hum_min )]
x = []
y = []
for i in range( 0, 24 ):
x.append( i )
y.append( hum[hour.index( i )] )
plt.figure( 2 )
plt.plot( x, y, color='blue', label='相对湿度' )
plt.scatter( x, y, color='blue' )
plt.plot( [0, 24], [hum_ave, hum_ave], c='red', linestyle='--', label='平均相对湿度' ) # 画出平均相对湿度虚线
plt.text( hum_max_hour + 0.15, hum_max + 0.15, str( hum_max ), ha='center', va='bottom', fontsize=10.5 ) # 标出最高相对湿度
plt.text( hum_min_hour + 0.15, hum_min + 0.15, str( hum_min ), ha='center', va='bottom', fontsize=10.5 ) # 标出最低相对湿度
plt.xticks( x )
plt.legend()
plt.title( '一天相对湿度变化曲线图' )
plt.xlabel( '时间/h' )
plt.ylabel( '百分比/%' )
plt.show()
def air_curve(data):
"""空气质量曲线绘制"""
hour = list( data['小时'] )
air = list( data['空气质量'] )
print( type( air[0] ) )
for i in range( 0, 24 ):
if math.isnan( air[i] ) == True:
air[i] = air[i - 1]
air_ave = sum( air ) / 24
air_max = max( air )
air_max_hour = hour[air.index( air_max )]
air_min = min( air )
air_min_hour = hour[air.index( air_min )]
x = []
y = []
for i in range( 0, 24 ):
x.append( i )
y.append( air[hour.index( i )] )
plt.figure( 3 )
for i in range( 0, 24 ):
if y[i] <= 50:
plt.bar( x[i], y[i], color='lightgreen', width=0.7 ) # 1等级
elif y[i] <= 100:
plt.bar( x[i], y[i], color='wheat', width=0.7 ) # 2等级
elif y[i] <= 150:
plt.bar( x[i], y[i], color='orange', width=0.7 ) # 3等级
elif y[i] <= 200:
plt.bar( x[i], y[i], color='orangered', width=0.7 ) # 4等级
elif y[i] <= 300:
plt.bar( x[i], y[i], color='darkviolet', width=0.7 ) # 5等级
elif y[i] > 300:
plt.bar( x[i], y[i], color='maroon', width=0.7 ) # 6等级
plt.plot( [0, 24], [air_ave, air_ave], c='black', linestyle='--' )
plt.text( air_max_hour + 0.15, air_max + 0.15, str( air_max ), ha='center', va='bottom', fontsize=10.5 ) # 标出最高空气质量
plt.text( air_min_hour + 0.15, air_min + 0.15, str( air_min ), ha='center', va='bottom', fontsize=10.5 ) # 标出最低空气质量
plt.xticks( x )
plt.title( '一天空气质量变化曲线图' )
plt.xlabel( '时间/h' )
plt.ylabel( '空气质量指数AQI' )
plt.show()
def wind_radar(data):
"""风向雷达图"""
wind = list( data['风力方向'] )
wind_speed = list( data['风级'] )
for i in range( 0, 24 ):
if wind[i] == "北风":
wind[i] = 90
elif wind[i] == "南风":
wind[i] = 270
elif wind[i] == "西风":
wind[i] = 180
elif wind[i] == "东风":
wind[i] = 360
elif wind[i] == "东北风":
wind[i] = 45
elif wind[i] == "西北风":
wind[i] = 135
elif wind[i] == "西南风":
wind[i] = 225
elif wind[i] == "东南风":
wind[i] = 315
degs = np.arange( 45, 361, 45 )
temp = []
for deg in degs:
speed = []
for i in range( 0, 24 ):
if wind[i] == deg:
speed.append( wind_speed[i] )
if len( speed ) == 0:
temp.append( 0 )
else:
temp.append( sum( speed ) / len( speed ) )
print( temp )
N = 8
theta = np.arange( 0. + np.pi / 8, 2 * np.pi + np.pi / 8, 2 * np.pi / 8 )
radii = np.array( temp )
plt.axes( polar=True )
colors = [(1 - x / max( temp ), 1 - x / max( temp ), 0.6) for x in radii]
plt.bar( theta, radii, width=(2 * np.pi / N), bottom=0.0, color=colors )
plt.title( '一天风级图', x=0.2, fontsize=20 )
plt.show()
def calc_corr(a, b):
"""计算相关系数"""
a_avg = sum( a ) / len( a )
b_avg = sum( b ) / len( b )
cov_ab = sum( [(x - a_avg) * (y - b_avg) for x, y in zip( a, b )] )
sq = math.sqrt( sum( [(x - a_avg) ** 2 for x in a] ) * sum( [(x - b_avg) ** 2 for x in b] ) )
corr_factor = cov_ab / sq
return corr_factor
def corr_tem_hum(data):
"""温湿度相关性分析"""
tem = data['温度']
hum = data['相对湿度']
plt.scatter( tem, hum, color='blue' )
plt.title( "温湿度相关性分析图" )
plt.xlabel( "温度/℃" )
plt.ylabel( "相对湿度/%" )
plt.text( 20, 40, "相关系数为:" + str( calc_corr( tem, hum ) ), fontdict={'size': '10', 'color': 'red'} )
plt.show()
print( "相关系数为:" + str( calc_corr( tem, hum ) ) )
def main():
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
data1 = pd.read_csv( 'weather1.csv', encoding='gb2312' )
print( data1 )
tem_curve( data1 )
hum_curve( data1 )
air_curve( data1 )
wind_radar( data1 )
corr_tem_hum( data1 )
if __name__ == '__main__':
main()
# weather.py
import requests
from bs4 import BeautifulSoup
import csv
import json
def getHTMLtext(url):
"""请求获得网页内容"""
try:
r = requests.get( url, timeout=30 )
r.raise_for_status()
r.encoding = r.apparent_encoding
print( "成功访问" )
return r.text
except:
print( "访问错误" )
return " "
def get_content(html):
"""处理得到有用信息保存数据文件"""
final = [] # 初始化一个列表保存数据
bs = BeautifulSoup( html, "html.parser" )
body = bs.body
data = body.find( 'div', {'id': '7d'} )
# 下面爬取当天的数据
data2 = body.find_all( 'div', {'class': 'left-div'} )
text = data2[2].find( 'script' ).string
text = text[text.index( '=' ) + 1:-2]
jd = json.loads( text )
dayone = jd['od']['od2'] # 找到当天的数据
final_day = [] # 存放当天的数据
count = 0
for i in dayone:
temp = []
if count <= 23:
temp.append( i['od21'] ) # 添加时间
temp.append( i['od22'] ) # 添加当前时刻温度
temp.append( i['od24'] ) # 添加当前时刻风力方向
temp.append( i['od25'] ) # 添加当前时刻风级
temp.append( i['od26'] ) # 添加当前时刻降水量
temp.append( i['od27'] ) # 添加当前时刻相对湿度
temp.append( i['od28'] ) # 添加当前时刻控制质量
# print(temp)
final_day.append( temp )
count = count + 1
# 下面爬取7天的数据
ul = data.find( 'ul' ) # 找到所有的ul标签
li = ul.find_all( 'li' ) # 找到左右的li标签
i = 0 # 控制爬取的天数
for day in li: # 遍历找到的每一个li
if i < 7 and i > 0:
temp = [] # 临时存放每天的数据
date = day.find( 'h1' ).string # 得到日期
date = date[0:date.index( '日' )] # 取出日期号
temp.append( date )
inf = day.find_all( 'p' ) # 找出li下面的p标签,提取第一个p标签的值,即天气
temp.append( inf[0].string )
tem_low = inf[1].find( 'i' ).string # 找到最低气温
if inf[1].find( 'span' ) is None: # 天气预报可能没有最高气温
tem_high = None
else:
tem_high = inf[1].find( 'span' ).string # 找到最高气温
temp.append( tem_low[:-1] )
if tem_high[-1] == '℃':
temp.append( tem_high[:-1] )
else:
temp.append( tem_high )
wind = inf[2].find_all( 'span' ) # 找到风向
for j in wind:
temp.append( j['title'] )
wind_scale = inf[2].find( 'i' ).string # 找到风级
index1 = wind_scale.index( '级' )
temp.append( int( wind_scale[index1 - 1:index1] ) )
final.append( temp )
i = i + 1
return final_day, final
# print(final)
def get_content2(html):
"""处理得到有用信息保存数据文件"""
final = [] # 初始化一个列表保存数据
bs = BeautifulSoup( html, "html.parser" ) # 创建BeautifulSoup对象
body = bs.body
data = body.find( 'div', {'id': '15d'} ) # 找到div标签且id = 15d
ul = data.find( 'ul' ) # 找到所有的ul标签
li = ul.find_all( 'li' ) # 找到左右的li标签
final = []
i = 0 # 控制爬取的天数
for day in li: # 遍历找到的每一个li
if i < 8:
temp = [] #
date = day.find( 'span', {'class': 'time'} ).string
date = date[date.index( '(' ) + 1:-2]
temp.append( date )
weather = day.find( 'span', {'class': 'wea'} ).string
temp.append( weather )
tem = day.find( 'span', {'class': 'tem'} ).text
temp.append( tem[tem.index( '/' ) + 1:-1] )
temp.append( tem[:tem.index( '/' ) - 1] )
wind = day.find( 'span', {'class': 'wind'} ).string
if '转' in wind:
temp.append( wind[:wind.index( '转' )] )
temp.append( wind[wind.index( '转' ) + 1:] )
else:
temp.append( wind )
temp.append( wind )
wind_scale = day.find( 'span', {'class': 'wind1'} ).string
index1 = wind_scale.index( '级' )
temp.append( int( wind_scale[index1 - 1:index1] ) )
final.append( temp )
return final
def write_to_csv(file_name, data, day=14):
"""保存为csv文件"""
with open( file_name, 'a', errors='ignore', newline='' ) as f:
if day == 14:
header = ['日期', '天气', '最低气温', '最高气温', '风向1', '风向2', '风级']
else:
header = ['小时', '温度', '风力方向', '风级', '降水量', '相对湿度', '空气质量']
f_csv = csv.writer( f )
f_csv.writerow( header )
f_csv.writerows( data )
def main():
"""主函数"""
print( "Weather test" )
# 珠海
url1 = 'http://www.weather.com.cn/weather/101280701.shtml'
url2 = 'http://www.weather.com.cn/weather15d/101280701.shtml'
html1 = getHTMLtext( url1 )
data1, data1_7 = get_content( html1 )
html2 = getHTMLtext( url2 )
data8_14 = get_content2( html2 )
data14 = data1_7 + data8_14
# print(data)
write_to_csv( 'weather14.csv', data14, 14 )
write_to_csv( 'weather1.csv', data1, 1 )
if __name__ == '__main__':
main()