数据准备
import pandas as pd
# Load the Beijing 2018 weather CSV into a DataFrame and preview the first rows.
df = pd.read_csv('data/pandas/beijing_tianqi/beijing_tianqi_2018.csv')
df.head()
# Both temperature columns arrive as strings with a trailing "℃";
# strip the unit and store the values as 32-bit integers.
for temp_col in ("bWendu", "yWendu"):
    df[temp_col] = df[temp_col].str.replace("℃", "").astype('int32')
df.head()
常用汇总函数
# Descriptive statistics for the DataFrame via describe().
df.describe()
# Mean, maximum and minimum of the 'bWendu' column.
bwendu = df['bWendu']
bwendu.mean()
bwendu.max()
bwendu.min()
重复项判断，按值计数
# Distinct values of the 'fengxiang' column, then how often each one occurs.
df['fengxiang'].unique()
df['fengxiang'].value_counts()
# True if the column contains at least one repeated value.
df['bWendu'].duplicated().any()
df['ymd'].duplicated().any()
# 'bWendu' values with repeats removed (pandas keeps the first occurrence
# by default).
df['bWendu'].drop_duplicates()
# Rows whose 'bWendu' value already appeared in an earlier row.
# duplicated() already returns a boolean mask, so it is used to index
# directly instead of being compared against True.
df[df['bWendu'].duplicated()]
相关系数和协方差
# Covariance and correlation are only defined for numeric data.
# pandas >= 2.0 no longer drops non-numeric columns silently in
# DataFrame.cov()/corr() (numeric_only now defaults to False) and raises
# instead, so restrict the frame to its numeric columns first.
# select_dtypes is used rather than numeric_only=True because it also
# works on pandas versions that predate that keyword.
# NOTE(review): assumes the CSV contains at least one non-numeric column
# (e.g. text or date fields) - confirm against the data file.
numeric_df = df.select_dtypes(include="number")
numeric_df.cov()
numeric_df.corr()