import numpy as np
import matplotlib.pylab as plt
import pandas as pd 
import os 
导入数据
# Load every CSV found under the competition folder.
# Later cells pick frames out of `filename_data` by position, so the names are
# visited in sorted order: os.walk itself makes no promise about the order of
# the entries it yields, and an unsorted walk could silently shuffle the frames.
file=os.walk(r'D:\机器学习\kaggle预测\商店销售')
filename_=[]
filename_data=[]
for root,sub,filename in file:
    sub.sort()  # sorting in place also fixes the order in which subfolders are visited
    for i in sorted(filename):
        if not i.lower().endswith('.csv'):
            continue  # only CSV files are meant to be parsed by pd.read_csv
        path=os.path.join(root,i)
        filename_.append(i)
        filename_data.append(pd.read_csv(path))

# Show the loaded file names; their positions are the indices used further down.
filename_
['holidays_events.csv',
 'oil.csv',
 'sample_submission.csv',
 'stores.csv',
 'test.csv',
 'train.csv',
 'transactions.csv']
# Merge the data: starting from the train frame (index 5), left-join holidays
# (0) and oil (1) on date, stores (3) on store_nbr, and transactions (6) on
# date + store_nbr. Left joins keep every training row.
# NOTE(review): if the holidays frame has more than one row for a date, the
# first join repeats the matching training rows — confirm this is intended.
df_train1=(
    filename_data[5]
    .merge(filename_data[0],how='left',on='date')
    .merge(filename_data[1],how='left',on='date')
    .merge(filename_data[3],how='left',on='store_nbr')
    .merge(filename_data[6],how='left',on=['date','store_nbr'])
    # the two colliding `type` columns come back suffixed _x/_y; give them real names
    .rename(columns={'type_x':'holiday_type','type_y':'store_type'})
)
df_train1.head()
| id | date | store_nbr | family | sales | onpromotion | holiday_type | locale | locale_name | description | transferred | dcoilwtico | city | state | store_type | cluster | transactions | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 2013-01-01 | 1 | AUTOMOTIVE | 0.000 | 0 | Holiday | National | Ecuador | Primer dia del ano | False | NaN | Quito | Pichincha | D | 13 | NaN | 
| 1 | 1 | 2013-01-01 | 1 | BABY CARE | 0.000 | 0 | Holiday | National | Ecuador | Primer dia del ano | False | NaN | Quito | Pichincha | D | 13 | NaN | 
| 2 | 2 | 2013-01-01 | 1 | BEAUTY | 0.000 | 0 | Holiday | National | Ecuador | Primer dia del ano | False | NaN | Quito | Pichincha | D | 13 | NaN | 
| 3 | 3 | 2013-01-01 | 1 | BEVERAGES | 0.000 | 0 | Holiday | National | Ecuador | Primer dia del ano | False | NaN | Quito | Pichincha | D | 13 | NaN | 
| 4 | 4 | 2013-01-01 | 1 | BOOKS | 0.000 | 0 | Holiday | National | Ecuador | Primer dia del ano | False | NaN | Quito | Pichincha | D | 13 | NaN | 
# Date handling: parse `date` once, then derive the calendar columns from it.
parsed_dates=pd.to_datetime(df_train1['date'])
df_train1['date']=parsed_dates
calendar_parts={
    'year':parsed_dates.dt.year,
    'month':parsed_dates.dt.month,
    'week':parsed_dates.dt.isocalendar().week,   # ISO week number
    'quarter':parsed_dates.dt.quarter,
    'day_of_week':parsed_dates.dt.day_name(),    # weekday name, e.g. 'Tuesday'
}
# dicts keep insertion order, so the new columns are appended in the order listed above
for column_name,column_values in calendar_parts.items():
    df_train1[column_name]=column_values
df_train1.head()
| id | date | store_nbr | family | sales | onpromotion | holiday_type | locale | locale_name | description | ... | city | state | store_type | cluster | transactions | year | month | week | quarter | day_of_week | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 2013-01-01 | 1 | AUTOMOTIVE | 0.0 | 0 | Holiday | National | Ecuador | Primer dia del ano | ... | Quito | Pichincha | D | 13 | NaN | 2013 | 1 | 1 | 1 | Tuesday | 
| 1 | 1 | 2013-01-01 | 1 | BABY CARE | 0.0 | 0 | Holiday | National | Ecuador | Primer dia del ano | ... | Quito | Pichincha | D | 13 | NaN | 2013 | 1 | 1 | 1 | Tuesday | 
| 2 | 2 | 2013-01-01 | 1 | BEAUTY | 0.0 | 0 | Holiday | National | Ecuador | Primer dia del ano | ... | Quito | Pichincha | D | 13 | NaN | 2013 | 1 | 1 | 1 | Tuesday | 
| 3 | 3 | 2013-01-01 | 1 | BEVERAGES | 0.0 | 0 | Holiday | National | Ecuador | Primer dia del ano | ... | Quito | Pichincha | D | 13 | NaN | 2013 | 1 | 1 | 1 | Tuesday | 
| 4 | 4 | 2013-01-01 | 1 | BOOKS | 0.0 | 0 | Holiday | National | Ecuador | Primer dia del ano | ... | Quito | Pichincha | D | 13 | NaN | 2013 | 1 | 1 | 1 | Tuesday | 
5 rows × 22 columns
store_type、family、cluster汇总
# Mean sales of each store_type, largest mean first.
store_type_means=df_train1.groupby('store_type')['sales'].mean()
df_st_sa=store_type_means.reset_index().sort_values(by='sales',ascending=False)
df_st_sa
| store_type | sales | |
|---|---|---|
| 0 | A | 708.378165 | 
| 3 | D | 352.084510 | 
| 1 | B | 328.275233 | 
| 4 | E | 270.285490 | 
| 2 | C | 197.790647 | 
# Mean sales of each product family, sorted descending; only the ten largest are kept.
family_means=df_train1.groupby('family')['sales'].mean().reset_index()
df_fa_sa=family_means.sort_values(by='sales',ascending=False).head(10)
df_fa_sa
| family | sales | |
|---|---|---|
| 12 | GROCERY I | 3790.432797 | 
| 3 | BEVERAGES | 2394.912701 | 
| 30 | PRODUCE | 1355.373698 | 
| 7 | CLEANING | 1074.171518 | 
| 8 | DAIRY | 711.175991 | 
| 5 | BREAD/BAKERY | 464.150612 | 
| 28 | POULTRY | 351.078816 | 
| 24 | MEATS | 341.965905 | 
| 25 | PERSONAL CARE | 271.192381 | 
| 9 | DELI | 265.629746 | 
# Mean sales of each cluster. No explicit sort by sales is applied here; the
# rows come back ordered by cluster id, which is the groupby default.
cluster_means=df_train1.groupby('cluster')['sales'].mean()
df_cl_sa=cluster_means.reset_index()
df_cl_sa.head()
| cluster | sales | |
|---|---|---|
| 0 | 1 | 327.022808 | 
| 1 | 2 | 261.025731 | 
| 2 | 3 | 194.926534 | 
| 3 | 4 | 297.537877 | 
| 4 | 5 | 1120.118405 | 
# Dashboard of the three summaries: cluster bars across the bottom row,
# family bars top left, store-type donut top right.
from matplotlib.gridspec import GridSpec
plt.figure(figsize=(12,8))
gs=GridSpec(2,2)

# Bottom row, full width: bars are placed at 0..n-1, not at the cluster ids.
cluster_positions=range(len(df_cl_sa))
ax=plt.subplot(gs[1,:])
ax.bar(cluster_positions,df_cl_sa['sales'],width=0.5)
ax.set_title('Clusters VS s Sales')

# Top left: horizontal bars for the families kept in df_fa_sa.
ax=plt.subplot(gs[0,0])
ax.barh(df_fa_sa['family'],df_fa_sa['sales'])
ax.set_title('Average Sales Familys')

# Top right: a wedge width below 1 turns the pie into a ring.
ax=plt.subplot(gs[0,1])
ax.pie(df_st_sa['sales'],labels=df_st_sa['store_type'],wedgeprops={'width':0.3})
ax.set_title('Highest Sales Stores')
plt.show()

月销售量汇总
# Average the sales of every year by month, giving one small frame per year.
def _monthly_mean_sales(year,column):
    """Return the per-month mean of `sales` for `year`, with that mean stored under `column`."""
    one_year=df_train1.loc[df_train1['year']==year,['month','sales']]
    return one_year.groupby('month').agg({'sales':'mean'}).reset_index().rename(columns={'sales':column})

df_2013=_monthly_mean_sales(2013,'s13')
df_2014=_monthly_mean_sales(2014,'s14')
df_2015=_monthly_mean_sales(2015,'s15')
df_2016=_monthly_mean_sales(2016,'s16')
df_2017=_monthly_mean_sales(2017,'sales')  # renamed to s17 only after padding below
# Pad 2017 with zero rows for months 9-12 so the inner merges below keep those months.
df_2017_no=pd.DataFrame({'month':[9,10,11,12],'sales':[0,0,0,0]})
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the rows the same way.
df_2017=pd.concat([df_2017,df_2017_no]).rename(columns={'sales':'s17'})
# Join the yearly frames side by side on month: one row per month, one column per year.
df_year=df_2013.merge(df_2014,on='month').merge(df_2015,on='month').merge(df_2016,on='month').merge(df_2017,on='month')
df_year
| month | s13 | s14 | s15 | s16 | s17 | |
|---|---|---|---|---|---|---|
| 0 | 1 | 186.952405 | 342.341709 | 269.666595 | 434.050268 | 476.596791 | 
| 1 | 2 | 193.581846 | 241.268892 | 275.420792 | 424.695398 | 465.971468 | 
| 2 | 3 | 206.880581 | 368.661236 | 282.368624 | 418.735398 | 483.400632 | 
| 3 | 4 | 205.639071 | 240.577087 | 279.743138 | 488.108774 | 482.172948 | 
| 4 | 5 | 210.184563 | 242.203129 | 320.958116 | 457.671398 | 487.162797 | 
| 5 | 6 | 215.691343 | 244.634652 | 397.249619 | 419.644575 | 488.707278 | 
| 6 | 7 | 203.983455 | 350.830102 | 403.030170 | 432.562218 | 489.909880 | 
| 7 | 8 | 212.479434 | 251.351805 | 415.692304 | 406.437390 | 465.144891 | 
| 8 | 9 | 220.593588 | 374.530792 | 434.734053 | 419.331240 | 0.000000 | 
| 9 | 10 | 213.164266 | 369.213666 | 432.248428 | 435.002169 | 0.000000 | 
| 10 | 11 | 231.136537 | 384.056027 | 426.579749 | 462.916675 | 0.000000 | 
| 11 | 12 | 298.675144 | 459.818606 | 513.845328 | 557.114822 | 0.000000 | 
# Keep only the per-year value columns (dropping `month`) and turn any NaN into 0.
yearly_columns=['s13','s14','s15','s16','s17']
df_year=df_year.loc[:,yearly_columns].replace(to_replace=np.nan,value=0)
df_year
| s13 | s14 | s15 | s16 | s17 | |
|---|---|---|---|---|---|
| Jan | 186.952405 | 342.341709 | 269.666595 | 434.050268 | 476.596791 | 
| Feb | 193.581846 | 241.268892 | 275.420792 | 424.695398 | 465.971468 | 
| Mar | 206.880581 | 368.661236 | 282.368624 | 418.735398 | 483.400632 | 
| Apr | 205.639071 | 240.577087 | 279.743138 | 488.108774 | 482.172948 | 
| May | 210.184563 | 242.203129 | 320.958116 | 457.671398 | 487.162797 | 
| Jun | 215.691343 | 244.634652 | 397.249619 | 419.644575 | 488.707278 | 
| Ju1 | 203.983455 | 350.830102 | 403.030170 | 432.562218 | 489.909880 | 
| Aug | 212.479434 | 251.351805 | 415.692304 | 406.437390 | 465.144891 | 
| Sep | 220.593588 | 374.530792 | 434.734053 | 419.331240 | 0.000000 | 
| Oct | 213.164266 | 369.213666 | 432.248428 | 435.002169 | 0.000000 | 
| Nov | 231.136537 | 384.056027 | 426.579749 | 462.916675 | 0.000000 | 
| Dec | 298.675144 | 459.818606 | 513.845328 | 557.114822 | 0.000000 | 
# Label the twelve rows with month abbreviations, January first; these strings
# become the bar labels of the chart drawn from df_year.
df_year.index=['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
y_data=df_2013['month'].tolist()  # the month numbers as a plain Python list
df_year
| s13 | s14 | s15 | s16 | s17 | |
|---|---|---|---|---|---|
| Jan | 186.952405 | 342.341709 | 269.666595 | 434.050268 | 476.596791 | 
| Feb | 193.581846 | 241.268892 | 275.420792 | 424.695398 | 465.971468 | 
| Mar | 206.880581 | 368.661236 | 282.368624 | 418.735398 | 483.400632 | 
| Apr | 205.639071 | 240.577087 | 279.743138 | 488.108774 | 482.172948 | 
| May | 210.184563 | 242.203129 | 320.958116 | 457.671398 | 487.162797 | 
| Jun | 215.691343 | 244.634652 | 397.249619 | 419.644575 | 488.707278 | 
| Ju1 | 203.983455 | 350.830102 | 403.030170 | 432.562218 | 489.909880 | 
| Aug | 212.479434 | 251.351805 | 415.692304 | 406.437390 | 465.144891 | 
| Sep | 220.593588 | 374.530792 | 434.734053 | 419.331240 | 0.000000 | 
| Oct | 213.164266 | 369.213666 | 432.248428 | 435.002169 | 0.000000 | 
| Nov | 231.136537 | 384.056027 | 426.579749 | 462.916675 | 0.000000 | 
| Dec | 298.675144 | 459.818606 | 513.845328 | 557.114822 | 0.000000 | 
# Stacked horizontal bars: mean sales per month, one segment per year.
plt.figure(figsize=(12,8))
# (legend label, hand-picked x position of the caption drawn above the bars)
year_captions=[('2013',100),('2014',500),('2015',1000),('2016',1300),('2017',1700)]
stack_offset=0  # running row-wise total of the segments already drawn
for position,(year_label,caption_x) in enumerate(year_captions):
    segment=df_year.iloc[:,position]
    plt.barh(df_year.index,segment,left=stack_offset,label=year_label)
    plt.text(caption_x,12.5,year_label)
    stack_offset=stack_offset+segment
plt.title('Avg Sales for Each Year',loc='left',y=1.08,fontsize=15)
plt.legend()
plt.show()

月、季、周、星期几汇总
# Mean sales per month, per week and per quarter, prepared for plotting.
import calendar
df_m_sa=df_train1.groupby('month').agg({'sales':'mean'}).reset_index()
df_m_sa['sales']=df_m_sa['sales'].round(2)           # two decimals keep the text labels short
df_m_sa['month_text']=df_m_sa['month'].apply(lambda x: calendar.month_abbr[x])  # month number -> 'Jan', 'Feb', ...
df_m_sa['text']=df_m_sa['month_text']+'-'+df_m_sa['sales'].astype(str)          # e.g. 'Jan-341.92'
df_w_sa=df_train1.groupby('week').agg({'sales':'mean'}).reset_index()
df_q_sa=df_train1.groupby('quarter').agg({'sales':'mean'}).reset_index()
# head() has to be called on all three frames: a bare `.head` yields the bound
# method object rather than the first rows.
df_m_sa.head(),df_w_sa.head(),df_q_sa.head()
(    month   sales month_text        text
 0       1  341.92        Jan  Jan-341.92
 1       2  320.93        Feb  Feb-320.93
 2       3  352.01        Mar  Mar-352.01
 3       4  341.17        Apr  Apr-341.17
 4       5  345.65        May  May-345.65,
     week       sales
 0      1  409.099519
 1      2  347.534643
 2      3  338.142199
 3      4  329.186258
 4      5  344.195233,
    quarter       sales
 0        1  338.825392
 1        2  346.546038
 2        3  359.334098
 3        4  399.229622)
# Dashboard: monthly bars top left, quarterly donut top right, weekly curve
# across the bottom row.
from matplotlib.gridspec import GridSpec
plt.figure(figsize=(12,8))
gs=GridSpec(2,2)
print(gs[:2,:2])  # debug output: the spec that spans the whole 2x2 grid

# Top left: one bar per month; every bar needs its own text call, hence the loop.
ax=plt.subplot(gs[0,0])
ax.barh(df_m_sa['month_text'],df_m_sa['sales'])
for bar_row,(bar_length,bar_caption) in enumerate(zip(df_m_sa['sales'],df_m_sa['text'])):
    ax.text(bar_length-100,bar_row,bar_caption)  # drawn 100 units left of the bar end
ax.set_title('month wise avg sales analysis')

# Top right: quarterly shares, with the percentage printed at 1.25 radii from the centre.
ax=plt.subplot(gs[0,1])
ax.pie(df_q_sa['sales'],labels=df_q_sa['quarter'],autopct='%1.2f%%',pctdistance=1.25,wedgeprops={'width':0.3})
ax.set_title('Quarter wise Avg Sales Analy')

# Bottom row: weekly means as a marked line over a translucent filled area.
weeks=df_w_sa['week']
weekly_sales=df_w_sa['sales']
ax=plt.subplot(gs[1,:])
ax.fill_between(weeks,weekly_sales,alpha=0.6)
ax.plot(weeks,weekly_sales,marker='o')
ax.set_title('Week wise Avg Sales Analysis')
plt.show()
GridSpec(2, 2)[0:2, 0:2]

# Mean sales per weekday name, rounded to two decimals. The rows follow the
# groupby key order, i.e. the day names sorted alphabetically.
weekday_means=df_train1.groupby('day_of_week')['sales'].mean().reset_index()
weekday_means['sales']=weekday_means['sales'].round(2)
df_dw_sa=weekday_means
df_dw_sa
| day_of_week | sales | |
|---|---|---|
| 0 | Friday | 326.73 | 
| 1 | Monday | 348.16 | 
| 2 | Saturday | 434.79 | 
| 3 | Sunday | 464.74 | 
| 4 | Thursday | 286.57 | 
| 5 | Tuesday | 319.92 | 
| 6 | Wednesday | 330.77 | 
# Horizontal bar per weekday; each bar is annotated with its own value, which
# takes one text call per bar.
plt.barh(df_dw_sa['day_of_week'],df_dw_sa['sales'])
for bar_row,bar_length in enumerate(df_dw_sa['sales']):
    plt.text(bar_length-50,bar_row,bar_length)  # value printed 50 units left of the bar end
# plt.yticks(['Monday','Tuesday','Wednesday','Thursday','Friday','Saturday','Sunday'])
plt.title('Avg Sales VS Day of Week')
plt.show()

store_type与holiday_type关联
# Mean sales for every (store_type, holiday_type) pair, rounded to two decimals.
pair_means=df_train1.groupby(['store_type','holiday_type'])['sales'].mean().reset_index()
pair_means['sales']=pair_means['sales'].round(2)
df_st_ht=pair_means
df_st_ht.head()
| store_type | holiday_type | sales | |
|---|---|---|---|
| 0 | A | Additional | 957.70 | 
| 1 | A | Bridge | 969.82 | 
| 2 | A | Event | 813.56 | 
| 3 | A | Holiday | 723.28 | 
| 4 | A | Transfer | 984.63 | 
# Bubble chart: store_type on x, holiday_type on y; marker size and colour are
# both driven by the mean sales.
bubble_sales=df_st_ht['sales']
plt.scatter(df_st_ht['store_type'],df_st_ht['holiday_type'],s=bubble_sales,c=bubble_sales,cmap='plasma')
plt.colorbar()
plt.text(4.7,5.5,'sales')  # hand-placed caption, presumably meant for the colour bar
plt.xlim(-0.5,4.5)
plt.ylim(-0.5,5.5)
plt.title('Average Sales:Store Type vs holiday type ')
plt.show()

# Mean sales for every (year, month, store_type) triple, rounded to two
# decimals, with the month number swapped for its abbreviation.
triple_means=df_train1.groupby(['year','month','store_type'])['sales'].mean().reset_index()
triple_means['sales']=triple_means['sales'].round(2)
triple_means['month']=triple_means['month'].map(lambda month_no:calendar.month_abbr[month_no])
df_y_m_st=triple_means
df_y_m_st.head()
| year | month | store_type | sales | |
|---|---|---|---|---|
| 0 | 2013 | Jan | A | 392.85 | 
| 1 | 2013 | Jan | B | 155.11 | 
| 2 | 2013 | Jan | C | 109.06 | 
| 3 | 2013 | Jan | D | 191.16 | 
| 4 | 2013 | Jan | E | 60.52 | 
280 rows × 4 columns
# a=df_y_m_st['year']==2013
# df_y_m_st.loc[a,'month']
month与store_type汇总(不同年份)
# One panel per year: month on x, store_type on y, marker size and colour both
# showing the mean sales.
a,b,c,d,e=(df_y_m_st['year']==year for year in (2013,2014,2015,2016,2017))
month_labels=['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
# A single colour range for all panels, so the one shared colour bar is valid for each of them.
sales_min=df_y_m_st['sales'].min()
sales_max=df_y_m_st['sales'].max()
fig,ax=plt.subplots(5,1,figsize=(24,20))
# (year, row mask, x position of the rotated year caption)
panels=[(2013,a,12),(2014,b,12),(2015,c,12),(2016,d,12),(2017,e,11.5)]
for i,(year,mask,caption_x) in enumerate(panels):
    rows=df_y_m_st.loc[mask]  # every panel plots its own year's months and store types
    points=ax[i].scatter(rows['month'],rows['store_type'],s=rows['sales'],c=rows['sales'],
                         cmap='plasma',vmin=sales_min,vmax=sales_max)
    # rotation has to be a number: matplotlib only accepts the strings 'horizontal' and 'vertical'
    ax[i].text(caption_x,0.01,f'year={year}',size=20,rotation=270)
    # only the bottom panel carries month ticks; the panels above it hide theirs
    ax[i].set_xticks(month_labels if i==len(panels)-1 else [])
    for side in ('top','right','bottom','left'):
        ax[i].spines[side].set_visible(False)
    ax[i].set_ylim([-0.5,5])
    ax[i].set_xlim([-0.5,15])
    ax[i].tick_params(axis='both',which='major',labelsize=25)  # enlarge the tick labels
# Reuse the last scatter handle for the colour bar instead of drawing the 2013 points twice.
fig.colorbar(points,ax=list(ax),shrink=0.9)
plt.text(16,32,'sales',size=20)
plt.show()

month与holiday_type汇总
# Mean sales for every (month, holiday_type) pair, rounded to two decimals,
# then drawn as a bubble chart whose marker size and colour both encode sales.
month_holiday_means=df_train1.groupby(['month','holiday_type'])['sales'].mean().reset_index()
month_holiday_means['sales']=month_holiday_means['sales'].round(2)
month_holiday_means['month']=month_holiday_means['month'].map(lambda month_no:calendar.month_abbr[month_no])
df_m_ht=month_holiday_means

bubble_sales=df_m_ht['sales']
plt.scatter(df_m_ht['month'],df_m_ht['holiday_type'],s=bubble_sales,c=bubble_sales,cmap='plasma')
plt.colorbar()
plt.text(12.5,6,'sales')  # hand-placed caption, presumably meant for the colour bar
plt.xlim(-1,12)
plt.ylim(-0.5,5.5)
plt.title('Average Sales:Month vs holiday type ')
plt.show()

# Mean sales for every (year, month, holiday_type) triple, rounded to two
# decimals, with the month number swapped for its abbreviation.
holiday_triples=df_train1.groupby(['year','month','holiday_type'])['sales'].mean().reset_index()
holiday_triples['sales']=holiday_triples['sales'].round(2)
holiday_triples['month']=holiday_triples['month'].map(lambda month_no:calendar.month_abbr[month_no])
df_y_m_ht=holiday_triples
df_y_m_ht.head()
| year | month | holiday_type | sales | |
|---|---|---|---|---|
| 0 | 2013 | Jan | Holiday | 1.41 | 
| 1 | 2013 | Jan | Work Day | 247.08 | 
| 2 | 2013 | Feb | Holiday | 164.82 | 
| 3 | 2013 | Mar | Holiday | 307.44 | 
| 4 | 2013 | Apr | Holiday | 228.52 | 
96 rows × 4 columns
month与holiday_type汇总(不同年份)
# One panel per year: month on x, holiday_type on y, marker size and colour
# both showing the mean sales.
# NOTE(review): the x axis is categorical, so a month with no rows in a given
# year would shift that panel's later months to the left — confirm every year
# covers the months it is expected to.
a,b,c,d,e=(df_y_m_ht['year']==year for year in (2013,2014,2015,2016,2017))
month_labels=['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
# A single colour range for all panels, so the one shared colour bar is valid for each of them.
sales_min=df_y_m_ht['sales'].min()
sales_max=df_y_m_ht['sales'].max()
fig,ax=plt.subplots(5,1,figsize=(24,20))
# (year, row mask, x position of the rotated year caption)
panels=[(2013,a,12),(2014,b,12),(2015,c,12),(2016,d,12),(2017,e,11.5)]
for i,(year,mask,caption_x) in enumerate(panels):
    rows=df_y_m_ht.loc[mask]
    points=ax[i].scatter(rows['month'],rows['holiday_type'],s=rows['sales'],c=rows['sales'],
                         cmap='plasma',vmin=sales_min,vmax=sales_max)
    # rotation has to be a number: matplotlib only accepts the strings 'horizontal' and 'vertical'
    ax[i].text(caption_x,0.01,f'year={year}',size=20,rotation=270)
    # only the bottom panel carries month ticks; the panels above it hide theirs
    ax[i].set_xticks(month_labels if i==len(panels)-1 else [])
    for side in ('top','right','bottom','left'):
        ax[i].spines[side].set_visible(False)
    ax[i].set_ylim([-0.5,6])
    ax[i].set_xlim([-0.5,12])
    ax[i].tick_params(axis='both',which='major',labelsize=25)  # enlarge the tick labels
# Reuse the last scatter handle for the colour bar instead of drawing the 2013 points twice.
fig.colorbar(points,ax=list(ax),shrink=0.9)
plt.text(16,32,'sales',size=20)
plt.show()











