删除数据
- drop(labels,axis=0,index,columns) 删除在axis轴上的指定labels索引标签的数据
- dropna(axis=0,how='any'|'all')删除存在NaN值的行或列(how='any':只要含有NaN就删除;how='all':全部为NaN才删除)
- drop_duplicates(keep='first'|'last')删除重复行的数据,keep指定保留重复行中第一次出现的行还是最后一次出现的行
import numpy as np
import pandas as pd
from pandas import DataFrame,Series
# Build a 10x5 DataFrame of random integers drawn from [1, 151),
# with one column per database name.
df = DataFrame(np.random.randint(1,151,size=(10,5)),
columns=['MySQL','PostgreSQL','Oracle','MongoDB','SQLite'])
df
| MySQL | PostgreSQL | Oracle | MongoDB | SQLite |
|---|
| 0 | 136 | 28 | 108 | 18 | 132 |
| 1 | 53 | 12 | 137 | 45 | 14 |
| 2 | 61 | 59 | 132 | 74 | 103 |
| 3 | 150 | 106 | 4 | 74 | 61 |
| 4 | 50 | 95 | 135 | 78 | 140 |
| 5 | 70 | 112 | 62 | 62 | 128 |
| 6 | 125 | 133 | 138 | 70 | 135 |
| 7 | 46 | 39 | 71 | 50 | 117 |
| 8 | 50 | 119 | 59 | 89 | 130 |
| 9 | 122 | 93 | 29 | 56 | 16 |
# Append copies of rows 6 and 7 under the new labels 10 and 11 so that the
# frame contains duplicated rows for the drop_duplicates() examples.
df.loc[10] = df.loc[6]
df.loc[11] = df.loc[7]
df
| MySQL | PostgreSQL | Oracle | MongoDB | SQLite |
|---|
| 0 | 136 | 28 | 108 | 18 | 132 |
| 1 | 53 | 12 | 137 | 45 | 14 |
| 2 | 61 | 59 | 132 | 74 | 103 |
| 3 | 150 | 106 | 4 | 74 | 61 |
| 4 | 50 | 95 | 135 | 78 | 140 |
| 5 | 70 | 112 | 62 | 62 | 128 |
| 6 | 125 | 133 | 138 | 70 | 135 |
| 7 | 46 | 39 | 71 | 50 | 117 |
| 8 | 50 | 119 | 59 | 89 | 130 |
| 9 | 122 | 93 | 29 | 56 | 16 |
| 10 | 125 | 133 | 138 | 70 | 135 |
| 11 | 46 | 39 | 71 | 50 | 117 |
df.drop(labels=['PostgreSQL','MongoDB'],axis='columns')
| MySQL | Oracle | SQLite |
|---|
| 0 | 136 | 108 | 132 |
| 1 | 53 | 137 | 14 |
| 2 | 61 | 132 | 103 |
| 3 | 150 | 4 | 61 |
| 4 | 50 | 135 | 140 |
| 5 | 70 | 62 | 128 |
| 6 | 125 | 138 | 135 |
| 7 | 46 | 71 | 117 |
| 8 | 50 | 59 | 130 |
| 9 | 122 | 29 | 16 |
| 10 | 125 | 138 | 135 |
| 11 | 46 | 71 | 117 |
df.drop(labels=[3,4],axis=0)
| MySQL | PostgreSQL | Oracle | MongoDB | SQLite |
|---|
| 0 | 136 | 28 | 108 | 18 | 132 |
| 1 | 53 | 12 | 137 | 45 | 14 |
| 2 | 61 | 59 | 132 | 74 | 103 |
| 5 | 70 | 112 | 62 | 62 | 128 |
| 6 | 125 | 133 | 138 | 70 | 135 |
| 7 | 46 | 39 | 71 | 50 | 117 |
| 8 | 50 | 119 | 59 | 89 | 130 |
| 9 | 122 | 93 | 29 | 56 | 16 |
| 10 | 125 | 133 | 138 | 70 | 135 |
| 11 | 46 | 39 | 71 | 50 | 117 |
df
| MySQL | PostgreSQL | Oracle | MongoDB | SQLite |
|---|
| 0 | 136 | 28 | 108 | 18 | 132 |
| 1 | 53 | 12 | 137 | 45 | 14 |
| 2 | 61 | 59 | 132 | 74 | 103 |
| 3 | 150 | 106 | 4 | 74 | 61 |
| 4 | 50 | 95 | 135 | 78 | 140 |
| 5 | 70 | 112 | 62 | 62 | 128 |
| 6 | 125 | 133 | 138 | 70 | 135 |
| 7 | 46 | 39 | 71 | 50 | 117 |
| 8 | 50 | 119 | 59 | 89 | 130 |
| 9 | 122 | 93 | 29 | 56 | 16 |
| 10 | 125 | 133 | 138 | 70 | 135 |
| 11 | 46 | 39 | 71 | 50 | 117 |
df.drop(index=[3,4],columns=['PostgreSQL','MongoDB'],inplace=False)
| MySQL | Oracle | SQLite |
|---|
| 0 | 136 | 108 | 132 |
| 1 | 53 | 137 | 14 |
| 2 | 61 | 132 | 103 |
| 5 | 70 | 62 | 128 |
| 6 | 125 | 138 | 135 |
| 7 | 46 | 71 | 117 |
| 8 | 50 | 59 | 130 |
| 9 | 122 | 29 | 16 |
| 10 | 125 | 138 | 135 |
| 11 | 46 | 71 | 117 |
df
| MySQL | PostgreSQL | Oracle | MongoDB | SQLite |
|---|
| 0 | 136 | 28 | 108 | 18 | 132 |
| 1 | 53 | 12 | 137 | 45 | 14 |
| 2 | 61 | 59 | 132 | 74 | 103 |
| 3 | 150 | 106 | 4 | 74 | 61 |
| 4 | 50 | 95 | 135 | 78 | 140 |
| 5 | 70 | 112 | 62 | 62 | 128 |
| 6 | 125 | 133 | 138 | 70 | 135 |
| 7 | 46 | 39 | 71 | 50 | 117 |
| 8 | 50 | 119 | 59 | 89 | 130 |
| 9 | 122 | 93 | 29 | 56 | 16 |
| 10 | 125 | 133 | 138 | 70 | 135 |
| 11 | 46 | 39 | 71 | 50 | 117 |
df.drop([1,3])
| MySQL | PostgreSQL | Oracle | MongoDB | SQLite |
|---|
| 0 | 136 | 28 | 108 | 18 | 132 |
| 2 | 61 | 59 | 132 | 74 | 103 |
| 4 | 50 | 95 | 135 | 78 | 140 |
| 5 | 70 | 112 | 62 | 62 | 128 |
| 6 | 125 | 133 | 138 | 70 | 135 |
| 7 | 46 | 39 | 71 | 50 | 117 |
| 8 | 50 | 119 | 59 | 89 | 130 |
| 9 | 122 | 93 | 29 | 56 | 16 |
| 10 | 125 | 133 | 138 | 70 | 135 |
| 11 | 46 | 39 | 71 | 50 | 117 |
df
| MySQL | PostgreSQL | Oracle | MongoDB | SQLite |
|---|
| 0 | 136 | 28 | 108 | 18 | 132 |
| 1 | 53 | 12 | 137 | 45 | 14 |
| 2 | 61 | 59 | 132 | 74 | 103 |
| 3 | 150 | 106 | 4 | 74 | 61 |
| 4 | 50 | 95 | 135 | 78 | 140 |
| 5 | 70 | 112 | 62 | 62 | 128 |
| 6 | 125 | 133 | 138 | 70 | 135 |
| 7 | 46 | 39 | 71 | 50 | 117 |
| 8 | 50 | 119 | 59 | 89 | 130 |
| 9 | 122 | 93 | 29 | 56 | 16 |
| 10 | 125 | 133 | 138 | 70 | 135 |
| 11 | 46 | 39 | 71 | 50 | 117 |
df.loc[3,'MySQL'] = np.nan
df
| MySQL | PostgreSQL | Oracle | MongoDB | SQLite |
|---|
| 0 | 136.0 | 28 | 108 | 18 | 132 |
| 1 | 53.0 | 12 | 137 | 45 | 14 |
| 2 | 61.0 | 59 | 132 | 74 | 103 |
| 3 | NaN | 106 | 4 | 74 | 61 |
| 4 | 50.0 | 95 | 135 | 78 | 140 |
| 5 | 70.0 | 112 | 62 | 62 | 128 |
| 6 | 125.0 | 133 | 138 | 70 | 135 |
| 7 | 46.0 | 39 | 71 | 50 | 117 |
| 8 | 50.0 | 119 | 59 | 89 | 130 |
| 9 | 122.0 | 93 | 29 | 56 | 16 |
| 10 | 125.0 | 133 | 138 | 70 | 135 |
| 11 | 46.0 | 39 | 71 | 50 | 117 |
df.dropna(axis=0,how='any')
| MySQL | PostgreSQL | Oracle | MongoDB | SQLite |
|---|
| 0 | 136.0 | 28 | 108 | 18 | 132 |
| 1 | 53.0 | 12 | 137 | 45 | 14 |
| 2 | 61.0 | 59 | 132 | 74 | 103 |
| 4 | 50.0 | 95 | 135 | 78 | 140 |
| 5 | 70.0 | 112 | 62 | 62 | 128 |
| 6 | 125.0 | 133 | 138 | 70 | 135 |
| 7 | 46.0 | 39 | 71 | 50 | 117 |
| 8 | 50.0 | 119 | 59 | 89 | 130 |
| 9 | 122.0 | 93 | 29 | 56 | 16 |
| 10 | 125.0 | 133 | 138 | 70 | 135 |
| 11 | 46.0 | 39 | 71 | 50 | 117 |
df.dropna(axis=0,how='all')
| MySQL | PostgreSQL | Oracle | MongoDB | SQLite |
|---|
| 0 | 136.0 | 28 | 108 | 18 | 132 |
| 1 | 53.0 | 12 | 137 | 45 | 14 |
| 2 | 61.0 | 59 | 132 | 74 | 103 |
| 3 | NaN | 106 | 4 | 74 | 61 |
| 4 | 50.0 | 95 | 135 | 78 | 140 |
| 5 | 70.0 | 112 | 62 | 62 | 128 |
| 6 | 125.0 | 133 | 138 | 70 | 135 |
| 7 | 46.0 | 39 | 71 | 50 | 117 |
| 8 | 50.0 | 119 | 59 | 89 | 130 |
| 9 | 122.0 | 93 | 29 | 56 | 16 |
| 10 | 125.0 | 133 | 138 | 70 | 135 |
| 11 | 46.0 | 39 | 71 | 50 | 117 |
df
| MySQL | PostgreSQL | Oracle | MongoDB | SQLite |
|---|
| 0 | 136.0 | 28 | 108 | 18 | 132 |
| 1 | 53.0 | 12 | 137 | 45 | 14 |
| 2 | 61.0 | 59 | 132 | 74 | 103 |
| 3 | NaN | 106 | 4 | 74 | 61 |
| 4 | 50.0 | 95 | 135 | 78 | 140 |
| 5 | 70.0 | 112 | 62 | 62 | 128 |
| 6 | 125.0 | 133 | 138 | 70 | 135 |
| 7 | 46.0 | 39 | 71 | 50 | 117 |
| 8 | 50.0 | 119 | 59 | 89 | 130 |
| 9 | 122.0 | 93 | 29 | 56 | 16 |
| 10 | 125.0 | 133 | 138 | 70 | 135 |
| 11 | 46.0 | 39 | 71 | 50 | 117 |
df.drop_duplicates()
| MySQL | PostgreSQL | Oracle | MongoDB | SQLite |
|---|
| 0 | 136.0 | 28 | 108 | 18 | 132 |
| 1 | 53.0 | 12 | 137 | 45 | 14 |
| 2 | 61.0 | 59 | 132 | 74 | 103 |
| 3 | NaN | 106 | 4 | 74 | 61 |
| 4 | 50.0 | 95 | 135 | 78 | 140 |
| 5 | 70.0 | 112 | 62 | 62 | 128 |
| 6 | 125.0 | 133 | 138 | 70 | 135 |
| 7 | 46.0 | 39 | 71 | 50 | 117 |
| 8 | 50.0 | 119 | 59 | 89 | 130 |
| 9 | 122.0 | 93 | 29 | 56 | 16 |
df
| MySQL | PostgreSQL | Oracle | MongoDB | SQLite |
|---|
| 0 | 136.0 | 28 | 108 | 18 | 132 |
| 1 | 53.0 | 12 | 137 | 45 | 14 |
| 2 | 61.0 | 59 | 132 | 74 | 103 |
| 3 | NaN | 106 | 4 | 74 | 61 |
| 4 | 50.0 | 95 | 135 | 78 | 140 |
| 5 | 70.0 | 112 | 62 | 62 | 128 |
| 6 | 125.0 | 133 | 138 | 70 | 135 |
| 7 | 46.0 | 39 | 71 | 50 | 117 |
| 8 | 50.0 | 119 | 59 | 89 | 130 |
| 9 | 122.0 | 93 | 29 | 56 | 16 |
| 10 | 125.0 | 133 | 138 | 70 | 135 |
| 11 | 46.0 | 39 | 71 | 50 | 117 |
df.drop_duplicates(keep="last")
| MySQL | PostgreSQL | Oracle | MongoDB | SQLite |
|---|
| 0 | 136.0 | 28 | 108 | 18 | 132 |
| 1 | 53.0 | 12 | 137 | 45 | 14 |
| 2 | 61.0 | 59 | 132 | 74 | 103 |
| 3 | NaN | 106 | 4 | 74 | 61 |
| 4 | 50.0 | 95 | 135 | 78 | 140 |
| 5 | 70.0 | 112 | 62 | 62 | 128 |
| 8 | 50.0 | 119 | 59 | 89 | 130 |
| 9 | 122.0 | 93 | 29 | 56 | 16 |
| 10 | 125.0 | 133 | 138 | 70 | 135 |
| 11 | 46.0 | 39 | 71 | 50 | 117 |
df.duplicated()
0 False
1 False
2 False
3 False
4 False
5 False
6 False
7 False
8 False
9 False
10 True
11 True
dtype: bool
df[df.duplicated()]
| MySQL | PostgreSQL | Oracle | MongoDB | SQLite |
|---|
| 10 | 125.0 | 133 | 138 | 70 | 135 |
| 11 | 46.0 | 39 | 71 | 50 | 117 |
df[df.duplicated()].index
Int64Index([10, 11], dtype='int64')
df.drop(df[df.duplicated()].index)
| MySQL | PostgreSQL | Oracle | MongoDB | SQLite |
|---|
| 0 | 136.0 | 28 | 108 | 18 | 132 |
| 1 | 53.0 | 12 | 137 | 45 | 14 |
| 2 | 61.0 | 59 | 132 | 74 | 103 |
| 3 | NaN | 106 | 4 | 74 | 61 |
| 4 | 50.0 | 95 | 135 | 78 | 140 |
| 5 | 70.0 | 112 | 62 | 62 | 128 |
| 6 | 125.0 | 133 | 138 | 70 | 135 |
| 7 | 46.0 | 39 | 71 | 50 | 117 |
| 8 | 50.0 | 119 | 59 | 89 | 130 |
| 9 | 122.0 | 93 | 29 | 56 | 16 |
数据映射
- rename() 针对索引标签的重命名
- replace() 针对数据,替换数据
- map() 针对数据,根据数据可以映射成新的数据
df
| MySQL | PostgreSQL | Oracle | MongoDB | SQLite |
|---|
| 0 | 136.0 | 28 | 108 | 18 | 132 |
| 1 | 53.0 | 12 | 137 | 45 | 14 |
| 2 | 61.0 | 59 | 132 | 74 | 103 |
| 3 | NaN | 106 | 4 | 74 | 61 |
| 4 | 50.0 | 95 | 135 | 78 | 140 |
| 5 | 70.0 | 112 | 62 | 62 | 128 |
| 6 | 125.0 | 133 | 138 | 70 | 135 |
| 7 | 46.0 | 39 | 71 | 50 | 117 |
| 8 | 50.0 | 119 | 59 | 89 | 130 |
| 9 | 122.0 | 93 | 29 | 56 | 16 |
| 10 | 125.0 | 133 | 138 | 70 | 135 |
| 11 | 46.0 | 39 | 71 | 50 | 117 |
"""
df.rename(
mapper=None, 其实是个字典
index=None, 替换行标签的内容
columns=None, 替换列标签的内容
axis=None, 0——行标签;1——列标签
copy=True,
inplace=False, 是否在原本上替换,False表示不在原本上替换
level=None, # 指定更改的索引层数
errors='ignore',
)
"""
df.rename({'MySQL':'MySQL1m','PostgreSQL':'PostgreSQL11m'},axis=1)
| MySQL1m | PostgreSQL11m | Oracle | MongoDB | SQLite |
|---|
| 0 | 136.0 | 28 | 108 | 18 | 132 |
| 1 | 53.0 | 12 | 137 | 45 | 14 |
| 2 | 61.0 | 59 | 132 | 74 | 103 |
| 3 | NaN | 106 | 4 | 74 | 61 |
| 4 | 50.0 | 95 | 135 | 78 | 140 |
| 5 | 70.0 | 112 | 62 | 62 | 128 |
| 6 | 125.0 | 133 | 138 | 70 | 135 |
| 7 | 46.0 | 39 | 71 | 50 | 117 |
| 8 | 50.0 | 119 | 59 | 89 | 130 |
| 9 | 122.0 | 93 | 29 | 56 | 16 |
| 10 | 125.0 | 133 | 138 | 70 | 135 |
| 11 | 46.0 | 39 | 71 | 50 | 117 |
df
| MySQL | PostgreSQL | Oracle | MongoDB | SQLite |
|---|
| 0 | 136.0 | 28 | 108 | 18 | 132 |
| 1 | 53.0 | 12 | 137 | 45 | 14 |
| 2 | 61.0 | 59 | 132 | 74 | 103 |
| 3 | NaN | 106 | 4 | 74 | 61 |
| 4 | 50.0 | 95 | 135 | 78 | 140 |
| 5 | 70.0 | 112 | 62 | 62 | 128 |
| 6 | 125.0 | 133 | 138 | 70 | 135 |
| 7 | 46.0 | 39 | 71 | 50 | 117 |
| 8 | 50.0 | 119 | 59 | 89 | 130 |
| 9 | 122.0 | 93 | 29 | 56 | 16 |
| 10 | 125.0 | 133 | 138 | 70 | 135 |
| 11 | 46.0 | 39 | 71 | 50 | 117 |
df.rename({1:'A',0:'B'})
| MySQL | PostgreSQL | Oracle | MongoDB | SQLite |
|---|
| B | 136.0 | 28 | 108 | 18 | 132 |
| A | 53.0 | 12 | 137 | 45 | 14 |
| 2 | 61.0 | 59 | 132 | 74 | 103 |
| 3 | NaN | 106 | 4 | 74 | 61 |
| 4 | 50.0 | 95 | 135 | 78 | 140 |
| 5 | 70.0 | 112 | 62 | 62 | 128 |
| 6 | 125.0 | 133 | 138 | 70 | 135 |
| 7 | 46.0 | 39 | 71 | 50 | 117 |
| 8 | 50.0 | 119 | 59 | 89 | 130 |
| 9 | 122.0 | 93 | 29 | 56 | 16 |
| 10 | 125.0 | 133 | 138 | 70 | 135 |
| 11 | 46.0 | 39 | 71 | 50 | 117 |
df
| MySQL | PostgreSQL | Oracle | MongoDB | SQLite |
|---|
| 0 | 136.0 | 28 | 108 | 18 | 132 |
| 1 | 53.0 | 12 | 137 | 45 | 14 |
| 2 | 61.0 | 59 | 132 | 74 | 103 |
| 3 | NaN | 106 | 4 | 74 | 61 |
| 4 | 50.0 | 95 | 135 | 78 | 140 |
| 5 | 70.0 | 112 | 62 | 62 | 128 |
| 6 | 125.0 | 133 | 138 | 70 | 135 |
| 7 | 46.0 | 39 | 71 | 50 | 117 |
| 8 | 50.0 | 119 | 59 | 89 | 130 |
| 9 | 122.0 | 93 | 29 | 56 | 16 |
| 10 | 125.0 | 133 | 138 | 70 | 135 |
| 11 | 46.0 | 39 | 71 | 50 | 117 |
df.rename(columns={'MySQL':'MySQL1m','PostgreSQL':'PostgreSQL11m'})
| MySQL1m | PostgreSQL11m | Oracle | MongoDB | SQLite |
|---|
| 0 | 136.0 | 28 | 108 | 18 | 132 |
| 1 | 53.0 | 12 | 137 | 45 | 14 |
| 2 | 61.0 | 59 | 132 | 74 | 103 |
| 3 | NaN | 106 | 4 | 74 | 61 |
| 4 | 50.0 | 95 | 135 | 78 | 140 |
| 5 | 70.0 | 112 | 62 | 62 | 128 |
| 6 | 125.0 | 133 | 138 | 70 | 135 |
| 7 | 46.0 | 39 | 71 | 50 | 117 |
| 8 | 50.0 | 119 | 59 | 89 | 130 |
| 9 | 122.0 | 93 | 29 | 56 | 16 |
| 10 | 125.0 | 133 | 138 | 70 | 135 |
| 11 | 46.0 | 39 | 71 | 50 | 117 |
"""
df.replace(
to_replace=None, 去替换的 类型是可以是列表,可以是元组,也可以是字典
value=None, 值
inplace=False,
limit=None,
regex=False,
method='pad',
)
"""
df.replace(14,140)
| MySQL | PostgreSQL | Oracle | MongoDB | SQLite |
|---|
| 0 | 136.0 | 28 | 108 | 18 | 132 |
| 1 | 53.0 | 12 | 137 | 45 | 140 |
| 2 | 61.0 | 59 | 132 | 74 | 103 |
| 3 | NaN | 106 | 4 | 74 | 61 |
| 4 | 50.0 | 95 | 135 | 78 | 140 |
| 5 | 70.0 | 112 | 62 | 62 | 128 |
| 6 | 125.0 | 133 | 138 | 70 | 135 |
| 7 | 46.0 | 39 | 71 | 50 | 117 |
| 8 | 50.0 | 119 | 59 | 89 | 130 |
| 9 | 122.0 | 93 | 29 | 56 | 16 |
| 10 | 125.0 | 133 | 138 | 70 | 135 |
| 11 | 46.0 | 39 | 71 | 50 | 117 |
df
| MySQL | PostgreSQL | Oracle | MongoDB | SQLite |
|---|
| 0 | 136.0 | 28 | 108 | 18 | 132 |
| 1 | 53.0 | 12 | 137 | 45 | 14 |
| 2 | 61.0 | 59 | 132 | 74 | 103 |
| 3 | NaN | 106 | 4 | 74 | 61 |
| 4 | 50.0 | 95 | 135 | 78 | 140 |
| 5 | 70.0 | 112 | 62 | 62 | 128 |
| 6 | 125.0 | 133 | 138 | 70 | 135 |
| 7 | 46.0 | 39 | 71 | 50 | 117 |
| 8 | 50.0 | 119 | 59 | 89 | 130 |
| 9 | 122.0 | 93 | 29 | 56 | 16 |
| 10 | 125.0 | 133 | 138 | 70 | 135 |
| 11 | 46.0 | 39 | 71 | 50 | 117 |
df.replace((14,4),(140,180))
| MySQL | PostgreSQL | Oracle | MongoDB | SQLite |
|---|
| 0 | 136.0 | 28 | 108 | 18 | 132 |
| 1 | 53.0 | 12 | 137 | 45 | 140 |
| 2 | 61.0 | 59 | 132 | 74 | 103 |
| 3 | NaN | 106 | 180 | 74 | 61 |
| 4 | 50.0 | 95 | 135 | 78 | 140 |
| 5 | 70.0 | 112 | 62 | 62 | 128 |
| 6 | 125.0 | 133 | 138 | 70 | 135 |
| 7 | 46.0 | 39 | 71 | 50 | 117 |
| 8 | 50.0 | 119 | 59 | 89 | 130 |
| 9 | 122.0 | 93 | 29 | 56 | 16 |
| 10 | 125.0 | 133 | 138 | 70 | 135 |
| 11 | 46.0 | 39 | 71 | 50 | 117 |
df.replace({14:140,4:180})
| MySQL | PostgreSQL | Oracle | MongoDB | SQLite |
|---|
| 0 | 136.0 | 28 | 108 | 18 | 132 |
| 1 | 53.0 | 12 | 137 | 45 | 140 |
| 2 | 61.0 | 59 | 132 | 74 | 103 |
| 3 | NaN | 106 | 180 | 74 | 61 |
| 4 | 50.0 | 95 | 135 | 78 | 140 |
| 5 | 70.0 | 112 | 62 | 62 | 128 |
| 6 | 125.0 | 133 | 138 | 70 | 135 |
| 7 | 46.0 | 39 | 71 | 50 | 117 |
| 8 | 50.0 | 119 | 59 | 89 | 130 |
| 9 | 122.0 | 93 | 29 | 56 | 16 |
| 10 | 125.0 | 133 | 138 | 70 | 135 |
| 11 | 46.0 | 39 | 71 | 50 | 117 |
df
| MySQL | PostgreSQL | Oracle | MongoDB | SQLite |
|---|
| 0 | 136.0 | 28 | 108 | 18 | 132 |
| 1 | 53.0 | 12 | 137 | 45 | 14 |
| 2 | 61.0 | 59 | 132 | 74 | 103 |
| 3 | NaN | 106 | 4 | 74 | 61 |
| 4 | 50.0 | 95 | 135 | 78 | 140 |
| 5 | 70.0 | 112 | 62 | 62 | 128 |
| 6 | 125.0 | 133 | 138 | 70 | 135 |
| 7 | 46.0 | 39 | 71 | 50 | 117 |
| 8 | 50.0 | 119 | 59 | 89 | 130 |
| 9 | 122.0 | 93 | 29 | 56 | 16 |
| 10 | 125.0 | 133 | 138 | 70 | 135 |
| 11 | 46.0 | 39 | 71 | 50 | 117 |
# Back-fill NaN with the next valid value in the same column.
# replace(..., method=...) is deprecated since pandas 2.1; bfill() is the
# supported spelling and yields the same result for NaN.
df.bfill()
| MySQL | PostgreSQL | Oracle | MongoDB | SQLite |
|---|
| 0 | 136.0 | 28 | 108 | 18 | 132 |
| 1 | 53.0 | 12 | 137 | 45 | 14 |
| 2 | 61.0 | 59 | 132 | 74 | 103 |
| 3 | 50.0 | 106 | 4 | 74 | 61 |
| 4 | 50.0 | 95 | 135 | 78 | 140 |
| 5 | 70.0 | 112 | 62 | 62 | 128 |
| 6 | 125.0 | 133 | 138 | 70 | 135 |
| 7 | 46.0 | 39 | 71 | 50 | 117 |
| 8 | 50.0 | 119 | 59 | 89 | 130 |
| 9 | 122.0 | 93 | 29 | 56 | 16 |
| 10 | 125.0 | 133 | 138 | 70 | 135 |
| 11 | 46.0 | 39 | 71 | 50 | 117 |
# Forward-fill NaN with the previous valid value in the same column.
# replace(..., method=...) is deprecated since pandas 2.1; ffill() is the
# supported spelling and yields the same result for NaN.
df.ffill()
| MySQL | PostgreSQL | Oracle | MongoDB | SQLite |
|---|
| 0 | 136.0 | 28 | 108 | 18 | 132 |
| 1 | 53.0 | 12 | 137 | 45 | 14 |
| 2 | 61.0 | 59 | 132 | 74 | 103 |
| 3 | 61.0 | 106 | 4 | 74 | 61 |
| 4 | 50.0 | 95 | 135 | 78 | 140 |
| 5 | 70.0 | 112 | 62 | 62 | 128 |
| 6 | 125.0 | 133 | 138 | 70 | 135 |
| 7 | 46.0 | 39 | 71 | 50 | 117 |
| 8 | 50.0 | 119 | 59 | 89 | 130 |
| 9 | 122.0 | 93 | 29 | 56 | 16 |
| 10 | 125.0 | 133 | 138 | 70 | 135 |
| 11 | 46.0 | 39 | 71 | 50 | 117 |
# 'pad' is an alias of 'ffill': NaN takes the previous valid value in the
# same column. replace(..., method=...) is deprecated since pandas 2.1, so
# the fill is expressed with ffill() directly.
df.ffill()
| MySQL | PostgreSQL | Oracle | MongoDB | SQLite |
|---|
| 0 | 136.0 | 28 | 108 | 18 | 132 |
| 1 | 53.0 | 12 | 137 | 45 | 14 |
| 2 | 61.0 | 59 | 132 | 74 | 103 |
| 3 | 61.0 | 106 | 4 | 74 | 61 |
| 4 | 50.0 | 95 | 135 | 78 | 140 |
| 5 | 70.0 | 112 | 62 | 62 | 128 |
| 6 | 125.0 | 133 | 138 | 70 | 135 |
| 7 | 46.0 | 39 | 71 | 50 | 117 |
| 8 | 50.0 | 119 | 59 | 89 | 130 |
| 9 | 122.0 | 93 | 29 | 56 | 16 |
| 10 | 125.0 | 133 | 138 | 70 | 135 |
| 11 | 46.0 | 39 | 71 | 50 | 117 |
# Fill-style replace targeting the value 1 with method='ffill'; the output
# shown below is identical to df, existing NaN included.
# NOTE(review): the `method` argument of replace() is deprecated since
# pandas 2.1 - confirm the pandas version this notebook targets.
df.replace(1,method='ffill')
| MySQL | PostgreSQL | Oracle | MongoDB | SQLite |
|---|
| 0 | 136.0 | 28 | 108 | 18 | 132 |
| 1 | 53.0 | 12 | 137 | 45 | 14 |
| 2 | 61.0 | 59 | 132 | 74 | 103 |
| 3 | NaN | 106 | 4 | 74 | 61 |
| 4 | 50.0 | 95 | 135 | 78 | 140 |
| 5 | 70.0 | 112 | 62 | 62 | 128 |
| 6 | 125.0 | 133 | 138 | 70 | 135 |
| 7 | 46.0 | 39 | 71 | 50 | 117 |
| 8 | 50.0 | 119 | 59 | 89 | 130 |
| 9 | 122.0 | 93 | 29 | 56 | 16 |
| 10 | 125.0 | 133 | 138 | 70 | 135 |
| 11 | 46.0 | 39 | 71 | 50 | 117 |
# Fill-style replace targeting the value 1 with method='bfill'; the output
# shown below is identical to df, existing NaN included.
# NOTE(review): the `method` argument of replace() is deprecated since
# pandas 2.1 - confirm the pandas version this notebook targets.
df.replace(1,method='bfill')
| MySQL | PostgreSQL | Oracle | MongoDB | SQLite |
|---|
| 0 | 136.0 | 28 | 108 | 18 | 132 |
| 1 | 53.0 | 12 | 137 | 45 | 14 |
| 2 | 61.0 | 59 | 132 | 74 | 103 |
| 3 | NaN | 106 | 4 | 74 | 61 |
| 4 | 50.0 | 95 | 135 | 78 | 140 |
| 5 | 70.0 | 112 | 62 | 62 | 128 |
| 6 | 125.0 | 133 | 138 | 70 | 135 |
| 7 | 46.0 | 39 | 71 | 50 | 117 |
| 8 | 50.0 | 119 | 59 | 89 | 130 |
| 9 | 122.0 | 93 | 29 | 56 | 16 |
| 10 | 125.0 | 133 | 138 | 70 | 135 |
| 11 | 46.0 | 39 | 71 | 50 | 117 |
df['DB'] = df['Oracle'].map({108:90,132:100})
df
| MySQL | PostgreSQL | Oracle | MongoDB | SQLite | DB |
|---|
| 0 | 136.0 | 28 | 108 | 18 | 132 | 90.0 |
| 1 | 53.0 | 12 | 137 | 45 | 14 | NaN |
| 2 | 61.0 | 59 | 132 | 74 | 103 | 100.0 |
| 3 | NaN | 106 | 4 | 74 | 61 | NaN |
| 4 | 50.0 | 95 | 135 | 78 | 140 | NaN |
| 5 | 70.0 | 112 | 62 | 62 | 128 | NaN |
| 6 | 125.0 | 133 | 138 | 70 | 135 | NaN |
| 7 | 46.0 | 39 | 71 | 50 | 117 | NaN |
| 8 | 50.0 | 119 | 59 | 89 | 130 | NaN |
| 9 | 122.0 | 93 | 29 | 56 | 16 | NaN |
| 10 | 125.0 | 133 | 138 | 70 | 135 | NaN |
| 11 | 46.0 | 39 | 71 | 50 | 117 | NaN |
df['DB'] =df['Oracle'].map(lambda item: 'A' if item>80 else 'B' if item > 60 else 'C')
df
| MySQL | PostgreSQL | Oracle | MongoDB | SQLite | DB |
|---|
| 0 | 136.0 | 28 | 108 | 18 | 132 | A |
| 1 | 53.0 | 12 | 137 | 45 | 14 | A |
| 2 | 61.0 | 59 | 132 | 74 | 103 | A |
| 3 | NaN | 106 | 4 | 74 | 61 | C |
| 4 | 50.0 | 95 | 135 | 78 | 140 | A |
| 5 | 70.0 | 112 | 62 | 62 | 128 | B |
| 6 | 125.0 | 133 | 138 | 70 | 135 | A |
| 7 | 46.0 | 39 | 71 | 50 | 117 | B |
| 8 | 50.0 | 119 | 59 | 89 | 130 | C |
| 9 | 122.0 | 93 | 29 | 56 | 16 | C |
| 10 | 125.0 | 133 | 138 | 70 | 135 | A |
| 11 | 46.0 | 39 | 71 | 50 | 117 | B |
数据统计信息
df.describe()
| MySQL | PostgreSQL | Oracle | MongoDB | SQLite |
|---|
| count | 11.000000 | 12.000000 | 12.000000 | 12.000000 | 12.000000 |
| mean | 80.363636 | 80.666667 | 90.333333 | 61.333333 | 102.333333 |
| std | 37.749895 | 43.020784 | 47.206189 | 18.936897 | 45.976938 |
| min | 46.000000 | 12.000000 | 4.000000 | 18.000000 | 14.000000 |
| 25% | 50.000000 | 39.000000 | 61.250000 | 50.000000 | 92.500000 |
| 50% | 61.000000 | 94.000000 | 89.500000 | 66.000000 | 122.500000 |
| 75% | 123.500000 | 113.750000 | 135.500000 | 74.000000 | 132.750000 |
| max | 136.000000 | 133.000000 | 138.000000 | 89.000000 | 140.000000 |
# Describe only the integer columns. np.int was a deprecated alias of the
# builtin int and was removed in NumPy 1.24; np.integer matches every
# integer dtype regardless of platform width.
df.describe(include=[np.integer])
| PostgreSQL | Oracle | MongoDB | SQLite |
|---|
| count | 12.000000 | 12.000000 | 12.000000 | 12.000000 |
| mean | 80.666667 | 90.333333 | 61.333333 | 102.333333 |
| std | 43.020784 | 47.206189 | 18.936897 | 45.976938 |
| min | 12.000000 | 4.000000 | 18.000000 | 14.000000 |
| 25% | 39.000000 | 61.250000 | 50.000000 | 92.500000 |
| 50% | 94.000000 | 89.500000 | 66.000000 | 122.500000 |
| 75% | 113.750000 | 135.500000 | 74.000000 | 132.750000 |
| max | 133.000000 | 138.000000 | 89.000000 | 140.000000 |
# Column-wise standard deviation. df also holds the string column 'DB';
# pandas >= 2.0 no longer drops such columns silently, so restrict the
# reduction to numeric columns explicitly.
df.std(numeric_only=True)
MySQL 37.749895
PostgreSQL 43.020784
Oracle 47.206189
MongoDB 18.936897
SQLite 45.976938
dtype: float64
# Row-wise standard deviation over the numeric columns only; the string
# column 'DB' must be excluded explicitly on pandas >= 2.0.
df.std(axis=1, numeric_only=True)
0 57.173420
1 50.790747
2 31.236197
3 42.594014
4 38.187694
5 31.003226
6 28.472794
7 31.627520
8 35.359581
9 44.144082
10 28.472794
11 31.627520
dtype: float64
# Row-wise standard deviation (numeric columns only, so the string column
# 'DB' is skipped), sorted in ascending order.
df.std(axis=1, numeric_only=True).sort_values()
6 28.472794
10 28.472794
5 31.003226
2 31.236197
7 31.627520
11 31.627520
8 35.359581
4 38.187694
3 42.594014
9 44.144082
1 50.790747
0 57.173420
dtype: float64
df.loc[0]
MySQL 136
PostgreSQL 28
Oracle 108
MongoDB 18
SQLite 132
DB A
Name: 0, dtype: object
df.loc[6]
MySQL 125
PostgreSQL 133
Oracle 138
MongoDB 70
SQLite 135
DB A
Name: 6, dtype: object
take:按位置索引重新排列行或列(可用于自定义顺序或随机打乱)
df
| MySQL | PostgreSQL | Oracle | MongoDB | SQLite | DB |
|---|
| 0 | 136.0 | 28 | 108 | 18 | 132 | A |
| 1 | 53.0 | 12 | 137 | 45 | 14 | A |
| 2 | 61.0 | 59 | 132 | 74 | 103 | A |
| 3 | NaN | 106 | 4 | 74 | 61 | C |
| 4 | 50.0 | 95 | 135 | 78 | 140 | A |
| 5 | 70.0 | 112 | 62 | 62 | 128 | B |
| 6 | 125.0 | 133 | 138 | 70 | 135 | A |
| 7 | 46.0 | 39 | 71 | 50 | 117 | B |
| 8 | 50.0 | 119 | 59 | 89 | 130 | C |
| 9 | 122.0 | 93 | 29 | 56 | 16 | C |
| 10 | 125.0 | 133 | 138 | 70 | 135 | A |
| 11 | 46.0 | 39 | 71 | 50 | 117 | B |
df.take([0,1,5,2,3,4],axis=1)
| MySQL | PostgreSQL | DB | Oracle | MongoDB | SQLite |
|---|
| 0 | 136.0 | 28 | A | 108 | 18 | 132 |
| 1 | 53.0 | 12 | A | 137 | 45 | 14 |
| 2 | 61.0 | 59 | A | 132 | 74 | 103 |
| 3 | NaN | 106 | C | 4 | 74 | 61 |
| 4 | 50.0 | 95 | A | 135 | 78 | 140 |
| 5 | 70.0 | 112 | B | 62 | 62 | 128 |
| 6 | 125.0 | 133 | A | 138 | 70 | 135 |
| 7 | 46.0 | 39 | B | 71 | 50 | 117 |
| 8 | 50.0 | 119 | C | 59 | 89 | 130 |
| 9 | 122.0 | 93 | C | 29 | 56 | 16 |
| 10 | 125.0 | 133 | A | 138 | 70 | 135 |
| 11 | 46.0 | 39 | B | 71 | 50 | 117 |
df.take([0,1,-1,2,3,4],axis=1)
| MySQL | PostgreSQL | DB | Oracle | MongoDB | SQLite |
|---|
| 0 | 136.0 | 28 | A | 108 | 18 | 132 |
| 1 | 53.0 | 12 | A | 137 | 45 | 14 |
| 2 | 61.0 | 59 | A | 132 | 74 | 103 |
| 3 | NaN | 106 | C | 4 | 74 | 61 |
| 4 | 50.0 | 95 | A | 135 | 78 | 140 |
| 5 | 70.0 | 112 | B | 62 | 62 | 128 |
| 6 | 125.0 | 133 | A | 138 | 70 | 135 |
| 7 | 46.0 | 39 | B | 71 | 50 | 117 |
| 8 | 50.0 | 119 | C | 59 | 89 | 130 |
| 9 | 122.0 | 93 | C | 29 | 56 | 16 |
| 10 | 125.0 | 133 | A | 138 | 70 | 135 |
| 11 | 46.0 | 39 | B | 71 | 50 | 117 |
df2 = df.take([0,1,-1,2,3,4],axis=1)
df2
| MySQL | PostgreSQL | DB | Oracle | MongoDB | SQLite |
|---|
| 0 | 136.0 | 28 | A | 108 | 18 | 132 |
| 1 | 53.0 | 12 | A | 137 | 45 | 14 |
| 2 | 61.0 | 59 | A | 132 | 74 | 103 |
| 3 | NaN | 106 | C | 4 | 74 | 61 |
| 4 | 50.0 | 95 | A | 135 | 78 | 140 |
| 5 | 70.0 | 112 | B | 62 | 62 | 128 |
| 6 | 125.0 | 133 | A | 138 | 70 | 135 |
| 7 | 46.0 | 39 | B | 71 | 50 | 117 |
| 8 | 50.0 | 119 | C | 59 | 89 | 130 |
| 9 | 122.0 | 93 | C | 29 | 56 | 16 |
| 10 | 125.0 | 133 | A | 138 | 70 | 135 |
| 11 | 46.0 | 39 | B | 71 | 50 | 117 |
np.random.permutation(df.index)
array([ 5, 2, 7, 8, 1, 3, 6, 0, 4, 11, 10, 9], dtype=int64)
df.take(np.random.permutation(df.index))
| MySQL | PostgreSQL | Oracle | MongoDB | SQLite | DB |
|---|
| 11 | 46.0 | 39 | 71 | 50 | 117 | B |
| 4 | 50.0 | 95 | 135 | 78 | 140 | A |
| 9 | 122.0 | 93 | 29 | 56 | 16 | C |
| 1 | 53.0 | 12 | 137 | 45 | 14 | A |
| 6 | 125.0 | 133 | 138 | 70 | 135 | A |
| 7 | 46.0 | 39 | 71 | 50 | 117 | B |
| 10 | 125.0 | 133 | 138 | 70 | 135 | A |
| 5 | 70.0 | 112 | 62 | 62 | 128 | B |
| 2 | 61.0 | 59 | 132 | 74 | 103 | A |
| 8 | 50.0 | 119 | 59 | 89 | 130 | C |
| 3 | NaN | 106 | 4 | 74 | 61 | C |
| 0 | 136.0 | 28 | 108 | 18 | 132 | A |
np.random.permutation(df.columns)
array(['Oracle', 'MongoDB', 'SQLite', 'DB', 'PostgreSQL', 'MySQL'],
dtype=object)
column_map = dict.fromkeys(df.columns,0)
column_map
{'MySQL': 0, 'PostgreSQL': 0, 'Oracle': 0, 'MongoDB': 0, 'SQLite': 0, 'DB': 0}
column_map = dict(enumerate(df.columns))
column_map
{0: 'MySQL', 1: 'PostgreSQL', 2: 'Oracle', 3: 'MongoDB', 4: 'SQLite', 5: 'DB'}
# Map each column label to its position. enumerate() already yields the
# (position, label) pairs, so no intermediate dict is needed.
column_map = {name: pos for pos, name in enumerate(df.columns)}
column_map
{'MySQL': 0, 'PostgreSQL': 1, 'Oracle': 2, 'MongoDB': 3, 'SQLite': 4, 'DB': 5}
# Shuffle the column labels, translate every label back to its position
# through column_map, then reorder the columns positionally with take().
random_columns = np.random.permutation(df.columns)
column_index = [column_map[k] for k in random_columns]
df.take(column_index,axis=1)
| Oracle | PostgreSQL | MongoDB | SQLite | MySQL | DB |
|---|
| 0 | 108 | 28 | 18 | 132 | 136.0 | A |
| 1 | 137 | 12 | 45 | 14 | 53.0 | A |
| 2 | 132 | 59 | 74 | 103 | 61.0 | A |
| 3 | 4 | 106 | 74 | 61 | NaN | C |
| 4 | 135 | 95 | 78 | 140 | 50.0 | A |
| 5 | 62 | 112 | 62 | 128 | 70.0 | B |
| 6 | 138 | 133 | 70 | 135 | 125.0 | A |
| 7 | 71 | 39 | 50 | 117 | 46.0 | B |
| 8 | 59 | 119 | 89 | 130 | 50.0 | C |
| 9 | 29 | 93 | 56 | 16 | 122.0 | C |
| 10 | 138 | 133 | 70 | 135 | 125.0 | A |
| 11 | 71 | 39 | 50 | 117 | 46.0 | B |
df.take(column_index,axis=1)
| Oracle | PostgreSQL | MongoDB | SQLite | MySQL | DB |
|---|
| 0 | 108 | 28 | 18 | 132 | 136.0 | A |
| 1 | 137 | 12 | 45 | 14 | 53.0 | A |
| 2 | 132 | 59 | 74 | 103 | 61.0 | A |
| 3 | 4 | 106 | 74 | 61 | NaN | C |
| 4 | 135 | 95 | 78 | 140 | 50.0 | A |
| 5 | 62 | 112 | 62 | 128 | 70.0 | B |
| 6 | 138 | 133 | 70 | 135 | 125.0 | A |
| 7 | 71 | 39 | 50 | 117 | 46.0 | B |
| 8 | 59 | 119 | 89 | 130 | 50.0 | C |
| 9 | 29 | 93 | 56 | 16 | 122.0 | C |
| 10 | 138 | 133 | 70 | 135 | 125.0 | A |
| 11 | 71 | 39 | 50 | 117 | 46.0 | B |
数据分类处理
- groupby([‘列名’,…])返回DataFrameGroupBy
- 分组之后,可以针对某一数值列进行聚合操作(sum,mean,max,min,std等)
- 可以自定义聚合函数,使用transform或apply
df
| MySQL | PostgreSQL | Oracle | MongoDB | SQLite | DB |
|---|
| 0 | 136.0 | 28 | 108 | 18 | 132 | A |
| 1 | 53.0 | 12 | 137 | 45 | 14 | A |
| 2 | 61.0 | 59 | 132 | 74 | 103 | A |
| 3 | NaN | 106 | 4 | 74 | 61 | C |
| 4 | 50.0 | 95 | 135 | 78 | 140 | A |
| 5 | 70.0 | 112 | 62 | 62 | 128 | B |
| 6 | 125.0 | 133 | 138 | 70 | 135 | A |
| 7 | 46.0 | 39 | 71 | 50 | 117 | B |
| 8 | 50.0 | 119 | 59 | 89 | 130 | C |
| 9 | 122.0 | 93 | 29 | 56 | 16 | C |
| 10 | 125.0 | 133 | 138 | 70 | 135 | A |
| 11 | 46.0 | 39 | 71 | 50 | 117 | B |
df.groupby('DB')
<pandas.core.groupby.generic.DataFrameGroupBy object at 0x08A144D0>
df.groupby('DB')['SQLite'].sum()
DB
A 659
B 362
C 207
Name: SQLite, dtype: int32
df.groupby('DB')['SQLite'].mean()
DB
A 109.833333
B 120.666667
C 69.000000
Name: SQLite, dtype: float64
df.groupby('DB')['SQLite'].count()
DB
A 6
B 3
C 3
Name: SQLite, dtype: int64
# Select several columns of a GroupBy with a list. The bare-tuple form
# ['SQLite','PostgreSQL'] was deprecated in pandas 1.0 and removed in 2.0.
df.groupby('DB')[['SQLite','PostgreSQL']].mean()
| SQLite | PostgreSQL |
|---|
| DB | | |
|---|
| A | 109.833333 | 76.666667 |
| B | 120.666667 | 63.333333 |
| C | 69.000000 | 106.000000 |
df.groupby('DB').mean()
| MySQL | PostgreSQL | Oracle | MongoDB | SQLite |
|---|
| DB | | | | | |
|---|
| A | 91.666667 | 76.666667 | 131.333333 | 59.166667 | 109.833333 |
| B | 54.000000 | 63.333333 | 68.000000 | 54.000000 | 120.666667 |
| C | 86.000000 | 106.000000 | 30.666667 | 73.000000 | 69.000000 |
# Broadcast each group's column sums back onto the rows of that group.
# Passing the builtin sum relies on pandas silently aliasing it to its own
# reduction (deprecated in pandas 2.1); the string name is the explicit form.
df.groupby('DB').transform('sum')
| MySQL | PostgreSQL | Oracle | MongoDB | SQLite |
|---|
| 0 | 550.0 | 460 | 788 | 355 | 659 |
| 1 | 550.0 | 460 | 788 | 355 | 659 |
| 2 | 550.0 | 460 | 788 | 355 | 659 |
| 3 | 172.0 | 318 | 92 | 219 | 207 |
| 4 | 550.0 | 460 | 788 | 355 | 659 |
| 5 | 162.0 | 190 | 204 | 162 | 362 |
| 6 | 550.0 | 460 | 788 | 355 | 659 |
| 7 | 162.0 | 190 | 204 | 162 | 362 |
| 8 | 172.0 | 318 | 92 | 219 | 207 |
| 9 | 172.0 | 318 | 92 | 219 | 207 |
| 10 | 550.0 | 460 | 788 | 355 | 659 |
| 11 | 162.0 | 190 | 204 | 162 | 362 |
# Sum every column inside each group. The builtin sum only works here
# because pandas implicitly swaps it for a NumPy sum (deprecated in pandas
# 2.1), so call the DataFrame reduction explicitly.
df.groupby('DB').apply(lambda group: group.sum())
| MySQL | PostgreSQL | Oracle | MongoDB | SQLite | DB |
|---|
| DB | | | | | | |
|---|
| A | 550.0 | 460 | 788 | 355 | 659 | AAAAAA |
| B | 162.0 | 190 | 204 | 162 | 362 | BBB |
| C | 172.0 | 318 | 92 | 219 | 207 | CCC |
def sum_data(item):
    """Print the type of the object received and return its sums.

    Intended as a ``groupby(...).apply`` callback that reveals what kind of
    object each group is handed over as.

    Args:
        item: Object exposing ``sum()``; the recorded output shows a
            DataFrame per group.

    Returns:
        Whatever ``item.sum()`` returns (per-column sums for a DataFrame).
    """
    print(type(item))
    return item.sum()
df.groupby('DB').apply(sum_data)
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
| MySQL | PostgreSQL | Oracle | MongoDB | SQLite | DB |
|---|
| DB | | | | | | |
|---|
| A | 550.0 | 460 | 788 | 355 | 659 | AAAAAA |
| B | 162.0 | 190 | 204 | 162 | 362 | BBB |
| C | 172.0 | 318 | 92 | 219 | 207 | CCC |
def sum_data(item):
    """Render the received group, then return its sums.

    Intended as a ``groupby(...).apply`` callback that shows the content of
    every group before it is aggregated.

    Args:
        item: Object exposing ``sum()``; the recorded output shows a
            DataFrame per group.

    Returns:
        Whatever ``item.sum()`` returns (per-column sums for a DataFrame).
    """
    # NOTE(review): display is not imported in this file - presumably the
    # IPython/Jupyter builtin; confirm before running outside a notebook.
    display(item)
    return item.sum()
df.groupby('DB').apply(sum_data)
| MySQL | PostgreSQL | Oracle | MongoDB | SQLite | DB |
|---|
| 0 | 136.0 | 28 | 108 | 18 | 132 | A |
| 1 | 53.0 | 12 | 137 | 45 | 14 | A |
| 2 | 61.0 | 59 | 132 | 74 | 103 | A |
| 4 | 50.0 | 95 | 135 | 78 | 140 | A |
| 6 | 125.0 | 133 | 138 | 70 | 135 | A |
| 10 | 125.0 | 133 | 138 | 70 | 135 | A |
| MySQL | PostgreSQL | Oracle | MongoDB | SQLite | DB |
|---|
| 5 | 70.0 | 112 | 62 | 62 | 128 | B |
| 7 | 46.0 | 39 | 71 | 50 | 117 | B |
| 11 | 46.0 | 39 | 71 | 50 | 117 | B |
| MySQL | PostgreSQL | Oracle | MongoDB | SQLite | DB |
|---|
| 3 | NaN | 106 | 4 | 74 | 61 | C |
| 8 | 50.0 | 119 | 59 | 89 | 130 | C |
| 9 | 122.0 | 93 | 29 | 56 | 16 | C |
| MySQL | PostgreSQL | Oracle | MongoDB | SQLite | DB |
|---|
| DB | | | | | | |
|---|
| A | 550.0 | 460 | 788 | 355 | 659 | AAAAAA |
| B | 162.0 | 190 | 204 | 162 | 362 | BBB |
| C | 172.0 | 318 | 92 | 219 | 207 | CCC |
df.sum()
MySQL 884
PostgreSQL 968
Oracle 1084
MongoDB 736
SQLite 1228
DB AAACABABCCAB
dtype: object
df.iloc[:,:-1]
| MySQL | PostgreSQL | Oracle | MongoDB | SQLite |
|---|
| 0 | 136.0 | 28 | 108 | 18 | 132 |
| 1 | 53.0 | 12 | 137 | 45 | 14 |
| 2 | 61.0 | 59 | 132 | 74 | 103 |
| 3 | NaN | 106 | 4 | 74 | 61 |
| 4 | 50.0 | 95 | 135 | 78 | 140 |
| 5 | 70.0 | 112 | 62 | 62 | 128 |
| 6 | 125.0 | 133 | 138 | 70 | 135 |
| 7 | 46.0 | 39 | 71 | 50 | 117 |
| 8 | 50.0 | 119 | 59 | 89 | 130 |
| 9 | 122.0 | 93 | 29 | 56 | 16 |
| 10 | 125.0 | 133 | 138 | 70 | 135 |
| 11 | 46.0 | 39 | 71 | 50 | 117 |
df.iloc[:,:-1].sum()
MySQL 884.0
PostgreSQL 968.0
Oracle 1084.0
MongoDB 736.0
SQLite 1228.0
dtype: float64
df.iloc[:,:-1].values
array([[136., 28., 108., 18., 132.],
[ 53., 12., 137., 45., 14.],
[ 61., 59., 132., 74., 103.],
[ nan, 106., 4., 74., 61.],
[ 50., 95., 135., 78., 140.],
[ 70., 112., 62., 62., 128.],
[125., 133., 138., 70., 135.],
[ 46., 39., 71., 50., 117.],
[ 50., 119., 59., 89., 130.],
[122., 93., 29., 56., 16.],
[125., 133., 138., 70., 135.],
[ 46., 39., 71., 50., 117.]])
df.iloc[:,:-1].values.sum()
nan
df.iloc[:,:-1].fillna(0).values.sum()
4900.0
def sum_data(item):
    """Return the grand total of all numeric cells of one group.

    Missing values count as 0. Numeric columns are selected by dtype rather
    than by dropping the last column, so the result does not depend on where
    the label column sits or on whether the grouping column is passed to the
    callback at all.

    Args:
        item: DataFrame holding the rows of a single group.

    Returns:
        Scalar sum over every numeric cell of ``item``.
    """
    numeric = item.select_dtypes(include='number')
    return numeric.fillna(0).values.sum()
df.groupby('DB').apply(sum_data)
DB
A 2812.0
B 1080.0
C 1008.0
dtype: float64
def sum_data(item):
    """Return the grand total of one group wrapped in a one-element Series.

    Returning a Series makes ``groupby(...).apply`` produce a DataFrame
    whose single column carries the Series label. Missing values count as 0.
    Numeric columns are selected by dtype, so the result does not depend on
    the position or presence of the label column.

    Args:
        item: DataFrame holding the rows of a single group.

    Returns:
        Series with the single entry '总成绩' holding the total.
    """
    numeric = item.select_dtypes(include='number')
    return Series({'总成绩': numeric.fillna(0).values.sum()})
df.groupby('DB').apply(sum_data)
| 总成绩 |
|---|
| DB | |
|---|
| A | 2812.0 |
| B | 1080.0 |
| C | 1008.0 |
df
| MySQL | PostgreSQL | Oracle | MongoDB | SQLite | DB |
|---|
| 0 | 136.0 | 28 | 108 | 18 | 132 | A |
| 1 | 53.0 | 12 | 137 | 45 | 14 | A |
| 2 | 61.0 | 59 | 132 | 74 | 103 | A |
| 3 | NaN | 106 | 4 | 74 | 61 | C |
| 4 | 50.0 | 95 | 135 | 78 | 140 | A |
| 5 | 70.0 | 112 | 62 | 62 | 128 | B |
| 6 | 125.0 | 133 | 138 | 70 | 135 | A |
| 7 | 46.0 | 39 | 71 | 50 | 117 | B |
| 8 | 50.0 | 119 | 59 | 89 | 130 | C |
| 9 | 122.0 | 93 | 29 | 56 | 16 | C |
| 10 | 125.0 | 133 | 138 | 70 | 135 | A |
| 11 | 46.0 | 39 | 71 | 50 | 117 | B |
def sum_data(item):
    """Aggregate one group into three totals.

    Args:
        item: DataFrame with the columns 'MySQL', 'PostgreSQL', 'Oracle',
            'MongoDB' and 'SQLite'.

    Returns:
        Series with the entries
        'DBS' (combined total of Oracle, MongoDB and SQLite, NaN as 0),
        'MySQL' (sum of the MySQL column) and
        'PostgreSQL' (sum of the PostgreSQL column).
    """
    mysql_total = item['MySQL'].sum()
    postgresql_total = item['PostgreSQL'].sum()
    # The remaining three databases are reported as a single combined figure.
    others = item[['Oracle', 'MongoDB', 'SQLite']]
    combined_total = others.fillna(0).values.sum()
    return Series({
        'DBS': combined_total,
        'MySQL': mysql_total,
        'PostgreSQL': postgresql_total,
    })
df.groupby('DB').apply(sum_data)
| DBS | MySQL | PostgreSQL |
|---|
| DB | | | |
|---|
| A | 1802.0 | 550.0 | 460.0 |
| B | 728.0 | 162.0 | 190.0 |
| C | 518.0 | 172.0 | 318.0 |