发布时间:2019-09-10 08:51:09 编辑:auto 阅读(1997)
# -*- coding: utf-8 -*-
# Demonstration of pandas groupby on a small DataFrame: group sizes, group
# means, and grouping by one or two keys. Single-argument print(...) calls
# are used so the script runs on both Python 2 and Python 3.
import pandas as pd
import numpy as np

# Note: 'data1' and 'data2' hold numeric *strings*, so they must be cast
# before any arithmetic aggregation such as mean().
df = pd.DataFrame({
    'key1': list('aabba'),
    'key2': ['one', 'two', 'one', 'two', 'one'],
    'data1': ['1', '3', '5', '7', '9'],
    'data2': ['2', '4', '6', '8', '10'],
})
print(df)

# Group by the values of key1 and count the rows in each group.
grouped = df.groupby(['key1']).size()
print(grouped)
print('++++++++++++++')

# Cast data1 to float first, then group by key1 and take the mean.
grouped1 = df['data1'].astype(float).groupby(df['key1']).mean()
print(grouped1)
print(type(grouped1))  # a Series
print('++++++++++++++++++')

# Append a new column to the DataFrame.
df['add'] = ['AA', 'BB', "CC", 'DD', 'EE']
print(df)

# Group by two columns. When groupby is called on the DataFrame itself,
# the keys can be given directly as column names, e.g. ['key1'].
grouped2 = df.groupby(['key1', 'key2']).size()
print(grouped2)
print(type(grouped2))
print('++++++++++++++++++')

# Group by key1 and add, then take the mean of data1. When groupby is
# called on a single column such as df['data1'], the keys must be passed
# as Series (df['key1']), not as column-name strings.
grouped3 = df['data1'].astype(float).groupby([df['key1'], df['add']]).mean()
print(grouped3)
print(type(grouped3))  # a Series
运行结果如下:
  data1 data2 key1 key2
0     1     2    a  one
1     3     4    a  two
2     5     6    b  one
3     7     8    b  two
4     9    10    a  one
key1
a    3
b    2
dtype: int64
++++++++++++++
key1
a    4.333333
b    6.000000
Name: data1, dtype: float64
<class 'pandas.core.series.Series'>
++++++++++++++++++
  data1 data2 key1 key2 add
0     1     2    a  one  AA
1     3     4    a  two  BB
2     5     6    b  one  CC
3     7     8    b  two  DD
4     9    10    a  one  EE
key1  key2
a     one     2
      two     1
b     one     1
      two     1
dtype: int64
<class 'pandas.core.series.Series'>
++++++++++++++++++
key1  add
a     AA     1.0
      BB     3.0
      EE     9.0
b     CC     5.0
      DD     7.0
Name: data1, dtype: float64
<class 'pandas.core.series.Series'>
上一篇: python 网络编程学习 非阻塞soc
下一篇: python 字典操作提取key,val
47851
46411
37294
34741
29323
25979
24927
19959
19551
18040
5800°
6422°
5937°
5967°
7074°
5921°
5952°
6447°
6409°
7789°