Python中pandas的groupby()函数讲解

发布时间:2019-09-10 08:51:09 | 编辑:auto | 阅读(1997)

    # -*- coding: utf-8 -*-
    import pandas as pd
    import  numpy as np
    
    # Demo of pandas groupby: build a small frame, then group it in four
    # different ways and print each result together with its type.
    #
    # Every print below is written as a single-argument call, print(x), so the
    # script runs on Python 3 and still prints the same text on Python 2.

    # Sample data: two key columns to group on and two value columns.
    # The values are stored as strings, so they must be converted before any
    # numeric aggregation.
    df = pd.DataFrame({'key1': list('aabba'),
                       'key2': ['one', 'two', 'one', 'two', 'one'],
                       'data1': ['1', '3', '5', '7', '9'],
                       'data2': ['2', '4', '6', '8', '10']})
    print(df)

    # Group by the values of key1 and count the rows in each group.
    grouped = df.groupby(['key1']).size()
    print(grouped)
    print('++++++++++++++')

    # Convert data1 to float first, then group by key1 and take the mean.
    grouped1 = df['data1'].astype(float).groupby(df['key1']).mean()
    print(grouped1)
    print(type(grouped1))       # the result is a Series
    print('++++++++++++++++++')

    # Append a new column to the DataFrame.
    df['add'] = ['AA', 'BB', 'CC', 'DD', 'EE']
    print(df)

    # Group by two columns at once and count the rows in each group.
    # Note: when groupby is called on the DataFrame itself, the keys can be
    # given directly as column names, e.g. ['key1'].
    grouped2 = df.groupby(['key1', 'key2']).size()
    print(grouped2)
    print(type(grouped2))
    print('++++++++++++++++++')

    # Group by key1 and add (not key2), then take the mean of data1.
    # Note: when groupby is called on a single column such as df['data1'],
    # the keys are passed as the columns themselves (df['key1'], df['add']),
    # because that Series carries no other columns to look names up in.
    grouped3 = df['data1'].astype(float).groupby([df['key1'], df['add']]).mean()
    print(grouped3)
    print(type(grouped3))       # the result is a Series
    
    
    
    
    运行结果如下:
      data1 data2 key1 key2
    0     1     2    a  one
    1     3     4    a  two
    2     5     6    b  one
    3     7     8    b  two
    4     9    10    a  one
    key1
    a    3
    b    2
    dtype: int64
    ++++++++++++++
    key1
    a    4.333333
    b    6.000000
    Name: data1, dtype: float64
    <class 'pandas.core.series.Series'>
    ++++++++++++++++++
      data1 data2 key1 key2 add
    0     1     2    a  one  AA
    1     3     4    a  two  BB
    2     5     6    b  one  CC
    3     7     8    b  two  DD
    4     9    10    a  one  EE
    key1  key2
    a     one     2
          two     1
    b     one     1
          two     1
    dtype: int64
    <class 'pandas.core.series.Series'>
    ++++++++++++++++++
    key1  add
    a     AA     1.0
          BB     3.0
          EE     9.0
    b     CC     5.0
          DD     7.0
    Name: data1, dtype: float64
    <class 'pandas.core.series.Series'>
    

关键字