tynbl.github.io

Pandas统计计算和描述

import numpy as np
import pandas as pd
df_obj = pd.DataFrame(np.random.randn(5,4), columns = ['a', 'b', 'c', 'd'])
df_obj
          a         b         c         d
0 -0.281460 -0.458650 -1.102619  0.226367
1 -0.045202 -0.801324 -2.910940 -1.249168
2  1.260569 -0.262852  1.348291 -0.103332
3 -0.688225 -0.658354 -1.498003 -1.288892
4 -0.415888  2.523040  0.107214 -0.849228
df_obj.sum()
a   -0.170205
b    0.341860
c   -4.056056
d   -3.264252
dtype: float64
df_obj.max()
a    1.260569
b    2.523040
c    1.348291
d    0.226367
dtype: float64
df_obj.min(axis=1)
0   -1.102619
1   -2.910940
2   -0.262852
3   -1.498003
4   -0.849228
dtype: float64
df_obj.describe()
              a         b         c         d
count  5.000000  5.000000  5.000000  5.000000
mean  -0.034041  0.068372 -0.811211 -0.652850
std    0.760118  1.387206  1.618056  0.684416
min   -0.688225 -0.801324 -2.910940 -1.288892
25%   -0.415888 -0.658354 -1.498003 -1.249168
50%   -0.281460 -0.458650 -1.102619 -0.849228
75%   -0.045202 -0.262852  0.107214 -0.103332
max    1.260569  2.523040  1.348291  0.226367