import pandas as pd
import numpy as np
# A Series is a one-dimensional labelled array; np.nan marks a missing value.
s = pd.Series([1, 3, 6, np.nan, 44, 1])
# A 4x5 DataFrame of random floats with default integer row/column labels.
df = pd.DataFrame(np.random.random((4, 5)))

# Common DataFrame attributes.
# The results are printed explicitly: a bare expression displays nothing
# when this file is run as a script.
print(df.dtypes)   # dtype of each column
print(df.index)    # row labels
print(df.columns)  # column labels
print(df.values)   # the data as a NumPy array

# Common DataFrame methods (each returns a new object; df is left unchanged).
print(df.describe())  # per-column summary statistics
print(df.T)           # transpose
print(df.sort_index(axis=1, ascending=False))  # columns in descending label order
print(df.sort_values(by=4))                    # rows ordered by the values in column 4

# Selecting data
# NOTE(review): the start date literal was an empty string, which is not a
# valid date. 2013-01-01 is an assumed replacement -- confirm the intended date.
dates = pd.date_range('20130101', periods=6)
df = pd.DataFrame(
    np.arange(24).reshape((6, 4)),
    index=dates,
    columns=['A', 'B', 'C', 'D'],
)

# Row or column selection with plain []:
# a slice selects a contiguous run of rows, a list of names selects columns.
print(df[0:3])
print(df[['A', 'D']])

# Select by label: loc
# NOTE(review): the original row label was an empty string; the second date
# is used here as a stand-in -- confirm which row was intended.
print(df.loc[dates[1], :])
print(df.loc[:, ['A', 'B']])

# Select by position: iloc
print(df.iloc[[0, 2], [0, 3]])

# Mixed selection (rows by position, columns by label).
# `.ix` no longer exists in pandas, so the column labels are translated to
# positions and everything goes through iloc.
print(df.iloc[[0, 2], df.columns.get_indexer(['A', 'D'])])

# Boolean indexing
print(df[df.B > 5])

# Setting data
df.iloc[2, 2] = 111
# NOTE(review): the original row label was an empty string; the first date is
# used here as a stand-in -- confirm which row was intended.
df.loc[dates[0], 'D'] = 222
# A single .loc assignment instead of the chained `df.B[mask] = 0`, which may
# write to a temporary copy and leave df unchanged.
df.loc[df.A > 5, 'B'] = 0
print(df)

df['F'] = np.nan
df['E'] = range(6)
print(df)

# Handling missing data
# B and C hold integers; cast them to float before inserting NaN instead of
# relying on an implicit dtype upcast during assignment.
df = df.astype({'B': float, 'C': float})
df.iloc[0, 1] = np.nan
df.iloc[1, 2] = np.nan
print(df)
print(df.dropna(axis=0, how='all'))  # how = {'any', 'all'}
print(df.fillna(value=0))
# True if at least one cell in the whole frame is missing.
print(df.isnull().values.any())

# Combining DataFrames
# Concatenating
df1 = pd.DataFrame(np.ones((3, 4)) * 0, columns=['a', 'b', 'c', 'd'])
df2 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['a', 'b', 'c', 'd'])
df3 = pd.DataFrame(np.ones((3, 4)) * 2, columns=['a', 'b', 'c', 'd'])

# axis=0 stacks the frames vertically; ignore_index renumbers the rows 0..n-1.
res = pd.concat([df1, df2, df3], axis=0, ignore_index=True)
# axis=1 places the frames side by side.
res1 = pd.concat([df1, df2, df3], axis=1)

# The `join` parameter
df1 = pd.DataFrame(np.ones((3, 4)) * 0, columns=['a', 'b', 'c', 'd'], index=[1, 2, 3])
df2 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['b', 'c', 'd', 'e'], index=[2, 3, 4])

# 'outer' keeps the union of the columns (missing cells become NaN);
# 'inner' keeps only the columns present in both frames.
res = pd.concat([df1, df2], join='outer', ignore_index=True)
res = pd.concat([df1, df2], join='inner', ignore_index=True)
print(res)

# Aligning on one frame's index (formerly the `join_axes` argument)
res = pd.concat([df1, df2], axis=1, join='inner')
# `join_axes` is no longer accepted by pd.concat; reindexing df2 onto df1's
# row labels before concatenating keeps exactly the rows of df1.
res = pd.concat([df1, df2.reindex(df1.index)], axis=1)

# Appending rows
df1 = pd.DataFrame(np.ones((3, 4)) * 0, columns=['a', 'b', 'c', 'd'], index=[1, 2, 3])
df2 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['b', 'c', 'd', 'e'], index=[2, 3, 4])
df3 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['b', 'c', 'd', 'e'], index=[2, 3, 4])

# DataFrame.append no longer exists; pd.concat with ignore_index=True
# produces the same stacked, renumbered result.
res = pd.concat([df1, df2, df3], ignore_index=True)
res1 = pd.concat([df1, df2, df3])
print(res)
print(res1)

# DataFrame merge
# Merge on one key
left = pd.DataFrame({'key': ['K1', 'K2', 'K3'],
                     'A': [1, 2, 3],
                     'B': [4, 5, 6]})
right = pd.DataFrame({'key': ['K0', 'K1', 'K3'],
                      'A': [11, 43, 53],
                      'D': [12, -1, 0]})
# how='outer' keeps the keys of both frames; cells with no match become NaN.
res = pd.merge(left, right, on='key', how='outer')
print(res)

# Merge on two or more keys
left = pd.DataFrame({'key0': ['K1', 'K2', 'K3'],
                     'key1': ['X0', 'X2', 'X3'],
                     'A': [1, 2, 3],
                     'B': [4, 5, 6]})
right = pd.DataFrame({'key0': ['K0', 'K1', 'K3'],
                      'key1': ['X1', 'X0', 'K3'],
                      'A': [11, 43, 53],
                      'D': [12, -1, 0]})
# Rows match only when both key0 and key1 are equal.
res = pd.merge(left, right, on=['key0', 'key1'], how='outer')
print(res)

# Merge on the index
left = pd.DataFrame({'A': [1, 2, 3],
                     'B': [4, 5, 6]},
                    index=['K0', 'K1', 'K2'])
right = pd.DataFrame({'A': [11, 43, 53],
                      'D': [12, -1, 0]},
                     index=['K1', 'K2', 'K3'])
# The row labels act as the join key on both sides.
res = pd.merge(left, right, left_index=True, right_index=True)
print(res)

# Handle overlapping columns
left = pd.DataFrame({'key': ['K1', 'K2', 'K3'],
                     'A': [1, 2, 3],
                     'B': [4, 5, 6]})
right = pd.DataFrame({'key': ['K0', 'K1', 'K3'],
                      'A': [11, 43, 53],
                      'B': [12, -1, 0]})
# A and B exist on both sides; the suffixes tell the two origins apart.
res = pd.merge(left, right, on='key',
               suffixes=('_left', '_right'), how='outer')
print(res)

# Plotting
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Plot data

# Series: a random walk built as the cumulative sum of normal samples.
data = pd.Series(np.random.randn(1000), index=np.arange(1000))
data = data.cumsum()
data.plot()
print(data)

# DataFrame: four random walks, one per column.
data = pd.DataFrame(np.random.randn(1000, 4),
                    index=np.arange(1000),
                    columns=list("ABCD"))
print(data.head())
data = data.cumsum()
data.plot()

# Two scatter series drawn on the same axes.
# NOTE(review): both series were labelled 'Class 2'; the first is renamed so
# the legend can tell them apart -- confirm the intended label.
ax = data.plot.scatter(x='A', y='C',
                       color='Red',
                       label='Class 1')
data.plot.scatter(x='A', y='B',
                  color='DarkGreen',
                  label='Class 2',
                  ax=ax)

# Display the figures created above when the file is run as a script.
plt.show()

# --- Scraped blog sidebar, not part of the script: "最新文章" (latest articles) ---
#
#   1. java-集合4
#   2. win7远程桌面恢复全屏状态快捷键
#   3. Java NIO服务器端开发
#   4. 转ATL对象类型
#   5. Parallel for-each loops in .NET C# z
#   6. [NOIP2013]转圈游戏
#   7. 几本关于PHP安全的书
#   8. bzoj 3225: [Sdoi2008] 立方体覆盖 题解
#   9. TensorFlow深度学习笔记 文本与序列的深度模型
#   10. form2js的使用(续BootstrapTable)
#   11. 织梦调用seotitle
#   12. Vue.js响应式原理
#   13. CentOS 7.4编译安装Nginx1.10.3+MySQL5.7.16
#   14. tarroni music
#   15. centos7----pstree
#   16. HTML中的元素分类
#   17. 用UICollectionView实现无限轮播图
#   18. Netty核心概念(5)之Channel
#   19. PBOC中文件结构,文件类型解析
#   20. ubuntu安装elasticSearch及插件

# --- Scraped blog sidebar, not part of the script: "热门文章" (popular articles) ---
#
#   1. 学习鸟哥的Linux私房菜笔记(10)——bash2
#   2. 【codeforces 782D】 Innokenty and a Football League
#   3. Excel、记事本数据导入到数据库
#   4. less - 循环 loop
#   5. 【从翻译mos文章】采用高速全扫描索引(index ffs) 为了避免全表扫描
#   6. C# 7.0 使用下划线忽略使用的变量
#   7. IE8支持function.bind()方法
#   8. 创Python规划2
#   9. 3-2 从降级的例子 认识Polly套路
#   10. MySQL第五个学习笔记 该数据表的操作