import pandas as pd
# Load food_info.csv into a DataFrame and inspect what pandas produced.
path = r'F:\数据分析专用\数据分析与机器学习\food_info.csv'
# Only the read needs the open handle; everything after works on the
# in-memory DataFrame, so the file is closed as soon as parsing finishes.
with open(path, 'r') as f:
    data = pd.read_csv(f)
print(type(data))
print(data.dtypes)
# help() writes the documentation itself and returns None, so wrapping it in
# print() only appended a stray "None" line after the help text.
help(pd.read_csv)

文件操作

# Preview the frame: first three rows, last three rows, then the column labels.
first_rows = data.head(3)
print(first_rows)
last_rows = data.tail(3)
print(last_rows)
column_labels = data.columns
print(column_labels)

查看数据

# Sort the frame in place by the 'Carbohydrt_(g)' column, first ascending and
# then descending, printing that column after each pass.
carb_column = 'Carbohydrt_(g)'
for ascending in (True, False):
    data.sort_values(by=carb_column, ascending=ascending, inplace=True)
    print(data[carb_column])

数据排序

# Count the missing entries of the 'Age' column in two ways.
age = t_s['Age']

# Way 1: let pandas build a boolean mask, select the flagged entries and
# count them.
age_is_null = pd.isnull(age)
age_null_true = age[age_is_null]
age_null_count = len(age_null_true)
print(age_null_count)

# Way 2: plain Python. NaN compares unequal to itself, so `i != i` keeps only
# the NaN entries (assumes missing ages are stored as NaN -- confirm).
count_list = [i for i in age if i != i]
print(len(count_list))

数据的筛选

# Three ways of averaging the 'Age' column.
all_ages = t_s['Age']

# Naive: built-in sum over every entry divided by the entry count. A NaN
# entry makes the whole sum NaN.
mean_age = sum(all_ages) / len(all_ages)
print(mean_age)

# Manual: keep only the entries that age_is_null does not flag, then average.
good_ages = all_ages[~age_is_null]
correct_mean_age = sum(good_ages) / len(good_ages)
print(correct_mean_age)

# Built-in: Series.mean(), which skips NaN by default.
correct_mean_age = all_ages.mean()
print(correct_mean_age)

数据的处理方法(平均数)

# Average fare per passenger class computed by hand, followed by the same
# kind of grouped summaries produced with DataFrame.pivot_table.
passenger_classes = {1, 2, 3}
faces_by_class = {}
for this_class in passenger_classes:
    # Rows of this class only, then the mean of their 'Fare' values.
    pclass_rows = t_s[t_s['Pclass'] == this_class]
    pclass_fares = pclass_rows['Fare']
    fare_for_class = pclass_fares.mean()
    faces_by_class[this_class] = fare_for_class
print(faces_by_class)

# Aggregations are named by string: no numpy import is visible in this file,
# so the former np.mean / np.sum references could not resolve. pandas accepts
# the string names for the same aggregations.
passenger_s = t_s.pivot_table(index='Pclass', values='Survived', aggfunc='mean')
print(passenger_s)

# No aggfunc given: pivot_table averages by default.
passenger_age = t_s.pivot_table(index='Pclass', values='Age')
print(passenger_age)

passenger_price = t_s.pivot_table(index='Pclass', values='Fare')
print(passenger_price)

# Several value columns at once, summed per 'Embarked' value.
port_stats = t_s.pivot_table(index='Embarked', values=['Fare', 'Survived'], aggfunc='sum')
print(port_stats)

数据透视表


def hundredth_row(column):
    """Return the entry of ``column`` stored under index label 99.

    ``.loc`` looks up by label, so this is the hundredth entry only when the
    index is the default 0-based range.
    """
    hundredth_item = column.loc[99]
    return hundredth_item


# DataFrame.apply calls the function once per column by default (assumes t_r
# is a DataFrame -- confirm against where it is loaded).
# NOTE: this assignment rebinds the name ``hundredth_row`` from the function
# to the result, so the function cannot be called again afterwards.
hundredth_row = t_r.apply(hundredth_row)
print(hundredth_row)

自定义函数

def which_class(row):
    """Map the 'Pclass' value of ``row`` to a readable class label.

    Returns 'Unknown' when the value is null, "First Class" / "Second Class" /
    "Third Class" for 1 / 2 / 3, and None for any other value.
    """
    pclass = row['Pclass']
    if pd.isnull(pclass):
        return 'Unknown'
    elif pclass == 1:
        return "First Class"
    elif pclass == 2:
        return "Second Class"
    elif pclass == 3:
        return "Third Class"
    # Previously an implicit fall-through; spelled out so the None result for
    # unexpected values is visible.
    return None


# axis=1 hands each row (rather than each column) to the function.
classes = t_r.apply(which_class, axis=1)
print(classes)

自定义函数


import pandas as pd
from pandas import Series

# Load the Fandango score comparison CSV and look at two of its columns.
path = r'F:\数据分析专用\数据分析与机器学习\fandango_score_comparison.csv'
with open(path, 'r', encoding='utf-8') as f:
    data = pd.read_csv(f)

# Selecting a single column of the frame; the printed type shows what kind of
# object that selection is.
series_film = data['FILM']
print(type(series_film))
series_rt = data['RottenTomatoes']
print(series_rt[0:5])

# Build a Series of the 'RottenTomatoes' values labelled by the 'FILM'
# values, so entries can be looked up by film title.
film_names = series_film.values
print(type(film_names))
rt_scores = series_rt.values
series_custom = Series(rt_scores, index=film_names)
# Printed explicitly: a bare expression shows its value only in an
# interactive session and is silently discarded when run as a script.
print(series_custom[['Minions (2015)', 'Leviathan (2014)']])

Series结构

最新文章

  1. github标记
  2. Ubuntu 16.04 + Caffe
  3. 获取request的变量
  4. docker jenkins
  5. linux:centos准备及安装
  6. 论文笔记之:Playing for Data: Ground Truth from Computer Games
  7. devexpress中gridcontrol 一些样式改变
  8. SQL Server 2008无日志文件附加数据库
  9. MediaPlayer本地播放流程解析(一)
  10. Echarts环形进度使用 【1 简单的使用示例】
  11. python网络爬虫之scrapy 工程创建以及原理介绍
  12. Java-IO之CharArrayReader
  13. 【Android】用Cubism 2制作自己的Live2D——官方App样例源码学习(1)!
  14. 2017 ACM Jordanian Collegiate Programming Contest
  15. [LeetCode] Most Common Word 最常见的单词
  16. Silverlight 样式的灵活使用
  17. 使用PageHelper插件分页结合mybatis返回的列表个数不对问题解决
  18. 织梦Dedecms文件目录结构
  19. POJ2115 C Looooops 扩展欧几里德
  20. git 配置提交过滤文件

热门文章

  1. 3.在eclipse中创建Web项目,并部署到Tomcat上
  2. 【hihocoder 1296】数论三·约瑟夫问题
  3. 使用idea搭建maven项目时 java目录下的xml文件没有加载的解决方法
  4. node环境变量配置,npm环境变量配置
  5. Windows系统SNMP数据监测与OID
  6. 《coredump问题原理探究》Linux x86版7.7节 set对象
  7. HDU 5402 Travelling Salesman Problem (模拟 有规律)(左上角到右下角路径权值最大,输出路径)
  8. uva live 6827 Galaxy collision
  9. 写一个类似淘宝的ios app需要用到哪些技术?
  10. 深度学习实战篇-基于RNN的中文分词探索