CODE:

#!/usr/bin/python
# -*- coding: utf-8 -*-
'''
Created on 2014-8-19
@author: guaguastd
@name: job_title_standard.py

Read the 'Job Title' column of a connections CSV export, expand common
abbreviations (e.g. 'Sr.' -> 'Senior'), and print two frequency tables:
one of whole job titles and one of the individual words in those titles.
'''
import os
import csv
import re
from collections import Counter
from operator import itemgetter

# specify csv directory
CSV_FILE = os.path.join(r"E:", "\\", "eclipse", "LinkedIn", "dfile", "my_connections.csv")

# define a set of transforms that converts the first item
# to the second item
transforms = [
    ('Sr.', 'Senior'),
    ('Sr', 'Senior'),
    ('Jr.', 'Junior'),
    ('Jr', 'Junior'),
    ('CEO', 'Chief Executive Officer'),
    ('COO', 'Chief Operating Officer'),
    ('CTO', 'Chief Technology Officer'),
    ('CFO', 'Chief Finance Officer'),
    ('VP', 'Vice President'),
]


def load_contacts(csv_path):
    '''Return every row of the CSV file at csv_path as a list of dicts.

    The file is opened in a ``with`` block so the handle is closed as soon
    as the rows have been read.
    '''
    # NOTE(review): the csv docs recommend open(..., newline='') on Python 3;
    # the default mode is kept here so the script still runs on Python 2.
    with open(csv_path) as handle:
        return list(csv.DictReader(handle, delimiter=',', quotechar='"'))


def split_titles(contacts):
    '''Return the individual job titles found in contacts.

    Combined titles are split on '/' only ("President/CEO" becomes two
    titles); variants such as "President & CEO" or "President and CEO" are
    kept as a single title. Blank pieces are dropped, so an empty cell or a
    trailing '/' contributes nothing.

    Raises KeyError if a row has no 'Job Title' column.
    '''
    titles = []
    for contact in contacts:
        # DictReader fills missing trailing cells with None.
        raw_title = contact['Job Title'] or ''
        for piece in raw_title.split('/'):
            piece = piece.strip()
            if piece:
                titles.append(piece)
    return titles


def _whole_word_pattern(abbreviation):
    '''Compile a regex matching abbreviation only as a stand-alone word.

    A word-character guard is added only on the sides where the abbreviation
    itself starts/ends with a word character, so 'Sr.' (ending in '.') still
    matches when followed directly by a letter.
    '''
    prefix = r'(?<!\w)' if re.match(r'\w', abbreviation[:1]) else ''
    suffix = r'(?!\w)' if re.match(r'\w', abbreviation[-1:]) else ''
    return re.compile(prefix + re.escape(abbreviation) + suffix)


def expand_abbreviations(titles, replacements=None):
    '''Return a new list of titles with known abbreviations spelled out.

    replacements is a sequence of (abbreviation, expansion) pairs applied in
    order; it defaults to the module-level ``transforms``. Matching is
    case-sensitive and restricted to whole words, so 'DIRECTOR' is not
    mangled by the 'CTO' rule and 'SVP' is not touched by the 'VP' rule.
    '''
    if replacements is None:
        replacements = transforms
    compiled = [(_whole_word_pattern(abbr), full) for abbr, full in replacements]
    expanded = []
    for title in titles:
        for pattern, full in compiled:
            # A callable replacement keeps backslashes in `full` literal.
            title = pattern.sub(lambda _match, full=full: full, title)
        expanded.append(title)
    return expanded


def tokenize(titles):
    '''Split each title on whitespace and strip commas from every word.'''
    return [word.strip(',') for title in titles for word in title.split()]


def frequency_rows(items, min_length=0):
    '''Return (item, count) pairs sorted by descending count.

    Items shorter than min_length characters are left out.
    '''
    ranked = sorted(Counter(items).items(), key=itemgetter(1), reverse=True)
    return [(item, freq) for item, freq in ranked if len(item) >= min_length]


def print_frequency_table(label, rows):
    '''Print rows as a left-aligned two-column table headed label / Freq.'''
    # Imported here rather than at module level so the parsing helpers above
    # stay importable where prettytable is not installed.
    from prettytable import PrettyTable

    table = PrettyTable(field_names=[label, 'Freq'])
    table.align = 'l'
    for item, freq in rows:
        table.add_row([item, freq])
    print(table)


def main(csv_path=CSV_FILE):
    '''Load the contacts CSV and print the title and token frequency tables.'''
    titles = expand_abbreviations(split_titles(load_contacts(csv_path)))

    # Print out a table of titles sorted by frequency
    print_frequency_table('Title', frequency_rows(titles))

    # Print out a table of tokens sorted by frequency; tokens of one or two
    # characters are skipped.
    print_frequency_table('Token', frequency_rows(tokenize(titles), min_length=3))


if __name__ == '__main__':
    main()

RESULT:

+-----------------------------------+------+
| Title | Freq |
+-----------------------------------+------+
| Senior Software Developer | 1 |
| Sales Manager | 1 |
| Software Manager | 1 |
| Online Marketing Manager | 1 |
| Senior Consultant | 1 |
| Chief Executive Officer & Founder | 1 |
| Director | 1 |
| S | 1 |
| Student | 1 |
| Senior Software Engineer | 1 |
| ??? | 1 |
+-----------------------------------+------+
+------------+------+
| Token | Freq |
+------------+------+
| Manager | 3 |
| Senior | 3 |
| Software | 3 |
| Marketing | 1 |
| Founder | 1 |
| Consultant | 1 |
| Executive | 1 |
| Sales | 1 |
| Developer | 1 |
| Director | 1 |
| Chief | 1 |
| Officer | 1 |
| Student | 1 |
| Online | 1 |
| ??? | 1 |
| Engineer | 1 |
+------------+------+

最新文章

  1. WebApi系列~StringContent与FormUrlEncodedContent
  2. ssh反向连接和简单实现
  3. 设计模式可复用面向对象软件设计基础之对象创建型模式—ABSTRACT FACTORY( 抽象工厂)
  4. [转]俞敏洪:我和马云就差了8个字... [来自: news.mbalib.com]
  5. phpcms安装
  6. NGUI 动态添加控件
  7. hdu Strange fuction
  8. Swift学习笔记十二
  9. 【转】can't find referenced method 'android.app.RemoteInput[] getRemoteInputs()' in class android.app.Notification$Action
  10. 使用Markdown在博客里插入代码
  11. centos7 install jdk
  12. MVC 错误处理1
  13. mongoDB global,startUplog
  14. SVN和GIT的使用
  15. CAGradientLayer颜色渐变器
  16. Python之matplotlib模块安装
  17. [BZOJ 2654]tree(陈立杰)
  18. Luogu1574 超级数
  19. ie烦人的bug篇
  20. 支付宝 app支付 沙盘使用

热门文章

  1. iOS-Cocoapods更新不及时
  2. Linux监控平台搭建-监控项
  3. pat 甲级 1072. Gas Station (30)
  4. 用来武装Firebug的十四款Firefox插件
  5. jquery text
  6. js Regex match, exec, test & jquery plugin, visit the official website!
  7. 自己写的页面加载进度条jquery插件
  8. Python 函数的一般形式及参数
  9. javascript 动态添加城市
  10. Python Challenge 第八关