import xlrd
import jieba
import sys
import importlib
import os #python内置的包,用于进行文件目录操作,我们将会用到os.listdir函数
import pickle #导入cPickle包并且取一个别名pickle #持久化类
import random
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from pylab import mpl
from sklearn.naive_bayes import MultinomialNB # 导入多项式贝叶斯算法包
from sklearn import svm from sklearn import metrics
from sklearn.datasets.base import Bunch
from sklearn.feature_extraction.text import TfidfVectorizer
importlib.reload(sys) #把内容和类别转化成一个向量的形式
trainContentdatasave=[] #存储所有训练和测试数据的分词
testContentdatasave=[] trainContentdata = []
testContentdata = []
trainlabeldata = []
testlabeldata = [] #导入文本描述的训练和测试数据
def importTrainContentdata():
file = '20180716_train.xls'
wb = xlrd.open_workbook(file)
ws = wb.sheet_by_name("Sheet1")
for r in range(ws.nrows):
trainContentdata.append(ws.cell(r, 0).value) def importTestContentdata():
file = '20180716_test.xls'
wb = xlrd.open_workbook(file)
ws = wb.sheet_by_name("Sheet1")
for r in range(ws.nrows):
testContentdata.append(ws.cell(r, 0).value) #导入类别的训练和测试数据
def importTrainlabeldata():
file = '20180716_train_label.xls'
wb = xlrd.open_workbook(file)
ws = wb.sheet_by_name("Sheet1")
for r in range(ws.nrows):
trainlabeldata.append(ws.cell(r, 0).value) def importTestlabeldata():
file = '20180716_test_label.xls'
wb = xlrd.open_workbook(file)
ws = wb.sheet_by_name("Sheet1")
for r in range(ws.nrows):
testlabeldata.append(ws.cell(r, 0).value) if __name__=="__main__": importTrainContentdata()
importTestContentdata()
importTrainlabeldata()
importTestlabeldata() '''贝叶斯
clf = MultinomialNB(alpha=0.052).fit(train_set.tdm, train_set.label)
#clf = svm.SVC(C=0.7, kernel='poly', gamma=10, decision_function_shape='ovr')
clf.fit(train_set.tdm, train_set.label)
predicted=clf.predict(test_set.tdm) 逻辑回归
tv = TfidfVectorizer()
train_data = tv.fit_transform(X_train)
test_data = tv.transform(X_test) lr = LogisticRegression(C=3)
lr.fit(train_set.tdm, train_set.label)
predicted=lr.predict(test_set.tdm)
print(lr.score(test_set.tdm, test_set.label))
#print(test_set.tdm) #SVM
clf = SVC(C=1500)
clf.fit(train_set.tdm, train_set.label)
predicted=clf.predict(test_set.tdm)
print(clf.score(test_set.tdm, test_set.label))
''' tv = TfidfVectorizer()
train_data = tv.fit_transform(trainContentdata)
test_data = tv.transform(testContentdata) clf = SVC(C=1500)
clf.fit(train_data, trainlabeldata)
print(clf.score(test_data, testlabeldata)) a=[]
b=[]
for i in range(len(predicted)):
b.append((int)(float(predicted[i])))
a.append(int(test_set.label[i][0])) '''
f=open('F:/goverment/ArticleMining/predict.txt', 'w')
for i in range(len(predicted)):
f.write(str(b[i]))
f.write('\n')
f.write("写好了")
f.close()
#for i in range(len(predicted)):
#print(b[i])
'''
#metrics_result(a, b)

最新文章

  1. 判断json数据是否为空
  2. WPF常用方法,事件驱动和控件遍历
  3. C#随机颜色和随机字母
  4. VC++中内存对齐
  5. poj3468(线段树 边覆盖)
  6. float保留7位double保留15位之后的数字四舍五进
  7. mongodb集成spring
  8. BZOJ 2535 Plane 航空管制2
  9. HTML5 + AJAX ( 原生JavaScript ) 异步多文件上传
  10. KB奇遇记(2):缘起
  11. Dashboard集群
  12. JS-随机生成的密码
  13. PHP 5 Filesystem 函数
  14. Angular4学习笔记-目录汇总
  15. apache2.4 httpd.conf httpd-vhost.conf配置
  16. LeetCode - Reorganize String
  17. 【C#】详解C#委托
  18. hdu 2433 Travel
  19. windows 下获取当前进程的线程数量
  20. iOS开发之AFNetworking实现数据传输和文件上传

热门文章

  1. LeetCode OJ:House Robber II(房屋窃贼II)
  2. Linux系统中DHCP的配置
  3. vue.js 源代码学习笔记 ----- 工具方法 lang
  4. react 入门的好东西 可以做出一个完整的网站
  5. ThinkPHP5.0完全开发手册.【CHM】下载
  6. if条件判断
  7. Linux 释放物理内存和虚拟内存
  8. Linux基本概念及操作
  9. Swift UIAlertController、UISegmentedControl
  10. Spring 学习笔记(一)