样本示意,为kdd99数据源:

0,udp,private,SF,105,146,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,254,1.00,0.01,0.00,0.00,0.00,0.00,0.00,0.00,normal.
0,udp,private,SF,105,146,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,254,1.00,0.01,0.00,0.00,0.00,0.00,0.00,0.00,normal.
0,udp,private,SF,105,146,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,254,1.00,0.01,0.00,0.00,0.00,0.00,0.00,0.00,normal.
0,udp,private,SF,105,146,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,254,1.00,0.01,0.00,0.00,0.00,0.00,0.00,0.00,snmpgetattack.
0,udp,private,SF,105,146,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,254,1.00,0.01,0.01,0.00,0.00,0.00,0.00,0.00,snmpgetattack.
0,udp,private,SF,105,146,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,255,1.00,0.00,0.01,0.00,0.00,0.00,0.00,0.00,snmpgetattack.
0,udp,domain_u,SF,29,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0.00,0.00,0.00,0.00,0.50,1.00,0.00,10,3,0.30,0.30,0.30,0.00,0.00,0.00,0.00,0.00,normal.
0,udp,private,SF,105,146,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,253,0.99,0.01,0.00,0.00,0.00,0.00,0.00,0.00,normal.
0,udp,private,SF,105,146,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,254,1.00,0.01,0.00,0.00,0.00,0.00,0.00,0.00,snmpgetattack.
0,tcp,http,SF,223,185,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,4,4,0.00,0.00,0.00,0.00,1.00,0.00,0.00,71,255,1.00,0.00,0.01,0.01,0.00,0.00,0.00,0.00,normal.
0,udp,private,SF,105,146,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,254,1.00,0.01,0.00,0.00,0.00,0.00,0.00,0.00,snmpgetattack.
0,tcp,http,SF,230,260,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,19,0.00,0.00,0.00,0.00,1.00,0.00,0.11,3,255,1.00,0.00,0.33,0.07,0.33,0.00,0.00,0.00,normal.
0,udp,private,SF,105,146,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,254,1.00,0.01,0.01,0.00,0.00,0.00,0.00,0.00,normal.
0,udp,private,SF,105,146,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,252,0.99,0.01,0.00,0.00,0.00,0.00,0.00,0.00,snmpgetattack.
1,tcp,smtp,SF,3170,329,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,2,0.00,0.00,0.00,0.00,1.00,0.00,1.00,54,39,0.72,0.11,0.02,0.00,0.02,0.00,0.09,0.13,normal.
0,tcp,http,SF,297,13787,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,2,2,0.00,0.00,0.00,0.00,1.00,0.00,0.00,177,255,1.00,0.00,0.01,0.01,0.00,0.00,0.00,0.00,normal.
0,tcp,http,SF,291,3542,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,12,12,0.00,0.00,0.00,0.00,1.00,0.00,0.00,187,255,1.00,0.00,0.01,0.01,0.00,0.00,0.00,0.00,normal.
0,tcp,http,SF,295,753,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,21,22,0.00,0.00,0.00,0.00,1.00,0.00,0.09,196,255,1.00,0.00,0.01,0.01,0.00,0.00,0.00,0.00,normal.
0,udp,private,SF,105,146,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,254,1.00,0.01,0.01,0.00,0.00,0.00,0.00,0.00,snmpgetattack.
0,udp,private,SF,105,146,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,254,1.00,0.01,0.00,0.00,0.00,0.00,0.00,0.00,snmpgetattack.
0,tcp,http,SF,268,9235,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,5,5,0.00,0.00,0.00,0.00,1.00,0.00,0.00,58,255,1.00,0.00,0.02,0.05,0.00,0.00,0.00,0.00,normal.
0,udp,private,SF,105,146,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,253,0.99,0.01,0.00,0.00,0.00,0.00,0.00,0.00,snmpgetattack.
0,tcp,http,SF,223,185,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,3,3,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,255,1.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,normal.
0,tcp,http,SF,227,8841,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,13,13,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,255,1.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,normal.
0,tcp,http,SF,222,19564,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,22,23,0.00,0.00,0.00,0.00,1.00,0.00,0.09,255,255,1.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,normal.
0,tcp,ftp_data,SF,740,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0.00,0.00,0.00,0.00,1.00,0.00,0.00,77,33,0.34,0.08,0.34,0.06,0.00,0.00,0.00,0.00,normal.
0,udp,private,SF,105,146,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,254,1.00,0.01,0.00,0.00,0.00,0.00,0.00,0.00,normal.
0,tcp,ftp_data,SF,35195,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,10,0.00,0.00,0.00,0.00,1.00,0.00,0.00,92,44,0.43,0.07,0.43,0.05,0.00,0.00,0.00,0.00,normal.
0,tcp,ftp_data,SF,8325,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,20,20,0.00,0.00,0.00,0.00,1.00,0.00,0.00,103,54,0.49,0.06,0.49,0.04,0.00,0.00,0.00,0.00,normal.

代码:

# -*- coding:utf-8 -*-

import re
import matplotlib.pyplot as plt
import os
from sklearn.feature_extraction.text import CountVectorizer
from sklearn import preprocessing
from sklearn import cross_validation
import os
from sklearn.datasets import load_iris
from sklearn import tree
import pydotplus
from sklearn.preprocessing import LabelEncoder
import numpy as np
import pandas as pd
from sklearn_pandas import DataFrameMapper


def label(x):
    """Map a raw class string to a binary target.

    Args:
        x: value from the last column of the dataset (e.g. "normal.").

    Returns:
        0 when ``x`` equals "normal.", otherwise 1.
    """
    return 0 if x == "normal." else 1


if __name__ == '__main__':
    # Load the headerless CSV; columns are therefore named 0..N-1.
    data = pd.read_csv('../data/kddcup99/corrected', sep=",", header=None)
    print(data.columns)
    print("%s %s" % (data.iloc[0, 0], data.iloc[0, 1]))
    print(len(data))
    col_cnt = len(data.columns)
    label_col = col_cnt - 1  # the class string lives in the last column

    # Keep only two classes: "normal." and "guess_passwd.".
    normal = data.loc[data.loc[:, label_col] == "normal.", :]
    print("normal len: %d" % len(normal))
    guess = data.loc[data.loc[:, label_col] == "guess_passwd.", :]
    # NOTE(review): this prints the guess_passwd row count under the same
    # "normal len:" label as above; the text is kept unchanged on purpose.
    print("normal len: %d" % len(guess))

    data = pd.concat([normal, guess])
    print(len(data))

    # Integer-encode every feature column whose first value is a string.
    le = preprocessing.LabelEncoder()
    for i in range(label_col):
        if isinstance(data.iloc[0, i], str):
            print("tranform string column only: %d" % i)
            # Whole-column assignment so the column takes the encoded dtype.
            data[i] = le.fit_transform(data[i])
    data[label_col] = data[label_col].apply(label)
    print("%s %s" % (data.iloc[0, 0], data.iloc[0, 1]))

    # Features are every column except the last; the target is the last one.
    x = data.iloc[:, :label_col]
    #x = data.iloc[:, [0,4,5,6,7,8,22,23,24,25,26,27,28,29,30]]
    y = data.iloc[:, label_col]

    # also OK
    #     data = data.as_matrix()
    #     x = data[:, range(col_cnt-1)]
    #     y = data[:, col_cnt-1]

    print("x=>")
    print(x.iloc[0:3, :])
    print("y=>")
    print(y[-3:])
    #v=load_kdd99("../data/kddcup99/corrected")
    #x,y=get_guess_passwdandNormal(v)

    # Fit once on the full data; this fitted tree is the one exported below.
    # The original refit a second time after cross-validation; that refit is
    # dropped because clf is already fitted on the same x and y.
    clf = tree.DecisionTreeClassifier()
    clf = clf.fit(x, y)
    print(clf)

    # 10-fold cross-validation scores, using all available cores.
    print(cross_validation.cross_val_score(clf, x, y, n_jobs=-1, cv=10))

    # Render the fitted tree to a PDF through Graphviz dot source.
    dot_data = tree.export_graphviz(clf, out_file=None)
    graph = pydotplus.graph_from_dot_data(dot_data)
    graph.write_pdf("../photo/6/iris-dt.pdf")

结果:

Int64Index([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
34, 35, 36, 37, 38, 39, 40, 41],
dtype='int64')
0 udp
311029
normal len: 60593
normal len: 4367
64960
tranform string column only: 1
tranform string column only: 2
tranform string column only: 3
0 2
x=>
0 1 2 3 4 5 6 7 8 9 ... 31 32 33 34 35 \
0 0 2 15 7 105 146 0 0 0 0 ... 255 254 1.0 0.01 0.0
1 0 2 15 7 105 146 0 0 0 0 ... 255 254 1.0 0.01 0.0
2 0 2 15 7 105 146 0 0 0 0 ... 255 254 1.0 0.01 0.0

36 37 38 39 40
0 0.0 0.0 0.0 0.0 0.0
1 0.0 0.0 0.0 0.0 0.0
2 0.0 0.0 0.0 0.0 0.0

[3 rows x 41 columns]
y=>
142098 1
142099 1
142101 1
Name: 41, dtype: int64
DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
max_features=None, max_leaf_nodes=None,
min_impurity_decrease=0.0, min_impurity_split=None,
min_samples_leaf=1, min_samples_split=2,
min_weight_fraction_leaf=0.0, presort=False, random_state=None,
splitter='best')
[ 0.9561336 0.99892258 0.99938433 0.99984606 0.99984606 0.99969212
1. 0.99984604 0.99969207 1. ]

最新文章

  1. 教你开发asp.net的单点登录系统
  2. [WPF系列]-基础 TextBlock
  3. js实现『加载更多』功能实例
  4. URL、Session、Cookies、Server.Transfer、Application和跨页面传送,利弊比较
  5. CORBA的简单介绍及HelloWorld(zhuan)
  6. Spring REST for DELETE Request Method Not Supoorted
  7. 恒天云技术分享系列2 - vlan管理GUI开发
  8. frame与iframe的区别?
  9. 工作中小知识点汇总(sql)
  10. zoj2818 Root of the Problem 简单数学 开方
  11. 设计模式的征途—4.抽象工厂(Abstract Factory)模式
  12. iOS逆向开发(5):微信强制升级的突破 | 多开 | 微信5.0
  13. Essential pro angular and asp.net core 笔记
  14. v-module绑定vuex里面的数据
  15. mysql学习之路_视图
  16. [转] Compile、Make和Build的区别
  17. 【转】UTF16和UTF8什么区别?
  18. ubuntu16.04-caffe安装过程详解-草稿
  19. ZT 人生真的是一场马拉松吗?
  20. LogCat大量Unexpected value from nativeGetEnabledTags: 0

热门文章

  1. 洛谷 2409 dp 月赛题目
  2. 第九章 Servlet API
  3. jquery-jquery异步提交表单插件
  4. HDU 2512
  5. Spring boot 使用@Value注入属性
  6. Xamarin部署时遇到错误: Failure [INSTALL_FAILED_UPDATE_INCOMPATIBLE]
  7. Oracle 实现 mysql 更新 update limit
  8. (转载) Android studio如何生成aar包
  9. Xshell调整终端显示的最大行数(缓冲区)
  10. mac下maven的安装配置与使用