以原生接口形式使用 XGBoost(import xgboost as xgb)

from sklearn import datasets
from sklearn.model_selection import train_test_split
import xgboost as xgb
import numpy as np
from sklearn.metrics import precision_score, recall_score # 加载数据
# Train and evaluate a multiclass classifier on iris with the native xgboost API.

# Load the iris dataset.
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Hold out 20% of the samples for testing; the fixed random_state makes the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)
print("Train data length:", len(X_train))
print("Test data length:", len(X_test))

# Convert to DMatrix, the data container consumed by xgb.train / Booster.predict.
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

# Booster parameters.
parameters = {
    'eta': 0.3,
    'verbosity': 0,  # silence logging (replaces the deprecated 'silent' flag)
    'objective': 'multi:softprob',  # predict a probability for every class
    'num_class': 3,  # number of classes to predict
    'max_depth': 3,  # depth of the trees in the boosting process
}
num_round = 20  # the number of training iterations

# Train the model.
bst = xgb.train(parameters, dtrain, num_round)

# Predict: with 'multi:softprob' each row holds one probability per class.
preds = bst.predict(dtest)
print(preds[:5])

# Pick, for every sample, the column (class index) with the highest probability.
best_preds = np.argmax(preds, axis=1)
print(best_preds)

# Evaluate the model.
print(precision_score(y_test, best_preds, average='macro'))  # precision
print(recall_score(y_test, best_preds, average='macro'))  # recall

以 Sklearn 接口形式使用 XGBoost(from xgboost import XGBClassifier)

from sklearn import datasets
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import precision_score, recall_score # 加载数据
# Train and evaluate a multiclass classifier on iris with xgboost's sklearn wrapper.

# Load the iris dataset.
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Hold out 20% of the samples for testing; the fixed random_state makes the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)
print("Train data length:", len(X_train))
print("Test data length:", len(X_test))

# Configure the model.
model = XGBClassifier(
    learning_rate=0.01,
    n_estimators=3000,
    max_depth=4,
    min_child_weight=5,
    gamma=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    reg_alpha=1,
    # The iris target is multiclass, so use the multiclass objective rather
    # than 'binary:logistic'.
    objective='multi:softprob',
    n_jobs=8,  # replaces the deprecated 'nthread' alias
    scale_pos_weight=1,
    random_state=27,  # replaces the deprecated 'seed' alias
)

# Train the model.
model.fit(X_train, y_train)

# Predict class labels for the held-out samples.
y_pred = model.predict(X_test)

# Evaluate the model.
print(precision_score(y_test, y_pred, average='macro'))  # precision
print(recall_score(y_test, y_pred, average='macro'))  # recall

最新文章

  1. Java中9种IO的读取方式
  2. php 跨服务器ftp移动文件
  3. this的用法
  4. hdu Code Lock
  5. Android ListView 子控件点击事件
  6. 一些好用的nginx第三方模块
  7. eclipse中使用jython
  8. poj 1273 Drainage Ditches【最大流入门】
  9. BootStrap学习之先导篇——响应式网页
  10. shell全备份脚本(借鉴别人的,在其基础上修复完善了bug)
  11. HDU1789(Doing Homework again)题解
  12. iOS获取设备唯一标识的各种方法?IDFA、IDFV、UDID分别是什么含义?
  13. Install Ubuntu On Windows10(win10上安装linux系统)
  14. 使用django UWSGI 出现 Bad Request (400)
  15. Python构建发布
  16. Windows上模拟Linux环境的软件Cygwin
  17. Python中编码和字符串
  18. jar包的读取
  19. 安装opencv2.4.9
  20. Django 自定义模板标签TemplateTags

热门文章

  1. CircularSlider半弧形滑动条
  2. computed和watch的使用场景
  3. 如何解决Win10不能新建项目的问题?
  4. “高可用性”(High Availability)??
  5. chkconfig 系统服务管理
  6. SpringBoot LoggerFactory is not a Logback LoggerContext but Logback is on the classpath
  7. LightOJ - 1151 Snakes and Ladders
  8. 0003SpringBoot整合SpringDataJPA
  9. mybatis配置和映射文件
  10. Mac: ld: library not found for -lgcc_s.10.4