Decision tree algorithms such as C4.5 are easy to parallelize. The following is an example.

This is a non-parallel version:

/**
 * Builds the decision tree breadth-first on the calling thread.
 *
 * <p>All samples are first handed to the model. The root node selected by the
 * model is then queued, and nodes are taken from the queue in FIFO order. A
 * node is left unsplit when its depth has reached {@code model.getMaxDepth()}
 * or when the selected feature split carries no feature id. Otherwise the
 * split is stored on the node and each non-null child whose sample size is
 * strictly greater than {@code model.getMinSampleSizeInNode()} is attached,
 * registered with the model and queued for its own split.
 *
 * @param dataset samples to learn from; every element is cast to
 *                {@code MapBasedBinarySample<FK, FV>}
 */
public void learnFromDataSet(Iterable<Sample<FK, FV, Boolean>> dataset){
    // NOTE(review): the loop variable is kept as a raw Sample, exactly as in
    // the original; typing it needs MapBasedBinarySample's declaration,
    // which is not visible here.
    for(Sample sample : dataset){
        model.addSample((MapBasedBinarySample<FK, FV>)sample);
    }

    Queue<TreeNode<FK, FV>> pending = new LinkedList<TreeNode<FK, FV>>();
    TreeNode<FK, FV> root = model.selectRootTreeNode();
    model.addTreeNode(root);
    pending.add(root);

    while (!pending.isEmpty()){
        TreeNode<FK, FV> current = pending.poll();

        // Depth limit reached: this node stays a leaf.
        if(current.getDepth() >= model.getMaxDepth()){
            continue;
        }

        // A split without a feature id means there is nothing to split on.
        FeatureSplit<FK> featureSplit = model.selectFeature(current);
        if(featureSplit.getFeatureId() == null){
            continue;
        }
        current.setFeatureSplit(featureSplit);

        Pair<TreeNode<FK, FV>, TreeNode<FK, FV>> children =
                model.newTreeNode(current, featureSplit);
        TreeNode<FK, FV> leftNode = children.getKey();
        TreeNode<FK, FV> rightNode = children.getValue();

        // Children at or below the minimum sample size are discarded.
        if(leftNode != null
                && leftNode.getSampleSize() > model.getMinSampleSizeInNode()){
            current.setLeft(leftNode);
            model.addTreeNode(leftNode);
            pending.add(leftNode);
        }
        if(rightNode != null
                && rightNode.getSampleSize() > model.getMinSampleSizeInNode()){
            current.setRight(rightNode);
            model.addTreeNode(rightNode);
            pending.add(rightNode);
        }
    }
}

And this is a parallel version:

/**
 * Task that tries to split a single tree node and queues the children it
 * accepts.
 *
 * <p>Despite the name this is a {@link Runnable}, not a thread. It uses
 * {@code model} and the type parameters {@code FK}/{@code FV} from the
 * enclosing scope.
 *
 * <p>NOTE(review): {@code run()} calls {@code model.selectFeature},
 * {@code model.newTreeNode} and {@code model.addTreeNode}; when several of
 * these tasks run at once the model is accessed from several threads.
 * Confirm that the model tolerates concurrent use.
 */
public class NodeSplitThread implements Runnable{
    // Node this task attempts to split.
    private final TreeNode<FK, FV> node;
    // Queue that receives every child accepted by this task.
    private final Queue<TreeNode<FK, FV>> Q;

    /**
     * @param node node to split
     * @param Q    queue to which accepted children are added
     */
    public NodeSplitThread(TreeNode<FK, FV> node, Queue<TreeNode<FK, FV>> Q){
        this.node = node;
        this.Q = Q;
    }

    /**
     * Splits the node unless its depth has reached {@code model.getMaxDepth()}
     * or the selected split has no feature id. Each non-null child whose
     * sample size is strictly greater than
     * {@code model.getMinSampleSizeInNode()} is attached to the node,
     * registered with the model and added to the queue.
     */
    @Override
    public void run() {
        if(node.getDepth() >= model.getMaxDepth()){
            return;
        }
        FeatureSplit<FK> featureSplit = model.selectFeature(node);
        if(featureSplit.getFeatureId() == null){
            return;
        }
        node.setFeatureSplit(featureSplit);

        Pair<TreeNode<FK, FV>, TreeNode<FK, FV>> children = model.newTreeNode(node, featureSplit);
        TreeNode<FK, FV> leftNode = children.getKey();
        TreeNode<FK, FV> rightNode = children.getValue();

        if(leftNode != null && leftNode.getSampleSize() > model.getMinSampleSizeInNode()){
            node.setLeft(leftNode);
            model.addTreeNode(leftNode);
            Q.add(leftNode);
        }
        if(rightNode != null && rightNode.getSampleSize() > model.getMinSampleSizeInNode()){
            node.setRight(rightNode);
            model.addTreeNode(rightNode);
            Q.add(rightNode);
        }
    }
}

/**
 * Removes up to {@code n} nodes from the head of the queue.
 *
 * @param Q queue to drain from
 * @param n maximum number of nodes to remove
 * @return the removed nodes in poll order; empty when the queue is empty or
 *         {@code n <= 0}
 */
public List<TreeNode<FK, FV>> pollTopN(Queue<TreeNode<FK, FV>> Q, int n){
    List<TreeNode<FK, FV>> ret = new ArrayList<TreeNode<FK, FV>>();
    for(int i = 0; i < n; ++i){
        // Test the poll() result rather than calling isEmpty() first: a
        // separate emptiness check can be stale by the time poll() runs if
        // another thread drains the queue, which would put null in the list.
        TreeNode<FK, FV> head = Q.poll();
        if(head == null){
            break;
        }
        ret.add(head);
    }
    return ret;
}
@Override
public void learnFromDataSet(Iterable<Sample<FK, FV, Boolean>> dataset){
    // Builds the decision tree using a fixed-size worker pool.
    //
    // All samples are first handed to the model and the root node is queued.
    // The queue is then drained in batches: every node of a batch is
    // submitted as a NodeSplitThread task, and the calling thread waits for
    // the whole batch before polling again, so children queued by the
    // workers are picked up by the next batch.
    //
    // A failure inside a split task is rethrown to the caller instead of
    // being ignored. If the calling thread is interrupted while waiting, its
    // interrupt status is restored and the method returns early with the
    // tree only partially built. The pool is shut down on every exit path.
    for(Sample sample : dataset){
        model.addSample((MapBasedBinarySample<FK, FV>)sample);
    }

    // Used both as the number of worker threads and as the batch size.
    final int parallelism = 10;

    Queue<TreeNode<FK, FV>> Q = new ConcurrentLinkedQueue<TreeNode<FK, FV>>();
    TreeNode<FK, FV> root = model.selectRootTreeNode();
    model.addTreeNode(root);
    Q.add(root);

    ExecutorService threadPool = Executors.newFixedThreadPool(parallelism);
    try {
        while (!Q.isEmpty()){
            List<TreeNode<FK, FV>> nodes = pollTopN(Q, parallelism);
            List<Future<?>> tasks = new ArrayList<Future<?>>(nodes.size());
            for(TreeNode<FK, FV> node : nodes){
                tasks.add(threadPool.submit(new NodeSplitThread(node, Q)));
            }
            for(Future<?> task : tasks){
                try {
                    task.get();
                } catch (InterruptedException e) {
                    // Restore the flag so the caller can observe the
                    // interruption, then stop; the finally block cancels
                    // whatever is still running.
                    Thread.currentThread().interrupt();
                    return;
                } catch (ExecutionException e) {
                    // Surface the task's own failure, as the sequential
                    // version would, rather than leaving an incomplete tree.
                    Throwable cause = e.getCause();
                    if (cause instanceof RuntimeException) {
                        throw (RuntimeException) cause;
                    }
                    if (cause instanceof Error) {
                        throw (Error) cause;
                    }
                    throw new IllegalStateException("Node split task failed", e);
                }
            }
        }
    } finally {
        // On normal completion every submitted task has already finished, so
        // this only releases the idle workers; on early exit it also
        // interrupts tasks that are still running.
        threadPool.shutdownNow();
    }
}

http://xlvector.net/blog/?p=896

最新文章

  1. 广州PostgreSQL用户会技术交流会小记 2015-9-19
  2. VMware Player安装Debian系统
  3. Delphi 中记录类型 给记录指针赋值。
  4. python学习之——selenium元素定位
  5. python中的Iterable, Iterator,生成器概念
  6. SSH入门简单搭建例子
  7. webstorm常用快捷键(常用)
  8. PHP定义静态方法的原则
  9. IE中float元素如果同时设置了margin值,此时margin的值会变为双倍的解决方法
  10. 2.如何使用matlab拟合曲线
  11. CentOS 6.5 中安装 Mysql 5.6,并远程连接Mysql
  12. bugku web web3
  13. python集合(set)的运算
  14. SSH(Spring+Struts2+Hibernate) of mappings(SSH三大框架的映射问题)
  15. nodeJs的Buffer操作
  16. ubuntu16.04+caffe+GPU+cuda+cudnn安装教程
  17. python pandas使用数据透视表
  18. Vue + Element UI 实现权限管理系统(更换皮肤主题)
  19. Codeforces805 A. Fake NP 2017-05-05 08:30 327人阅读 评论(0) 收藏
  20. 浅谈NodeJs的模块机制

热门文章

  1. linux convert命令安装及使用
  2. golang包管理
  3. 如何调用别人发布的WebService程序
  4. ubuntu下vim配置(刷题和比赛两套)
  5. Java 实现选择排序
  6. SQLServer------begin tran/commit tran事务的使用方法
  7. PVS 7.6 部署教程
  8. linux安装nagios客户端
  9. YARN的设计
  10. weblogic11g重置控制密码