MapReduce读取HBase数据

代码如下

package com.hbase.mapreduce;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner; /**
* @author:FengZhen
* @create:2018年9月17日
* MapReduce读取HBase中数据
*/
public class AnalyzeData extends Configured implements Tool{ private static String addr="HDP233,HDP232,HDP231";
private static String port="2181"; public enum Counters { ROWS, COLS, VALID, ERROR } static class AnalyzeMapper extends TableMapper<Text, IntWritable>{
private IntWritable ONE = new IntWritable(1);
@Override
protected void map(ImmutableBytesWritable key, Result value,
Mapper<ImmutableBytesWritable, Result, Text, IntWritable>.Context context)
throws IOException, InterruptedException {
context.getCounter(Counters.ROWS).increment(1);
try {
for (Cell cell : value.listCells()) {
context.getCounter(Counters.COLS).increment(1);
String hbaseValue = Bytes.toString(CellUtil.cloneValue(cell));
context.write(new Text(hbaseValue), ONE);
context.getCounter(Counters.VALID).increment(1);
}
} catch (Exception e) {
e.printStackTrace();
context.getCounter(Counters.ERROR).increment(1);
}
}
} static class AnalyzeReducer extends Reducer<Text, IntWritable, Text, IntWritable>{
@Override
protected void reduce(Text key, Iterable<IntWritable> values,
Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException {
int count = 0;
for (IntWritable intWritable : values) {
count = count + intWritable.get();
}
context.write(key, new IntWritable(count));
}
} public int run(String[] arg0) throws Exception {
String table = arg0[0];
String column = arg0[1];
String outPath = arg0[2]; Scan scan = new Scan();
if (null != column) {
byte[][] colkey = KeyValue.parseColumn(Bytes.toBytes(column));
if (colkey.length > 1) {
scan.addColumn(colkey[0], colkey[1]);
}else {
scan.addFamily(colkey[0]);
}
} Configuration configuration = HBaseConfiguration.create();
configuration.set("hbase.zookeeper.quorum",addr);
configuration.set("hbase.zookeeper.property.clientPort", port);
configuration.set(TableInputFormat.INPUT_TABLE, table); Job job = Job.getInstance(configuration);
job.setJobName("AnalyzeData");
job.setJarByClass(AnalyzeData.class); job.setMapperClass(AnalyzeMapper.class);
job.setInputFormatClass(TableInputFormat.class);
TableInputFormat.addColumns(scan, KeyValue.parseColumn(Bytes.toBytes(column))); job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class); //使用TableMapReduceUtil会报类找不到错误
//Caused by: java.lang.ClassNotFoundException: com.yammer.metrics.core.MetricsRegistry
//TableMapReduceUtil.initTableMapperJob(table, scan, AnalyzeMapper.class, Text.class, IntWritable.class, job); job.setReducerClass(AnalyzeReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class); job.setNumReduceTasks(1);
FileOutputFormat.setOutputPath(job, new Path(outPath)); return job.waitForCompletion(true) ? 0 : 1;
} public static void main(String[] args) throws Exception {
String[] params = new String[] {"test_table_mr","data:info","hdfs://fz/data/fz/output/mrReadHBase"};
int exitCode = ToolRunner.run(new AnalyzeData(), params);
System.exit(exitCode);
}
}

最新文章

  1. Nginx %00空字节执行php漏洞
  2. 人工神经网络NN
  3. 1.4 云计算的SPI服务模型
  4. vim常用命令汇总
  5. 小米2000万买域名mi.com
  6. -_-#微信内置JavaScript API WeixinJSBridge
  7. OC基础 点语法的使用
  8. HTML5 CSS3专题 纯CSS打造相冊效果
  9. React Native 系列(三) -- 项目结构介绍
  10. Swift基础之设计折线坐标图
  11. Android项目-高考作文-使用ORMLite抽象公共的Dao层
  12. oracle网络服务之beq协议和SDU优化(性能提升可达30%)
  13. Jmeter自带录制功能
  14. sql之Replace
  15. matlab中mat文件简单存/取
  16. idea集成maven
  17. loging日志文件
  18. linux保证程序单实例运行
  19. udp编程中,一次能发送多少个bytes为好?
  20. Linux文件备份

热门文章

  1. 关于java后台如何接收xml格式的数据
  2. java中使用MD5进行加密
  3. 《从零开始学Swift》学习笔记(Day 44)——重写属性
  4. SharePoint服务器端对象模型 之 使用CAML进行数据查询(Part 3)
  5. Intellij IDEA工具的常用快捷键
  6. 巨蟒python全栈开发django7:多表增加和查询
  7. 发挥inline-block作用
  8. pymysql连数据库简单版
  9. 前端框架之jQuery(二)----轮播图,放大镜
  10. 目标是:互联网方向的Java开发工程师