需求:订单数据

    求出每个订单中最贵的商品。

    排序规则:订单id正序,同一订单内成交金额倒序。
结果文件三个(对应3个ReduceTask),每个结果文件只要一条数据(即该订单中成交金额最高的那条)。

1.Mapper类

package com.css.order.mr;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Mapper that turns each input line into an {@link OrderBean} key.
 *
 * <p>Each line is split on tab characters; field 0 is parsed as the integer order id and
 * field 2 as the price. The bean is emitted as the map output key with a
 * {@link NullWritable} value, so all information travels in the key.
 */
public class OrderMapper extends Mapper<LongWritable, Text, OrderBean, NullWritable> {

    /** Minimum number of tab-separated fields in a record (the price is read from index 2). */
    private static final int MIN_FIELDS = 3;

    /**
     * Parses one input line and emits it as an {@link OrderBean}.
     *
     * @param key     input key supplied by the framework (not used here)
     * @param value   one line of input text
     * @param context task context used to emit the output pair
     * @throws IOException           if a non-blank line has fewer than three tab-separated
     *                               fields, or if writing the output fails
     * @throws InterruptedException  if writing the output is interrupted
     * @throws NumberFormatException if the order id or price field is not numeric
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Read the current line.
        String line = value.toString();

        // A blank line (for example a trailing newline at the end of the file) carries no
        // record; skip it instead of failing the whole task while parsing an empty field.
        if (line.trim().isEmpty()) {
            return;
        }

        // Split the record into its fields.
        String[] fields = line.split("\t");
        if (fields.length < MIN_FIELDS) {
            throw new IOException("Malformed order record, expected at least " + MIN_FIELDS
                    + " tab-separated fields but got " + fields.length + ": " + line);
        }

        // Extract the fields; primitives avoid a pointless box/unbox round trip.
        int orderId = Integer.parseInt(fields[0]);
        double price = Double.parseDouble(fields[2]);

        // Emit the bean as the key; the value carries no data.
        context.write(new OrderBean(orderId, price), NullWritable.get());
    }
}

2.Reducer类

package com.css.order.mr;

import java.io.IOException;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Reducer that emits exactly one record per reduce group.
 *
 * <p>Only the key as it stands when {@code reduce} is entered is written; the values are
 * never iterated. Given the ordering in {@code OrderBean.compareTo} (price descending within
 * an order id) and the grouping by order id configured in the driver, that key is expected
 * to be the highest-priced record of the order.
 */
public class OrderReducer extends Reducer<OrderBean, NullWritable, OrderBean, NullWritable> {

    /**
     * Writes the group's key once with an empty value.
     *
     * @param key     the key presented for this reduce group
     * @param values  the group's values (unused)
     * @param context task context used to emit the output pair
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if writing the output is interrupted
     */
    @Override
    protected void reduce(OrderBean key, Iterable<NullWritable> values, Context context)
            throws IOException, InterruptedException {
        // Output: one line per group, value intentionally empty.
        final NullWritable emptyValue = NullWritable.get();
        context.write(key, emptyValue);
    }
}

3.封装类

package com.css.order.mr;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException; import org.apache.hadoop.io.WritableComparable; public class OrderBean implements WritableComparable<OrderBean>{ // 定义属性
private int order_id; // 定义订单id
private double price; // 价格 public OrderBean(){
} public OrderBean(int order_id, double price) {
super();
this.order_id = order_id;
this.price = price;
} public int getOrder_id() {
return order_id;
} public void setOrder_id(int order_id) {
this.order_id = order_id;
} public double getPrice() {
return price;
} public void setPrice(double price) {
this.price = price;
} // 序列化
@Override
public void write(DataOutput out) throws IOException {
out.writeInt(order_id);
out.writeDouble(price);
} // 反序列化
@Override
public void readFields(DataInput in) throws IOException {
order_id = in.readInt();
price = in.readDouble();
} @Override
public String toString() {
return order_id + "\t" + price;
} // 排序
@Override
public int compareTo(OrderBean o) {
int rs;
// 根据id排序
if (order_id > o.order_id) {
// id 大的往下排
rs = 1;
}else if (order_id < o.order_id) {
// id小的往上排
rs = -1;
}else {
// id相等 价格高的往上排
rs = price > o.getPrice() ? -1 : 1;
}
return rs;
} }

4.自定义分区类

package com.css.order.mr;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Partitioner;

/**
 * Partitioner that routes records by order id, so every record of one order goes to the
 * same partition.
 */
public class OrderPartitioner extends Partitioner<OrderBean, NullWritable> {

    /**
     * Computes the partition index from the key's order id.
     *
     * @param key           the map output key
     * @param value         the map output value (unused)
     * @param numPartitions the number of partitions
     * @return the order id with its sign bit cleared, modulo {@code numPartitions}
     */
    @Override
    public int getPartition(OrderBean key, NullWritable value, int numPartitions) {
        // Masking with Integer.MAX_VALUE clears the sign bit, so the remainder is never negative.
        int nonNegativeId = key.getOrder_id() & Integer.MAX_VALUE;
        return nonNegativeId % numPartitions;
    }
}

5.自定义排序分组类

package com.css.order.mr;

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

/**
 * Grouping comparator that treats two {@link OrderBean} keys as the same group whenever
 * their order ids match, regardless of price.
 */
public class OrderGroupingComparator extends WritableComparator {

    /**
     * Registers {@link OrderBean} as the key class. This constructor is required.
     * NOTE(review): the second argument is presumably Hadoop's {@code createInstances}
     * flag — confirm against the WritableComparator API documentation.
     */
    protected OrderGroupingComparator() {
        super(OrderBean.class, true);
    }

    /**
     * Compares two keys by order id only.
     *
     * @param a first key; must be an {@link OrderBean}
     * @param b second key; must be an {@link OrderBean}
     * @return 0 if the ids are equal, 1 if {@code a}'s id is larger, -1 otherwise
     */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        int leftId = ((OrderBean) a).getOrder_id();
        int rightId = ((OrderBean) b).getOrder_id();

        // Equal ids belong to the same group.
        if (leftId == rightId) {
            return 0;
        }
        // Different ids are different groups.
        return leftId > rightId ? 1 : -1;
    }
}

6.Driver类

package com.css.order.mr;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Driver that configures and submits the job.
 *
 * <p>Usage: {@code OrderDriver [inputPath [outputPath]]}. When an argument is omitted the
 * original hard-coded path is used, so running without arguments behaves as before.
 */
public class OrderDriver {

    /** Input path used when no first argument is given. */
    private static final String DEFAULT_INPUT_PATH = "c://in1026";

    /** Output path used when no second argument is given. */
    private static final String DEFAULT_OUTPUT_PATH = "c://out1026";

    /**
     * Configures the job, waits for it to finish, prints the status ({@code 0} for success,
     * {@code 1} for failure) and exits the JVM with that same status.
     *
     * @param args optional input path and output path, in that order
     * @throws IOException            if the job cannot be created or submitted
     * @throws ClassNotFoundException if a configured class cannot be found
     * @throws InterruptedException   if waiting for completion is interrupted
     */
    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT_PATH;
        String outputPath = args.length > 1 ? args[1] : DEFAULT_OUTPUT_PATH;

        // 1. Create the job.
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        // 2. Locate the jar through the driver class.
        job.setJarByClass(OrderDriver.class);

        // 3. Mapper and reducer.
        job.setMapperClass(OrderMapper.class);
        job.setReducerClass(OrderReducer.class);

        // 4. Mapper output types.
        job.setMapOutputKeyClass(OrderBean.class);
        job.setMapOutputValueClass(NullWritable.class);

        // 5. Reducer output types.
        job.setOutputKeyClass(OrderBean.class);
        job.setOutputValueClass(NullWritable.class);

        // 6. Reduce-side grouping.
        job.setGroupingComparatorClass(OrderGroupingComparator.class);

        // 7. Partitioning.
        job.setPartitionerClass(OrderPartitioner.class);

        // 8. Number of reduce tasks.
        job.setNumReduceTasks(3);

        // 9. Input and output locations.
        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        // 10. Submit and wait. The status is still printed as before, and is now also
        // used as the process exit code so a failed job is visible to the caller.
        boolean rs = job.waitForCompletion(true);
        int status = rs ? 0 : 1;
        System.out.println(status);
        System.exit(status);
    }
}

7.mr输入文件order.java(各字段以制表符\t分隔;Mapper读取第1列订单id和第3列成交金额)

1001    Tmall_01    998
1001 Tmall_06 88.8
1001 Tmall_03 522.8
1002 Tmall_03 522.8
1002 Tmall_04 132.4
1002 Tmall_05 372.4
1003 Tmall_01 998
1003 Tmall_02 8.5
1003 Tmall_04 132.4

8.输出文件

(1)part-r-00000
1002 522.8
(2)part-r-00001
1003 998.0
(3)part-r-00002
1001 998.0

最新文章

  1. 【转】搞清FastCgi与PHP-fpm之间的关系
  2. 五分钟理解一致性哈希算法(consistent hashing)
  3. IOS 错误 [UIWebView cut:]: unrecognized selector sent to instance
  4. QQ互联OAuth
  5. 校验两次密码一致的js代码
  6. java识别简单的验证码
  7. P142-1
  8. NTP服务搭建
  9. PHP基础在线视频教程高清版
  10. Camtasia Studio的安装步骤
  11. android 回调机制实例!
  12. sql server的两个类型转换函数
  13. 数据验证validator 与 DWZ
  14. Python 从入门到入门基础练习十五题
  15. 离线批量数据通道Tunnel的最佳实践及常见问题
  16. Vue:(五)axios
  17. centos6.5 yum安装postgresql9.3
  18. 机器学习英雄访谈录之 DL 自由职业者:Tuatini Godard
  19. OpenStack网络详解
  20. C语言按行读文件及字符串分割

热门文章

  1. pentestbox使用教程
  2. 存档格式选择--JSON
  3. Office Web App2013 在线查看PDF文件
  4. Hibernate Annotation 字段 默认值
  5. IOS 中微信 网页授权报 key[也就是code]失效 解决办法
  6. EasyUI Ajax 表单
  7. The Properties of Posterior of Topic Model
  8. HBaseclientAPI基本操作
  9. 让所有IE支持HTML5的解决方案
  10. Linux下的高级拾色器—Pick