继上篇

MapReduce清洗数据

package mapreduce;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; public class CleanData {
public static class Map extends Mapper<Object , Text , Text , IntWritable>{
private static Text newKey=new Text();
private static String chage(String data) {
char[] str = data.toCharArray();
String[] time = new String[7];
int j = 0;
int k = 0;
for(int i=0;i<str.length;i++) {
if(str[i]=='/'||str[i]==':'||str[i]==32) {
time[k] = data.substring(j,i);
j = i+1;
k++;
}
}
time[k] = data.substring(j, data.length()); switch(time[1]) { case "Jan":time[1]="01";break; case
"Feb":time[1]="02";break; case "Mar":time[1]="03";break; case
"Apr":time[1]="04";break; case "May":time[1]="05";break; case
"Jun":time[1]="06";break; case "Jul":time[1]="07";break; case
"Aug":time[1]="08";break; case "Sep":time[1]="09";break; case
"Oct":time[1]="10";break; case "Nov":time[1]="11";break; case
"Dec":time[1]="12";break; } data = time[2]+"-"+time[1]+"-"+time[0]+" "+time[3]+":"+time[4]+":"+time[5];
return data;
}
public void map(Object key,Text value,Context context) throws IOException, InterruptedException{
String line=value.toString();
System.out.println(line);
String arr[]=line.split(","); String ip = arr[0];
String date = arr[1];
String day = arr[2];
String traffic = arr[3];
String type = arr[4];
String id = arr[5]; date = chage(date);
traffic = traffic.substring(0, traffic.length()-1); newKey.set(ip+'\t'+date+'\t'+day+'\t'+traffic+'\t'+type);
//newKey.set(ip+','+date+','+day+','+traffic+','+type);
int click=Integer.parseInt(id);
context.write(newKey, new IntWritable(click));
}
}
public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable>{
public void reduce(Text key,Iterable<IntWritable> values,Context context) throws IOException, InterruptedException{
for(IntWritable val : values){
context.write(key, val);
}
}
}
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException{
Configuration conf=new Configuration();
System.out.println("start");
Job job =new Job(conf,"cleanData");
job.setJarByClass(CleanData.class);
job.setMapperClass(Map.class);
job.setReducerClass(Reduce.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
Path in=new Path("hdfs://192.168.137.67:9000/mymapreducel/in/result.txt");
Path out=new Path("hdfs://192.168.137.67:9000/mymapreducelShiYan/out1");
FileInputFormat.addInputPath(job,in);
FileOutputFormat.setOutputPath(job,out);
System.exit(job.waitForCompletion(true) ? 0 : 1); }
}

今天遇到了一个

java.lang.ClassCastException: class org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$GetFileInfoRequestProto cannot be cast to class com.google.protobuf.Message (org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$GetFileI....

的错误

搞了好几个小时也没有解决,最后没办法了把导的包全部移除后重新导入,解决了问题。

最新文章

  1. django小结
  2. Slackware Linux or FreeBSD 配置中文环境。
  3. c语言基础表达式, 关系运算符, 逻辑运算符, 位运算符, 数据的取值范围, 分支结构(if...else, switch...case)
  4. Android使用SQLite数据库(3)
  5. 【BZOJ-2725】故乡的梦 Dijsktra + Tarjan + Dinic + BFS + 堆
  6. windows 命令修改IP
  7. SQLSERVER备份事务日志的作用
  8. 原 Linux搭建SVN 服务器2
  9. PHPCMS v9 自定义表单添加验证码验证
  10. JavaScript使用点滴
  11. 线段树——codevs 1690 开关灯
  12. C#标识符与关键字
  13. SQLServer之创建用户定义的数据库角色
  14. fillder---工具栏隐藏/显示
  15. shell脚本--初识CGI
  16. 【JAVA】使用IntelliJ IDEA创建Java控制台工程
  17. 修改Unity中Lua文件的默认打开程序
  18. Java IO/NIO教程
  19. jupyter notebook远程服务器终端连接
  20. SAS 报表输出一些新式控制

热门文章

  1. VMware显示错误:“未能锁定文件 无法打开磁盘 ..\*.vmdk 或者某一个快照所依赖的磁盘。”解决办法
  2. Maven 基础(二) | 解决依赖冲突的正确姿势
  3. const和volitale
  4. 【5min+】闪电光速拳? .NetCore 中的Span
  5. redis简单操作
  6. 关于程序员须知的 linux 基础
  7. Dubbo入门到实战
  8. SpringBoot学习(一):SpringBoot入门
  9. springIOC源码接口分析(七):ApplicationEventPublisher
  10. kaggle实战之 bag of words meet bag of poopcorn