0. Overview

  Tests SequenceFile reads and writes && sorting && merging && compression types && converting a log file into a SequenceFile

  This post supplements the "basic SequenceFile operations" section of the Hadoop SequenceFile article


1. Testing reads/writes && compression

package hadoop.sequencefile;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.junit.Test;

import java.io.IOException;

/**
 * Tests basic SequenceFile operations.
 */
public class TestSeqFile {

    /**
     * Tests writing a SequenceFile.
     */
    @Test
    public void testWriteSeq() throws Exception {
        Configuration conf = new Configuration();
        // Use the local file system
        conf.set("fs.defaultFS", "file:///");
        FileSystem fs = FileSystem.get(conf);
        // Path path = new Path("E:/test/none.seq");
        // Path path = new Path("E:/test/record.seq");
        Path path = new Path("E:/test/block.seq");
        // No compression:
        // SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, path, IntWritable.class, Text.class, SequenceFile.CompressionType.NONE);
        // Record compression:
        // SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, path, IntWritable.class, Text.class, SequenceFile.CompressionType.RECORD);
        // Block compression:
        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, path, IntWritable.class, Text.class, SequenceFile.CompressionType.BLOCK);
        for (int i = 1; i <= 1000; i++) {
            IntWritable key = new IntWritable(i);
            Text value = new Text("helloworld" + i);
            writer.append(key, value);
        }
        writer.close();
    }

    /**
     * Tests reading a SequenceFile.
     */
    @Test
    public void testReadSeq() throws Exception {
        Configuration conf = new Configuration();
        // Use the local file system
        conf.set("fs.defaultFS", "file:///");
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("E:/test/block.seq");
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
        // Reusable Writable objects that receive each record
        IntWritable key = new IntWritable();
        Text value = new Text();
        while (reader.next(key, value)) {
            long position = reader.getPosition();
            System.out.println("key: " + key.get() + ", val: " + value + ", pos: " + position);
        }
        reader.close();
    }
}
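
The pos value printed by testReadSeq comes from reader.getPosition(), and it can be fed back into SequenceFile.Reader.seek(long) to jump straight back to a record boundary; reader.sync(long) instead advances to the next sync point after a given offset. A minimal sketch of seeking, assuming a record-compressed record.seq was written with the commented-out RECORD writer above (for block compression, getPosition() reports block rather than record boundaries, so sync is the safer choice there):

@Test
public void testSeek() throws Exception {
    Configuration conf = new Configuration();
    conf.set("fs.defaultFS", "file:///");
    FileSystem fs = FileSystem.get(conf);
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path("E:/test/record.seq"), conf);
    IntWritable key = new IntWritable();
    Text value = new Text();
    // Read one record and remember where the next one starts
    reader.next(key, value);
    long pos = reader.getPosition();
    // Skip ahead two records, then jump back to the remembered boundary
    reader.next(key, value);
    reader.next(key, value);
    reader.seek(pos);
    reader.next(key, value);
    System.out.println("re-read key: " + key.get()); // expected: 2
    reader.close();
}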

2. Testing sorting

package hadoop.sequencefile;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.junit.Test;

import java.util.Random;

/**
 * Tests sorting a SequenceFile.
 */
public class TestSeqFileSort {

    /**
     * Creates a SequenceFile of unordered key-value pairs.
     */
    @Test
    public void testWriteRandom() throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "file:///");
        FileSystem fs = FileSystem.get(conf);
        Path p = new Path("E:/test/random.seq");
        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, p, IntWritable.class, Text.class, SequenceFile.CompressionType.RECORD);
        // Initialize the random number generator
        Random r = new Random();
        for (int i = 1; i < 100000; i++) {
            // Pick a random value in [0, 99999]
            int j = r.nextInt(100000);
            IntWritable key = new IntWritable(j);
            Text value = new Text("helloworld" + j);
            writer.append(key, value);
        }
        writer.close();
    }

    /**
     * Tests sorting the SequenceFile.
     */
    @Test
    public void testSort() throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "file:///");
        FileSystem fs = FileSystem.get(conf);
        Path pin = new Path("E:/test/random.seq");
        Path pout = new Path("E:/test/sort.seq");
        SequenceFile.Sorter sorter = new SequenceFile.Sorter(fs, IntWritable.class, Text.class, conf);
        sorter.sort(pin, pout);
    }

    /**
     * Tests reading the sorted SequenceFile.
     */
    @Test
    public void testReadSeq() throws Exception {
        Configuration conf = new Configuration();
        // Use the local file system
        conf.set("fs.defaultFS", "file:///");
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("E:/test/sort.seq");
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
        // Reusable Writable objects that receive each record
        IntWritable key = new IntWritable();
        Text value = new Text();
        while (reader.next(key, value)) {
            long position = reader.getPosition();
            System.out.println("key: " + key.get() + ", val: " + value + ", pos: " + position);
        }
        reader.close();
    }
}
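
With the constructor used above, Sorter orders records by the key class's registered WritableComparator, i.e. ascending IntWritable order. A different ordering can be supplied through the Sorter constructor that takes a RawComparator. A minimal sketch for descending keys (assumes extra imports of org.apache.hadoop.io.RawComparator and org.apache.hadoop.io.WritableComparator; the output path sort-desc.seq is made up for illustration):

@Test
public void testSortDescending() throws Exception {
    Configuration conf = new Configuration();
    conf.set("fs.defaultFS", "file:///");
    FileSystem fs = FileSystem.get(conf);
    // Reverse the default IntWritable comparator
    RawComparator<IntWritable> descending = new RawComparator<IntWritable>() {
        private final WritableComparator base = WritableComparator.get(IntWritable.class);
        @Override
        public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
            return -base.compare(b1, s1, l1, b2, s2, l2);
        }
        @Override
        public int compare(IntWritable a, IntWritable b) {
            return -a.compareTo(b);
        }
    };
    SequenceFile.Sorter sorter = new SequenceFile.Sorter(fs, descending, IntWritable.class, Text.class, conf);
    sorter.sort(new Path("E:/test/random.seq"), new Path("E:/test/sort-desc.seq"));
}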

3. Testing merging

package hadoop.sequencefile;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.junit.Test;

/**
 * Tests merging SequenceFiles; all inputs must use the same compression type.
 */
public class TestSeqFileMerge {

    /**
     * Tests writing the input SequenceFiles.
     * Run twice (swapping the commented lines) to create two files covering the key ranges 1-100 and 101-200.
     */
    @Test
    public void testWriteSeq() throws Exception {
        Configuration conf = new Configuration();
        // Use the local file system
        conf.set("fs.defaultFS", "file:///");
        FileSystem fs = FileSystem.get(conf);
        // Path path = new Path("E:/test/block1.seq");
        Path path = new Path("E:/test/block2.seq");
        // Block compression
        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, path, IntWritable.class, Text.class, SequenceFile.CompressionType.BLOCK);
        // for (int i = 1; i <= 100; i++) {
        for (int i = 101; i <= 200; i++) {
            IntWritable key = new IntWritable(i);
            Text value = new Text("helloworld" + i);
            writer.append(key, value);
        }
        writer.close();
    }

    /**
     * Tests merging the files while keeping the output sorted.
     */
    @Test
    public void testMerge() throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "file:///");
        FileSystem fs = FileSystem.get(conf);
        Path pin1 = new Path("E:/test/block1.seq");
        Path pin2 = new Path("E:/test/block2.seq");
        Path pout = new Path("E:/test/merge.seq");
        SequenceFile.Sorter sorter = new SequenceFile.Sorter(fs, IntWritable.class, Text.class, conf);
        Path[] p = {pin1, pin2};
        sorter.merge(p, pout);
    }

    /**
     * Tests reading the merged SequenceFile.
     */
    @Test
    public void testReadSeq() throws Exception {
        Configuration conf = new Configuration();
        // Use the local file system
        conf.set("fs.defaultFS", "file:///");
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("E:/test/merge.seq");
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
        // Reusable Writable objects that receive each record
        IntWritable key = new IntWritable();
        Text value = new Text();
        while (reader.next(key, value)) {
            long position = reader.getPosition();
            System.out.println("key: " + key.get() + ", val: " + value + ", pos: " + position);
        }
        reader.close();
    }
}
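
Since Sorter.merge performs a merge of already-sorted inputs rather than a full re-sort, a quick sanity check is to scan merge.seq and assert that the keys never decrease. A minimal sketch:

@Test
public void testMergedOrder() throws Exception {
    Configuration conf = new Configuration();
    conf.set("fs.defaultFS", "file:///");
    FileSystem fs = FileSystem.get(conf);
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path("E:/test/merge.seq"), conf);
    IntWritable key = new IntWritable();
    Text value = new Text();
    int previous = Integer.MIN_VALUE;
    while (reader.next(key, value)) {
        // Keys must be non-decreasing if the merge preserved order
        org.junit.Assert.assertTrue(key.get() >= previous);
        previous = key.get();
    }
    reader.close();
}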

4. Converting a log file into a SequenceFile

package hadoop.sequencefile;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;

/**
 * Converts a log file into a SequenceFile.
 * To inspect the compressed SequenceFile on Windows:
 * hdfs dfs -text file:///E:/test/access.seq
 */
public class Log2Seq {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Use the local file system
        conf.set("fs.defaultFS", "file:///");
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("E:/test/access.seq");
        // No compression:
        // SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, path, NullWritable.class, Text.class, SequenceFile.CompressionType.NONE);
        // Record compression:
        // SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, path, NullWritable.class, Text.class, SequenceFile.CompressionType.RECORD);
        // Block compression:
        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, path, NullWritable.class, Text.class, SequenceFile.CompressionType.BLOCK);
        BufferedReader br = new BufferedReader(new FileReader("E:/file/access.log1"));
        String line = null;
        while ((line = br.readLine()) != null) {
            // A log line has no natural key, so store NullWritable
            NullWritable key = NullWritable.get();
            Text value = new Text(line);
            writer.append(key, value);
        }
        br.close();
        writer.close();
    }
}
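
To read access.seq back in code without hard-coding the key and value types, the classes recorded in the file header can be instantiated reflectively, which is essentially what hdfs dfs -text does. A minimal sketch (the class name Seq2Console is made up for illustration):

package hadoop.sequencefile;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.ReflectionUtils;

public class Seq2Console {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "file:///");
        FileSystem fs = FileSystem.get(conf);
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path("E:/test/access.seq"), conf);
        // Instantiate the key/value types recorded in the file header
        Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
        Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
        while (reader.next(key, value)) {
            System.out.println(value);
        }
        reader.close();
    }
}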
