hadoop FileSplit
2024-10-19 12:33:31
/**
 * A section of an input file. Returned by {@link
 * InputFormat#getSplits(JobContext)} and passed to
 * {@link InputFormat#createRecordReader(InputSplit,TaskAttemptContext)}.
 *
 * <p>Extends {@link InputSplit} and implements {@link Writable}. Only the
 * file path, start offset and length are serialized by
 * {@link #write(DataOutput)}; host information is not written, and it is
 * cleared by {@link #readFields(DataInput)}.
 */
@InterfaceAudience.Public
@InterfaceStability.Stable
public class FileSplit extends InputSplit implements Writable {
  /** The file containing this split's data. */
  private Path file;
  /** Position of the first byte in the file to process. */
  private long start;
  /** Number of bytes in the file to process. */
  private long length;
  /** Hosts containing the block; may be null. */
  private String[] hosts;
  /** Per-host location details; null unless built by the five-argument constructor. */
  private SplitLocationInfo[] hostInfos;

  /** Creates an empty split, to be populated via {@link #readFields(DataInput)}. */
  public FileSplit() {}

  /**
   * Constructs a split with host information.
   *
   * @param file the file name
   * @param start the position of the first byte in the file to process
   * @param length the number of bytes in the file to process
   * @param hosts the list of hosts containing the block, possibly null
   */
  public FileSplit(Path file, long start, long length, String[] hosts) {
    this.file = file;
    this.start = start;
    this.length = length;
    this.hosts = hosts;
  }

  /**
   * Constructs a split with host and cached-blocks information.
   *
   * @param file the file name
   * @param start the position of the first byte in the file to process
   * @param length the number of bytes in the file to process
   * @param hosts the list of hosts containing the block, possibly null; when
   *     null no location info is built
   * @param inMemoryHosts the list of hosts containing the block in memory,
   *     possibly null; null is treated as "no host holds the block in memory"
   */
  public FileSplit(Path file, long start, long length, String[] hosts,
      String[] inMemoryHosts) {
    this(file, start, length, hosts);
    if (hosts == null) {
      // Nothing to describe: leave hostInfos null, exactly as the
      // four-argument constructor does.
      return;
    }
    hostInfos = new SplitLocationInfo[hosts.length];
    for (int i = 0; i < hosts.length; i++) {
      hostInfos[i] =
          new SplitLocationInfo(hosts[i], isInMemory(hosts[i], inMemoryHosts));
    }
  }

  /** Reports whether {@code host} appears in {@code inMemoryHosts}. */
  private static boolean isInMemory(String host, String[] inMemoryHosts) {
    if (inMemoryHosts == null) {
      return false;
    }
    // because N will be tiny, scanning is probably faster than a HashSet
    for (String inMemoryHost : inMemoryHosts) {
      if (inMemoryHost != null && inMemoryHost.equals(host)) {
        return true;
      }
    }
    return false;
  }

  /** The file containing this split's data. */
  public Path getPath() {
    return file;
  }

  /** The position of the first byte in the file to process. */
  public long getStart() {
    return start;
  }

  /** The number of bytes in the file to process. */
  @Override
  public long getLength() {
    return length;
  }

  /** Formats the split as {@code <file>:<start>+<length>}. */
  @Override
  public String toString() {
    return file + ":" + start + "+" + length;
  }

  ////////////////////////////////////////////
  // Writable methods
  ////////////////////////////////////////////

  /**
   * Serializes the file path, start offset and length, in that order.
   * Host information is not written.
   *
   * @param out the stream to write to
   * @throws IOException if writing to {@code out} fails
   */
  @Override
  public void write(DataOutput out) throws IOException {
    Text.writeString(out, file.toString());
    out.writeLong(start);
    out.writeLong(length);
  }

  /**
   * Restores the file path, start offset and length written by
   * {@link #write(DataOutput)} and discards any host information.
   *
   * @param in the stream to read from
   * @throws IOException if reading from {@code in} fails
   */
  @Override
  public void readFields(DataInput in) throws IOException {
    file = new Path(Text.readString(in));
    start = in.readLong();
    length = in.readLong();
    hosts = null;
    // Host details are not serialized; drop them together with hosts so a
    // reused instance does not report locations of a previous split.
    hostInfos = null;
  }

  /**
   * Returns the hosts containing this split's block.
   *
   * @return the host array given at construction, or an empty array when no
   *     hosts are known
   */
  @Override
  public String[] getLocations() throws IOException {
    if (this.hosts == null) {
      return new String[]{};
    } else {
      return this.hosts;
    }
  }

  /**
   * Returns per-host location details for this split.
   *
   * @return the location info built by the five-argument constructor, or
   *     null when none is available
   */
  @Override
  @Evolving
  public SplitLocationInfo[] getLocationInfo() throws IOException {
    return hostInfos;
  }
}
最新文章
- JAVA调用 keytool 生成keystore 和 cer 证书
- java web项目启动时自动加载自定义properties文件
- 矩形覆盖(codevs 1101)
- 《JS高程》创建对象的7种方式(完整版)
- hdu 1233
- Java——有关日期的方法
- java 如何得到ISO 8601 时间格式
- 理解js中的运算符优先级
- OpenCV2.4.9 Qt5.3.1 开发环境配置错误原因与解决方案
- springcloud相关资料收集
- python魔法方法-比较相关
- Linux服务器---安装jdk
- 带你从零学ReactNative开发跨平台App开发-[react native 仿boss直聘](十三)
- python参数Sample Code
- 06 Python字符编码与文件处理
- mysql--对库,表基本操作语句,增删改查
- poppo大根堆的原理与实现。
- java基础 01
- 爬虫之cookiejar模块
- 新建IP核为灰色并显示there is no project open
热门文章
- 面向对象的tab选项卡实现
- php模板引擎smarty
- bootstrap table 怎么自适应宽度
- POJ 1320 Street Numbers 解佩尔方程
- Bzoj1313 [HAOI2008]下落的圆盘
- [Codevs1519]过路费解题报告|最小生成树|LCA
- mongoDB的简单使用
- 【 Linux 】Keepalived实现双主模型高可用集群
- hdu 1348(凸包)
- android studio 自定义路径安装报错"You are attempting to install the android SDK