diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/FileInputFormat.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/FileInputFormat.java
index 46bb4b629c816..e1e7fb39f2877 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/FileInputFormat.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/FileInputFormat.java
@@ -361,47 +361,45 @@ public InputSplit[] getSplits(JobConf job, int numSplits)
     for (FileStatus file: files) {
       Path path = file.getPath();
       long length = file.getLen();
-      if (length != 0) {
-        FileSystem fs = path.getFileSystem(job);
-        BlockLocation[] blkLocations;
-        if (file instanceof LocatedFileStatus) {
-          blkLocations = ((LocatedFileStatus) file).getBlockLocations();
-        } else {
-          blkLocations = fs.getFileBlockLocations(file, 0, length);
+      FileSystem fs = path.getFileSystem(job);
+      BlockLocation[] blkLocations;
+      if (file instanceof LocatedFileStatus) {
+        blkLocations = ((LocatedFileStatus) file).getBlockLocations();
+      } else {
+        blkLocations = fs.getFileBlockLocations(file, 0, length);
+      }
+      if (blkLocations.length == 0){
+        continue;
+      }
+      if (isSplitable(fs, path)) {
+        long blockSize = file.getBlockSize();
+        long splitSize = computeSplitSize(goalSize, minSize, blockSize);
+
+        long bytesRemaining = length;
+        while (((double) bytesRemaining)/splitSize > SPLIT_SLOP) {
+          String[][] splitHosts = getSplitHostsAndCachedHosts(blkLocations,
+              length-bytesRemaining, splitSize, clusterMap);
+          splits.add(makeSplit(path, length-bytesRemaining, splitSize,
+              splitHosts[0], splitHosts[1]));
+          bytesRemaining -= splitSize;
         }
-        if (isSplitable(fs, path)) {
-          long blockSize = file.getBlockSize();
-          long splitSize = computeSplitSize(goalSize, minSize, blockSize);
-
-          long bytesRemaining = length;
-          while (((double) bytesRemaining)/splitSize > SPLIT_SLOP) {
-            String[][] splitHosts = getSplitHostsAndCachedHosts(blkLocations,
-                length-bytesRemaining, splitSize, clusterMap);
-            splits.add(makeSplit(path, length-bytesRemaining, splitSize,
-                splitHosts[0], splitHosts[1]));
-            bytesRemaining -= splitSize;
-          }
 
-          if (bytesRemaining != 0) {
-            String[][] splitHosts = getSplitHostsAndCachedHosts(blkLocations, length
-                - bytesRemaining, bytesRemaining, clusterMap);
-            splits.add(makeSplit(path, length - bytesRemaining, bytesRemaining,
-                splitHosts[0], splitHosts[1]));
-          }
-        } else {
-          if (LOG.isDebugEnabled()) {
-            // Log only if the file is big enough to be splitted
-            if (length > Math.min(file.getBlockSize(), minSize)) {
-              LOG.debug("File is not splittable so no parallelization "
-                  + "is possible: " + file.getPath());
-            }
+        if (bytesRemaining != 0) {
+          String[][] splitHosts = getSplitHostsAndCachedHosts(blkLocations, length
+              - bytesRemaining, bytesRemaining, clusterMap);
+          splits.add(makeSplit(path, length - bytesRemaining, bytesRemaining,
+              splitHosts[0], splitHosts[1]));
+        }
+      } else {
+        if (LOG.isDebugEnabled()) {
+          // Log only if the file is big enough to be splitted
+          if (length > Math.min(file.getBlockSize(), minSize)) {
+            LOG.debug("File is not splittable so no parallelization "
+                + "is possible: " + file.getPath());
           }
-          String[][] splitHosts = getSplitHostsAndCachedHosts(blkLocations,0,length,clusterMap);
-          splits.add(makeSplit(path, 0, length, splitHosts[0], splitHosts[1]));
         }
-      } else {
-        //Create empty hosts array for zero length files
-        splits.add(makeSplit(path, 0, length, new String[0]));
+        String[][] splitHosts = getSplitHostsAndCachedHosts(blkLocations,0,length,clusterMap);
+        splits.add(makeSplit(path, 0, length, splitHosts[0], splitHosts[1]));
       }
     }
     sw.stop();
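
In short, the hunk hoists the block-location lookup out of the old `if (length != 0)` guard and instead skips any file whose `BlockLocation[]` comes back empty, so zero-length files no longer produce a split with an empty host array. Below is a minimal, hypothetical driver sketching the observable difference; it is not part of the patch. The class name and the `/tmp/split-check` path are illustrative, and it assumes the default `FileSystem#getFileBlockLocations` behavior of returning an empty array for a zero-length file.

```java
import java.io.IOException;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;

public class ZeroLengthSplitCheck {
  public static void main(String[] args) throws IOException {
    JobConf job = new JobConf();
    FileSystem fs = FileSystem.getLocal(job);

    // Hypothetical scratch directory holding a single zero-length file.
    Path input = new Path("/tmp/split-check");
    fs.mkdirs(input);
    fs.create(new Path(input, "empty.txt")).close();

    TextInputFormat format = new TextInputFormat();
    format.configure(job);
    FileInputFormat.setInputPaths(job, input);

    // Before the patch: the empty file yielded one zero-length split with
    // an empty host array. After the patch: getFileBlockLocations() returns
    // no blocks for it, the blkLocations.length == 0 check fires, and the
    // file is skipped, so no split is produced at all.
    InputSplit[] splits = format.getSplits(job, 1);
    System.out.println("splits for an empty file: " + splits.length);
  }
}
```

A practical consequence of the `continue`: a directory containing only empty files now yields zero splits, so no no-op map tasks are scheduled for them.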