public class IOUtilFunctions extends Object
Modifier and Type | Class and Description |
---|---|
static class |
IOUtilFunctions.CountRowsTask |
Modifier and Type | Field and Description |
---|---|
static String |
EMPTY_TEXT_LINE |
static org.apache.hadoop.fs.PathFilter |
hiddenFileFilter |
static String |
LIBSVM_DELIM |
static String |
LIBSVM_INDEX_DELIM |
Constructor and Description |
---|
IOUtilFunctions() |
Modifier and Type | Method and Description |
---|---|
static int |
baToInt(byte[] ba,
int off) |
static long |
baToLong(byte[] ba,
int off) |
static int |
baToShort(byte[] ba,
int off) |
static void |
checkAndRaiseErrorCSVEmptyField(String row,
boolean fill,
boolean emptyFound) |
static void |
checkAndRaiseErrorCSVNumColumns(String fname,
String line,
String[] parts,
long ncol) |
static void |
closeSilently(Closeable io) |
static void |
closeSilently(org.apache.hadoop.mapred.RecordReader<?,?> rr) |
static int |
countNnz(String[] cols)
Returns the number of non-zero entries but avoids the expensive
string to double parsing.
|
static int |
countNnz(String[] cols,
int pos,
int len)
Returns the number of non-zero entries but avoids the expensive
string to double parsing.
|
static int |
countNumColumnsCSV(org.apache.hadoop.mapred.InputSplit[] splits,
org.apache.hadoop.mapred.InputFormat informat,
org.apache.hadoop.mapred.JobConf job,
String delim)
Counts the number of columns in a given collection of csv file splits.
|
static int |
countTokensCSV(String str,
String delim)
Counts the number of tokens defined by the given delimiter, respecting
the rules for quotes and escapes defined in RFC4180,
with robustness for various special cases.
|
static void |
deleteCrcFilesFromLocalFileSystem(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path path)
Delete the CRC files from the local file system associated with a
particular file and its metadata file.
|
static <T> T |
get(Future<T> in) |
static byte[] |
getBytes(ByteBuffer buff) |
static org.apache.hadoop.fs.FileSystem |
getFileSystem(org.apache.hadoop.conf.Configuration conf) |
static org.apache.hadoop.fs.FileSystem |
getFileSystem(org.apache.hadoop.fs.Path fname) |
static org.apache.hadoop.fs.FileSystem |
getFileSystem(org.apache.hadoop.fs.Path fname,
org.apache.hadoop.conf.Configuration conf) |
static org.apache.hadoop.fs.FileSystem |
getFileSystem(String fname) |
static String |
getPartFileName(int pos) |
static org.apache.hadoop.fs.Path[] |
getSequenceFilePaths(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path file) |
static int |
getUTFSize(String value)
Returns the serialized size in bytes of the given string value,
following the modified UTF-8 specification as used by Java's
DataInput/DataOutput.
|
static void |
intToBa(int val,
byte[] ba,
int off) |
static boolean |
isObjectStoreFileScheme(org.apache.hadoop.fs.Path path) |
static boolean |
isSameFileScheme(org.apache.hadoop.fs.Path path1,
org.apache.hadoop.fs.Path path2) |
static void |
longToBa(long val,
byte[] ba,
int off) |
static FileFormatPropertiesMM |
readAndParseMatrixMarketHeader(String filename) |
static String[] |
readMatrixMarketHeader(String filename) |
static void |
shortToBa(int val,
byte[] ba,
int off) |
static org.apache.hadoop.mapred.InputSplit[] |
sortInputSplits(org.apache.hadoop.mapred.InputSplit[] splits) |
static String[] |
split(String str,
String delim)
Splits a string by a specified delimiter into all tokens, including empty.
|
static String[] |
splitByFirst(String str,
String delim) |
static String[] |
splitCSV(String str,
String delim)
Splits a string by a specified delimiter into all tokens, including empty
while respecting the rules for quotes and escapes defined in RFC4180,
with robustness for various special cases.
|
static String[] |
splitCSV(String str,
String delim,
String[] tokens,
Set<String> naStrings)
Splits a string by a specified delimiter into all tokens, including empty
while respecting the rules for quotes and escapes defined in RFC4180,
with robustness for various special cases.
|
static InputStream |
toInputStream(String input) |
static String |
toString(InputStream input) |
public static final org.apache.hadoop.fs.PathFilter hiddenFileFilter
public static final String EMPTY_TEXT_LINE
public static final String LIBSVM_DELIM
public static final String LIBSVM_INDEX_DELIM
public static org.apache.hadoop.fs.FileSystem getFileSystem(String fname) throws IOException
IOException
public static org.apache.hadoop.fs.FileSystem getFileSystem(org.apache.hadoop.fs.Path fname) throws IOException
IOException
public static org.apache.hadoop.fs.FileSystem getFileSystem(org.apache.hadoop.conf.Configuration conf) throws IOException
IOException
public static org.apache.hadoop.fs.FileSystem getFileSystem(org.apache.hadoop.fs.Path fname, org.apache.hadoop.conf.Configuration conf) throws IOException
IOException
public static boolean isSameFileScheme(org.apache.hadoop.fs.Path path1, org.apache.hadoop.fs.Path path2)
public static boolean isObjectStoreFileScheme(org.apache.hadoop.fs.Path path)
public static String getPartFileName(int pos)
public static void closeSilently(Closeable io)
public static void closeSilently(org.apache.hadoop.mapred.RecordReader<?,?> rr)
public static void checkAndRaiseErrorCSVEmptyField(String row, boolean fill, boolean emptyFound) throws IOException
IOException
public static void checkAndRaiseErrorCSVNumColumns(String fname, String line, String[] parts, long ncol) throws IOException
IOException
public static String[] split(String str, String delim)
str
- string to split
delim
- delimiter
public static String[] splitCSV(String str, String delim)
str
- string to split
delim
- delimiter
public static String[] splitCSV(String str, String delim, String[] tokens, Set<String> naStrings)
str
- string to split
delim
- delimiter
tokens
- array for tokens, length needs to match the number of tokens
naStrings
- the strings to map to null value.
public static int countTokensCSV(String str, String delim)
str
- string to split
delim
- delimiter
public static FileFormatPropertiesMM readAndParseMatrixMarketHeader(String filename) throws DMLRuntimeException
DMLRuntimeException
public static int countNnz(String[] cols)
cols
- string array
public static int countNnz(String[] cols, int pos, int len)
cols
- string array
pos
- starting array index
len
- ending array index
public static int getUTFSize(String value)
value
- string value
public static InputStream toInputStream(String input)
public static String toString(InputStream input) throws IOException
IOException
public static org.apache.hadoop.mapred.InputSplit[] sortInputSplits(org.apache.hadoop.mapred.InputSplit[] splits)
public static int countNumColumnsCSV(org.apache.hadoop.mapred.InputSplit[] splits, org.apache.hadoop.mapred.InputFormat informat, org.apache.hadoop.mapred.JobConf job, String delim) throws IOException
splits
- input splits
informat
- input format
job
- job configuration
delim
- delimiter
IOException
- if IOException occurs
public static org.apache.hadoop.fs.Path[] getSequenceFilePaths(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path file) throws IOException
IOException
public static void deleteCrcFilesFromLocalFileSystem(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path path) throws IOException
fs
- the file system
path
- the path to a file
IOException
- thrown if error occurred attempting to delete crc files
public static int baToShort(byte[] ba, int off)
public static int baToInt(byte[] ba, int off)
public static long baToLong(byte[] ba, int off)
public static void shortToBa(int val, byte[] ba, int off)
public static void intToBa(int val, byte[] ba, int off)
public static void longToBa(long val, byte[] ba, int off)
public static byte[] getBytes(ByteBuffer buff)
public static <T> T get(Future<T> in)
Copyright © 2020 The Apache Software Foundation. All rights reserved.