Package org.apache.sysds.runtime.io
Class IOUtilFunctions
- java.lang.Object
- 
- org.apache.sysds.runtime.io.IOUtilFunctions
 
- 
 public class IOUtilFunctions extends Object 
- 
- 
Nested Class Summary
Nested Classes (Modifier and Type, Class, Description):
- static class IOUtilFunctions.CountRowsTask
 - 
Field Summary
Fields (Modifier and Type, Field, Description):
- static char CSV_QUOTE_CHAR
- static String EMPTY_TEXT_LINE
- static org.apache.hadoop.fs.PathFilter hiddenFileFilter
- static String LIBSVM_DELIM
- static String LIBSVM_INDEX_DELIM
 - 
Constructor Summary
Constructors (Constructor, Description):
- IOUtilFunctions()
 - 
Method Summary
All Methods | Static Methods | Concrete Methods
(Modifier and Type, Method, Description):
- static int baToInt(byte[] ba, int off)
- static long baToLong(byte[] ba, int off)
- static int baToShort(byte[] ba, int off)
- static void checkAndRaiseErrorCSVEmptyField(String row, boolean fill, boolean emptyFound)
- static void checkAndRaiseErrorCSVNumColumns(String fname, String line, String[] parts, long ncol)
- static void closeSilently(Closeable io)
- static void closeSilently(org.apache.hadoop.mapred.RecordReader<?,?> rr)
- static int countNnz(String[] cols): Returns the number of non-zero entries but avoids the expensive string to double parsing.
- static int countNnz(String[] cols, int pos, int len): Returns the number of non-zero entries but avoids the expensive string to double parsing.
- static int countNumColumnsCSV(org.apache.hadoop.mapred.InputSplit[] splits, org.apache.hadoop.mapred.InputFormat informat, org.apache.hadoop.mapred.JobConf job, String delim): Counts the number of columns in a given collection of csv file splits.
- static int countTokensCSV(String str, String delim): Counts the number of tokens defined by the given delimiter, respecting the rules for quotes and escapes defined in RFC4180, with robustness for various special cases.
- static void deleteCrcFilesFromLocalFileSystem(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path path): Delete the CRC files from the local file system associated with a particular file and its metadata file.
- static <T> T get(Future<T> in)
- static byte[] getBytes(ByteBuffer buff)
- static org.apache.hadoop.fs.FileSystem getFileSystem(String fname)
- static org.apache.hadoop.fs.FileSystem getFileSystem(org.apache.hadoop.conf.Configuration conf)
- static org.apache.hadoop.fs.FileSystem getFileSystem(org.apache.hadoop.fs.Path fname)
- static org.apache.hadoop.fs.FileSystem getFileSystem(org.apache.hadoop.fs.Path fname, org.apache.hadoop.conf.Configuration conf)
- static org.apache.hadoop.fs.Path[] getMetadataFilePaths(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path file)
- static String getPartFileName(int pos)
- static org.apache.hadoop.fs.Path[] getSequenceFilePaths(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path file)
- static int getUTFSize(String value): Returns the serialized size in bytes of the given string value, following the modified UTF-8 specification as used by Java's DataInput/DataOutput.
- static void intToBa(int val, byte[] ba, int off)
- static boolean isObjectStoreFileScheme(org.apache.hadoop.fs.Path path)
- static boolean isSameFileScheme(org.apache.hadoop.fs.Path path1, org.apache.hadoop.fs.Path path2)
- static void longToBa(long val, byte[] ba, int off)
- static FileFormatPropertiesMM readAndParseMatrixMarketHeader(String filename)
- static String[] readMatrixMarketHeader(String filename)
- static void shortToBa(int val, byte[] ba, int off)
- static org.apache.hadoop.mapred.InputSplit[] sortInputSplits(org.apache.hadoop.mapred.InputSplit[] splits)
- static String[] split(String str, String delim): Splits a string by a specified delimiter into all tokens, including empty.
- static String[] splitByFirst(String str, String delim)
- static String[] splitCSV(String str, String delim): Splits a string by a specified delimiter into all tokens, including empty while respecting the rules for quotes and escapes defined in RFC4180, with robustness for various special cases.
- static String[] splitCSV(String str, String delim, String[] cache): Splits a string by a specified delimiter into all tokens, including empty while respecting the rules for quotes and escapes defined in RFC4180, with robustness for various special cases.
- static String[] splitCSV(String str, String delim, String[] tokens, Set<String> naStrings): Splits a string by a specified delimiter into all tokens, including empty while respecting the rules for quotes and escapes defined in RFC4180, with robustness for various special cases.
- static InputStream toInputStream(String input)
- static String toString(InputStream input)
- static String trim(String str)
 
- 
- 
- 
Field Detail- 
hiddenFileFilter
public static final org.apache.hadoop.fs.PathFilter hiddenFileFilter
 - 
EMPTY_TEXT_LINE
public static final String EMPTY_TEXT_LINE
- See Also:
- Constant Field Values
 
 - 
CSV_QUOTE_CHAR
public static final char CSV_QUOTE_CHAR
- See Also:
- Constant Field Values
 
 - 
LIBSVM_DELIM
public static final String LIBSVM_DELIM
- See Also:
- Constant Field Values
 
 - 
LIBSVM_INDEX_DELIM
public static final String LIBSVM_INDEX_DELIM
- See Also:
- Constant Field Values
 
 
- 
 - 
Method Detail- 
getFileSystem
public static org.apache.hadoop.fs.FileSystem getFileSystem(String fname) throws IOException
- Throws:
- IOException
 
 - 
getFileSystem
public static org.apache.hadoop.fs.FileSystem getFileSystem(org.apache.hadoop.fs.Path fname) throws IOException
- Throws:
- IOException
 
 - 
getFileSystem
public static org.apache.hadoop.fs.FileSystem getFileSystem(org.apache.hadoop.conf.Configuration conf) throws IOException
- Throws:
- IOException
 
 - 
getFileSystem
public static org.apache.hadoop.fs.FileSystem getFileSystem(org.apache.hadoop.fs.Path fname, org.apache.hadoop.conf.Configuration conf) throws IOException
- Throws:
- IOException
 
 - 
isSameFileScheme
public static boolean isSameFileScheme(org.apache.hadoop.fs.Path path1, org.apache.hadoop.fs.Path path2)
 - 
isObjectStoreFileScheme
public static boolean isObjectStoreFileScheme(org.apache.hadoop.fs.Path path)
 - 
getPartFileName
public static String getPartFileName(int pos)
 - 
closeSilently
public static void closeSilently(Closeable io)
 - 
closeSilently
public static void closeSilently(org.apache.hadoop.mapred.RecordReader<?,?> rr)
 - 
checkAndRaiseErrorCSVEmptyField
public static void checkAndRaiseErrorCSVEmptyField(String row, boolean fill, boolean emptyFound) throws IOException
- Throws:
- IOException
 
 - 
checkAndRaiseErrorCSVNumColumns
public static void checkAndRaiseErrorCSVNumColumns(String fname, String line, String[] parts, long ncol) throws IOException
- Throws:
- IOException
 
 - 
split
public static String[] split(String str, String delim)
Splits a string by a specified delimiter into all tokens, including empty. NOTE: This method is meant as a faster drop-in replacement of the regular string split.
- Parameters:
- str - string to split
- delim - delimiter
- Returns:
- string array
 
 - 
splitCSV
public static String[] splitCSV(String str, String delim)
Splits a string by a specified delimiter into all tokens, including empty while respecting the rules for quotes and escapes defined in RFC4180, with robustness for various special cases.
- Parameters:
- str - string to split
- delim - delimiter
- Returns:
- string array of tokens
 
 - 
splitCSV
public static String[] splitCSV(String str, String delim, String[] cache)
Splits a string by a specified delimiter into all tokens, including empty while respecting the rules for quotes and escapes defined in RFC4180, with robustness for various special cases.
- Parameters:
- str - string to split
- delim - delimiter
- cache - cachedReturnArray
- Returns:
- string array of tokens
 
 - 
splitCSV
public static String[] splitCSV(String str, String delim, String[] tokens, Set<String> naStrings)
Splits a string by a specified delimiter into all tokens, including empty while respecting the rules for quotes and escapes defined in RFC4180, with robustness for various special cases.
- Parameters:
- str - string to split
- delim - delimiter
- tokens - array for tokens, length needs to match the number of tokens
- naStrings - the strings to map to null value.
- Returns:
- string array of tokens
 
 - 
countTokensCSV
public static int countTokensCSV(String str, String delim)
Counts the number of tokens defined by the given delimiter, respecting the rules for quotes and escapes defined in RFC4180, with robustness for various special cases.
- Parameters:
- str - string to split
- delim - delimiter
- Returns:
- number of tokens split by the given delimiter
 
 - 
readAndParseMatrixMarketHeader
public static FileFormatPropertiesMM readAndParseMatrixMarketHeader(String filename) throws DMLRuntimeException
- Throws:
- DMLRuntimeException
 
 - 
countNnz
public static int countNnz(String[] cols)
Returns the number of non-zero entries but avoids the expensive string to double parsing. This function is guaranteed to never underestimate.
- Parameters:
- cols - string array
- Returns:
- number of non-zeros
 
 - 
countNnz
public static int countNnz(String[] cols, int pos, int len)
Returns the number of non-zero entries but avoids the expensive string to double parsing. This function is guaranteed to never underestimate.
- Parameters:
- cols - string array
- pos - starting array index
- len - ending array index
- Returns:
- number of non-zeros
 
 - 
getUTFSize
public static int getUTFSize(String value)
Returns the serialized size in bytes of the given string value, following the modified UTF-8 specification as used by Java's DataInput/DataOutput. See the Java docs: docs/api/java/io/DataInput.html#modified-utf-8
- Parameters:
- value - string value
- Returns:
- string size for modified UTF-8 specification
 
 - 
toInputStream
public static InputStream toInputStream(String input)
 - 
toString
public static String toString(InputStream input) throws IOException
- Throws:
- IOException
 
 - 
sortInputSplits
public static org.apache.hadoop.mapred.InputSplit[] sortInputSplits(org.apache.hadoop.mapred.InputSplit[] splits)
 - 
countNumColumnsCSV
public static int countNumColumnsCSV(org.apache.hadoop.mapred.InputSplit[] splits, org.apache.hadoop.mapred.InputFormat informat, org.apache.hadoop.mapred.JobConf job, String delim) throws IOException
Counts the number of columns in a given collection of csv file splits. This primitive aborts if a row with more than 0 columns is found and hence is robust against empty file splits etc.
- Parameters:
- splits - input splits
- informat - input format
- job - job configuration
- delim - delimiter
- Returns:
- the number of columns in the collection of csv file splits
- Throws:
- IOException - if IOException occurs
 
 - 
getSequenceFilePaths
public static org.apache.hadoop.fs.Path[] getSequenceFilePaths(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path file) throws IOException
- Throws:
- IOException
 
 - 
getMetadataFilePaths
public static org.apache.hadoop.fs.Path[] getMetadataFilePaths(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path file) throws IOException
- Throws:
- IOException
 
 - 
deleteCrcFilesFromLocalFileSystem
public static void deleteCrcFilesFromLocalFileSystem(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path path) throws IOException
Delete the CRC files from the local file system associated with a particular file and its metadata file.
- Parameters:
- fs - the file system
- path - the path to a file
- Throws:
- IOException - thrown if error occurred attempting to delete crc files
 
 - 
baToShort
public static int baToShort(byte[] ba, int off)
 - 
baToInt
public static int baToInt(byte[] ba, int off)
 - 
baToLong
public static long baToLong(byte[] ba, int off)
 - 
shortToBa
public static void shortToBa(int val, byte[] ba, int off)
 - 
intToBa
public static void intToBa(int val, byte[] ba, int off)
 - 
longToBa
public static void longToBa(long val, byte[] ba, int off)
 - 
getBytes
public static byte[] getBytes(ByteBuffer buff)
 - 
get
public static <T> T get(Future<T> in)
 
- 
 
-