Package org.apache.sysds.runtime.util
Class HDFSTool
- java.lang.Object
-
- org.apache.sysds.runtime.util.HDFSTool
-
public class HDFSTool extends Object
-
-
Field Summary
Fields
Modifier and Type | Field | Description
static String
DFS_BLOCKSIZE
static String
DFS_PERMISSIONS_ENABLED
static String
DFS_REPLICATION
static String
FS_DEFAULTFS
static String
IO_FILE_BUFFER_SIZE
static String
IO_SERIALIZATIONS
static boolean
USE_BINARYBLOCK_SERIALIZATION
-
Constructor Summary
Constructors
Constructor | Description
HDFSTool()
-
Method Summary
All Methods | Static Methods | Concrete Methods
Modifier and Type | Method | Description
static void
addBinaryBlockSerializationFramework(org.apache.hadoop.conf.Configuration job)
static void
copyFileOnHDFS(String originalDir, String newDir)
static void
createDirIfNotExistOnHDFS(String dir, String permissions)
static void
createDirIfNotExistOnHDFS(org.apache.hadoop.fs.Path path, String permissions)
static void
deleteFileIfExistOnHDFS(String dir)
static void
deleteFileIfExistOnHDFS(org.apache.hadoop.fs.Path outpath, org.apache.hadoop.mapred.JobConf job)
static void
deleteFileIfExistOnLFS(org.apache.hadoop.fs.Path outpath, org.apache.hadoop.mapred.JobConf job)
static void
deleteFileWithMTDIfExistOnHDFS(String fname)
static long
estimateNnzBasedOnFileSize(org.apache.hadoop.fs.Path path, long rlen, long clen, int blen, double factor)
static boolean
existsFileOnHDFS(String fname)
static org.apache.hadoop.fs.FileStatus[]
getDirectoryListing(String fname)
static long
getFilesizeOnHDFS(org.apache.hadoop.fs.Path path)
Returns the size of a file or directory on HDFS in bytes.
static org.apache.hadoop.fs.FSDataOutputStream
getHDFSDataOutputStream(String filename, boolean overwrite)
static boolean
isDirectory(String fname)
static boolean
isFileEmpty(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path dir)
static boolean
isHDFSFileEmpty(String dir)
static void
mergeIntoSingleFile(String originalDir, String newFile)
static String
metaDataToString(Types.ValueType vt, Types.ValueType[] schema, Types.DataType dt, DataCharacteristics dc, Types.FileFormat fmt, FileFormatProperties formatProperties, PrivacyConstraint privacyConstraint)
static boolean
readBooleanFromHDFSFile(String filename)
static double[]
readColumnVectorFromHDFS(String dir, Types.FileFormat fmt, long rlen, long clen, int blen)
static double
readDoubleFromHDFSFile(String filename)
static long
readIntegerFromHDFSFile(String filename)
static double[][]
readMatrixFromHDFS(String dir, Types.FileFormat fmt, long rlen, long clen, int blen)
static Object
readObjectFromHDFSFile(String filename, Types.ValueType vt)
static ScalarObject
readScalarObjectFromHDFSFile(String fname, Types.ValueType vt)
static String
readStringFromHDFSFile(String filename)
static void
renameFileOnHDFS(String originalDir, String newDir)
static void
writeBooleanToHDFS(boolean b, String filename)
static void
writeDoubleToHDFS(double d, String filename)
static void
writeIntToHDFS(long i, String filename)
static void
writeMetaDataFile(String mtdfile, Types.ValueType vt, Types.ValueType[] schema, Types.DataType dt, DataCharacteristics mc, Types.FileFormat fmt)
static void
writeMetaDataFile(String mtdfile, Types.ValueType vt, Types.ValueType[] schema, Types.DataType dt, DataCharacteristics dc, Types.FileFormat fmt, FileFormatProperties formatProperties)
static void
writeMetaDataFile(String mtdfile, Types.ValueType vt, Types.ValueType[] schema, Types.DataType dt, DataCharacteristics dc, Types.FileFormat fmt, FileFormatProperties formatProperties, PrivacyConstraint privacyConstraint)
static void
writeMetaDataFile(String mtdfile, Types.ValueType vt, Types.ValueType[] schema, Types.DataType dt, DataCharacteristics mc, Types.FileFormat fmt, PrivacyConstraint privacyConstraint)
static void
writeMetaDataFile(String mtdfile, Types.ValueType vt, DataCharacteristics mc, Types.FileFormat fmt)
static void
writeMetaDataFile(String mtdfile, Types.ValueType vt, DataCharacteristics dc, Types.FileFormat fmt, FileFormatProperties formatProperties)
static void
writeMetaDataFile(String mtdfile, Types.ValueType vt, DataCharacteristics dc, Types.FileFormat fmt, FileFormatProperties formatProperties, PrivacyConstraint privacyConstraint)
static void
writeMetaDataFile(String mtdfile, Types.ValueType vt, DataCharacteristics mc, Types.FileFormat fmt, PrivacyConstraint privacyConstraint)
static void
writeObjectToHDFS(Object obj, String filename)
static void
writeScalarMetaDataFile(String mtdfile, Types.ValueType vt)
static void
writeScalarMetaDataFile(String mtdfile, Types.ValueType vt, PrivacyConstraint privacyConstraint)
static void
writeScalarToHDFS(ScalarObject scalar, String fname)
Helper function to write scalars to HDFS, including writing their metadata and removing CRC files in the local file system.
static void
writeStringToHDFS(String s, String filename)
-
-
-
Field Detail
-
DFS_REPLICATION
public static final String DFS_REPLICATION
- See Also:
- Constant Field Values
-
IO_FILE_BUFFER_SIZE
public static final String IO_FILE_BUFFER_SIZE
- See Also:
- Constant Field Values
-
IO_SERIALIZATIONS
public static final String IO_SERIALIZATIONS
- See Also:
- Constant Field Values
-
DFS_BLOCKSIZE
public static final String DFS_BLOCKSIZE
- See Also:
- Constant Field Values
-
DFS_PERMISSIONS_ENABLED
public static final String DFS_PERMISSIONS_ENABLED
- See Also:
- Constant Field Values
-
FS_DEFAULTFS
public static final String FS_DEFAULTFS
- See Also:
- Constant Field Values
-
USE_BINARYBLOCK_SERIALIZATION
public static final boolean USE_BINARYBLOCK_SERIALIZATION
- See Also:
- Constant Field Values
-
-
Method Detail
-
addBinaryBlockSerializationFramework
public static void addBinaryBlockSerializationFramework(org.apache.hadoop.conf.Configuration job)
-
existsFileOnHDFS
public static boolean existsFileOnHDFS(String fname)
-
isDirectory
public static boolean isDirectory(String fname)
-
getDirectoryListing
public static org.apache.hadoop.fs.FileStatus[] getDirectoryListing(String fname)
-
deleteFileWithMTDIfExistOnHDFS
public static void deleteFileWithMTDIfExistOnHDFS(String fname) throws IOException
- Throws:
IOException
-
deleteFileIfExistOnHDFS
public static void deleteFileIfExistOnHDFS(String dir) throws IOException
- Throws:
IOException
-
deleteFileIfExistOnHDFS
public static void deleteFileIfExistOnHDFS(org.apache.hadoop.fs.Path outpath, org.apache.hadoop.mapred.JobConf job) throws IOException
- Throws:
IOException
-
deleteFileIfExistOnLFS
public static void deleteFileIfExistOnLFS(org.apache.hadoop.fs.Path outpath, org.apache.hadoop.mapred.JobConf job) throws IOException
- Throws:
IOException
-
isHDFSFileEmpty
public static boolean isHDFSFileEmpty(String dir) throws IOException
- Throws:
IOException
-
isFileEmpty
public static boolean isFileEmpty(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path dir) throws IOException
- Throws:
IOException
-
renameFileOnHDFS
public static void renameFileOnHDFS(String originalDir, String newDir) throws IOException
- Throws:
IOException
-
mergeIntoSingleFile
public static void mergeIntoSingleFile(String originalDir, String newFile) throws IOException
- Throws:
IOException
-
copyFileOnHDFS
public static void copyFileOnHDFS(String originalDir, String newDir) throws IOException
- Throws:
IOException
-
estimateNnzBasedOnFileSize
public static long estimateNnzBasedOnFileSize(org.apache.hadoop.fs.Path path, long rlen, long clen, int blen, double factor) throws IOException
- Throws:
IOException
-
getFilesizeOnHDFS
public static long getFilesizeOnHDFS(org.apache.hadoop.fs.Path path) throws IOException
Returns the size of a file or directory on HDFS in bytes.
- Parameters:
path - file system path
- Returns:
file size
- Throws:
IOException - if IOException occurs
-
readDoubleFromHDFSFile
public static double readDoubleFromHDFSFile(String filename) throws IOException
- Throws:
IOException
-
readIntegerFromHDFSFile
public static long readIntegerFromHDFSFile(String filename) throws IOException
- Throws:
IOException
-
readBooleanFromHDFSFile
public static boolean readBooleanFromHDFSFile(String filename) throws IOException
- Throws:
IOException
-
readStringFromHDFSFile
public static String readStringFromHDFSFile(String filename) throws IOException
- Throws:
IOException
-
readObjectFromHDFSFile
public static Object readObjectFromHDFSFile(String filename, Types.ValueType vt) throws IOException
- Throws:
IOException
-
readScalarObjectFromHDFSFile
public static ScalarObject readScalarObjectFromHDFSFile(String fname, Types.ValueType vt)
-
writeScalarToHDFS
public static void writeScalarToHDFS(ScalarObject scalar, String fname)
Helper function to write scalars to HDFS, including writing their metadata and removing CRC files in the local file system.
- Parameters:
scalar - scalar data object
fname - file name
-
writeDoubleToHDFS
public static void writeDoubleToHDFS(double d, String filename) throws IOException
- Throws:
IOException
-
writeIntToHDFS
public static void writeIntToHDFS(long i, String filename) throws IOException
- Throws:
IOException
-
writeBooleanToHDFS
public static void writeBooleanToHDFS(boolean b, String filename) throws IOException
- Throws:
IOException
-
writeStringToHDFS
public static void writeStringToHDFS(String s, String filename) throws IOException
- Throws:
IOException
-
writeObjectToHDFS
public static void writeObjectToHDFS(Object obj, String filename) throws IOException
- Throws:
IOException
-
writeMetaDataFile
public static void writeMetaDataFile(String mtdfile, Types.ValueType vt, DataCharacteristics mc, Types.FileFormat fmt) throws IOException
- Throws:
IOException
-
writeMetaDataFile
public static void writeMetaDataFile(String mtdfile, Types.ValueType vt, DataCharacteristics mc, Types.FileFormat fmt, PrivacyConstraint privacyConstraint) throws IOException
- Throws:
IOException
-
writeMetaDataFile
public static void writeMetaDataFile(String mtdfile, Types.ValueType vt, Types.ValueType[] schema, Types.DataType dt, DataCharacteristics mc, Types.FileFormat fmt) throws IOException
- Throws:
IOException
-
writeMetaDataFile
public static void writeMetaDataFile(String mtdfile, Types.ValueType vt, Types.ValueType[] schema, Types.DataType dt, DataCharacteristics mc, Types.FileFormat fmt, PrivacyConstraint privacyConstraint) throws IOException
- Throws:
IOException
-
writeMetaDataFile
public static void writeMetaDataFile(String mtdfile, Types.ValueType vt, DataCharacteristics dc, Types.FileFormat fmt, FileFormatProperties formatProperties) throws IOException
- Throws:
IOException
-
writeMetaDataFile
public static void writeMetaDataFile(String mtdfile, Types.ValueType vt, DataCharacteristics dc, Types.FileFormat fmt, FileFormatProperties formatProperties, PrivacyConstraint privacyConstraint) throws IOException
- Throws:
IOException
-
writeMetaDataFile
public static void writeMetaDataFile(String mtdfile, Types.ValueType vt, Types.ValueType[] schema, Types.DataType dt, DataCharacteristics dc, Types.FileFormat fmt, FileFormatProperties formatProperties) throws IOException
- Throws:
IOException
-
writeMetaDataFile
public static void writeMetaDataFile(String mtdfile, Types.ValueType vt, Types.ValueType[] schema, Types.DataType dt, DataCharacteristics dc, Types.FileFormat fmt, FileFormatProperties formatProperties, PrivacyConstraint privacyConstraint) throws IOException
- Throws:
IOException
-
writeScalarMetaDataFile
public static void writeScalarMetaDataFile(String mtdfile, Types.ValueType vt) throws IOException
- Throws:
IOException
-
writeScalarMetaDataFile
public static void writeScalarMetaDataFile(String mtdfile, Types.ValueType vt, PrivacyConstraint privacyConstraint) throws IOException
- Throws:
IOException
-
metaDataToString
public static String metaDataToString(Types.ValueType vt, Types.ValueType[] schema, Types.DataType dt, DataCharacteristics dc, Types.FileFormat fmt, FileFormatProperties formatProperties, PrivacyConstraint privacyConstraint) throws org.apache.wink.json4j.JSONException, DMLRuntimeException
- Throws:
org.apache.wink.json4j.JSONException
DMLRuntimeException
-
readMatrixFromHDFS
public static double[][] readMatrixFromHDFS(String dir, Types.FileFormat fmt, long rlen, long clen, int blen) throws IOException, DMLRuntimeException
- Throws:
IOException
DMLRuntimeException
-
readColumnVectorFromHDFS
public static double[] readColumnVectorFromHDFS(String dir, Types.FileFormat fmt, long rlen, long clen, int blen) throws IOException, DMLRuntimeException
- Throws:
IOException
DMLRuntimeException
-
createDirIfNotExistOnHDFS
public static void createDirIfNotExistOnHDFS(String dir, String permissions) throws IOException
- Throws:
IOException
-
createDirIfNotExistOnHDFS
public static void createDirIfNotExistOnHDFS(org.apache.hadoop.fs.Path path, String permissions) throws IOException
- Throws:
IOException
-
getHDFSDataOutputStream
public static org.apache.hadoop.fs.FSDataOutputStream getHDFSDataOutputStream(String filename, boolean overwrite) throws IOException
- Throws:
IOException
-
-