public class SparkUtils
extends java.lang.Object
| Constructor and Description |
|---|
| SparkUtils() |
| Modifier and Type | Method and Description |
|---|---|
| static java.lang.Class<? extends org.apache.hadoop.io.compress.CompressionCodec> | getCompressionCodeClass(java.lang.String compressionCodecClass) |
| static <T> T | readObjectFromFile(java.lang.String path, java.lang.Class<T> type, org.apache.spark.api.java.JavaSparkContext sc): Read an object from HDFS (or local) using default Java object serialization |
| static <T> T | readObjectFromFile(java.lang.String path, java.lang.Class<T> type, org.apache.spark.SparkContext sc): Read an object from HDFS (or local) using default Java object serialization |
| static java.lang.String | readStringFromFile(java.lang.String path, org.apache.spark.api.java.JavaSparkContext sc): Read a UTF-8 format String from HDFS (or local) |
| static java.lang.String | readStringFromFile(java.lang.String path, org.apache.spark.SparkContext sc): Read a UTF-8 format String from HDFS (or local) |
| static void | registerKryoClasses(org.apache.spark.SparkConf conf): Register the DataVec writable classes for Kryo |
| static <T> java.util.List<org.apache.spark.api.java.JavaRDD<T>> | splitData(SplitStrategy splitStrategy, org.apache.spark.api.java.JavaRDD<T> data, long seed) |
| static void | writeAnalysisHTMLToFile(java.lang.String outputPath, DataAnalysis dataAnalysis, org.apache.spark.api.java.JavaSparkContext sc): Write a DataAnalysis to HDFS (or locally) as an HTML file |
| static void | writeObjectToFile(java.lang.String path, java.lang.Object toWrite, org.apache.spark.api.java.JavaSparkContext sc): Write an object to HDFS (or local) using default Java object serialization |
| static void | writeObjectToFile(java.lang.String path, java.lang.Object toWrite, org.apache.spark.SparkContext sc): Write an object to HDFS (or local) using default Java object serialization |
| static void | writeSchema(java.lang.String outputPath, Schema schema, org.apache.spark.api.java.JavaSparkContext sc): Write a schema to an HDFS (or local) file in a human-readable format |
| static void | writeStringToFile(java.lang.String path, java.lang.String toWrite, org.apache.spark.api.java.JavaSparkContext sc): Write a String to a file (on HDFS or local) in UTF-8 format |
| static void | writeStringToFile(java.lang.String path, java.lang.String toWrite, org.apache.spark.SparkContext sc): Write a String to a file (on HDFS or local) in UTF-8 format |
| static void | writeWritablesToFile(java.lang.String outputPath, java.lang.String delim, java.util.List<java.util.List<Writable>> writables, org.apache.spark.api.java.JavaSparkContext sc): Write a set of writables (or sequence) to HDFS (or locally) |
public static <T> java.util.List<org.apache.spark.api.java.JavaRDD<T>> splitData(SplitStrategy splitStrategy, org.apache.spark.api.java.JavaRDD<T> data, long seed)
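splitData splits a JavaRDD into multiple RDDs according to the supplied SplitStrategy; the seed presumably controls any randomness in the split. Below is a minimal sketch of calling it. The import packages for SplitStrategy and SparkUtils are assumptions (adjust them to your DataVec version), and the caller supplies whichever SplitStrategy implementation that version provides.

```java
import java.util.Arrays;
import java.util.List;

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
// Assumed packages; adjust to match your DataVec version:
import org.datavec.spark.transform.SplitStrategy;
import org.datavec.spark.transform.utils.SparkUtils;

public class SplitDataExample {

    // Splits the RDD according to the supplied strategy; a fixed seed makes
    // the split reproducible across runs. The caller passes whichever
    // SplitStrategy implementation their DataVec version provides.
    public static List<JavaRDD<Integer>> split(JavaSparkContext sc, SplitStrategy strategy) {
        JavaRDD<Integer> data = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10));
        return SparkUtils.splitData(strategy, data, 12345L);
    }
}
```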
public static void writeStringToFile(java.lang.String path, java.lang.String toWrite, org.apache.spark.api.java.JavaSparkContext sc) throws java.io.IOException
Parameters:
path - Path to write to
toWrite - String to write
sc - Spark context
Throws:
java.io.IOException

public static void writeStringToFile(java.lang.String path, java.lang.String toWrite, org.apache.spark.SparkContext sc) throws java.io.IOException
Parameters:
path - Path to write to
toWrite - String to write
sc - Spark context
Throws:
java.io.IOException
public static java.lang.String readStringFromFile(java.lang.String path, org.apache.spark.api.java.JavaSparkContext sc) throws java.io.IOException
Parameters:
path - Path to read the String from
sc - Spark context
Throws:
java.io.IOException

public static java.lang.String readStringFromFile(java.lang.String path, org.apache.spark.SparkContext sc) throws java.io.IOException
Parameters:
path - Path to read the String from
sc - Spark context
Throws:
java.io.IOException
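A minimal round-trip sketch for the String helpers above. The SparkUtils import package is an assumption (adjust it to your DataVec version), the local master is only for experimentation, and the path shown is just an illustrative location.

```java
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
// Assumed package; adjust to match your DataVec version:
import org.datavec.spark.transform.utils.SparkUtils;

public class StringFileExample {
    public static void main(String[] args) throws Exception {
        SparkConf conf = new SparkConf().setAppName("StringFileExample").setMaster("local[*]");
        JavaSparkContext sc = new JavaSparkContext(conf);
        try {
            // Any Hadoop-compatible path works, e.g. an hdfs:// URI on a cluster
            String path = "file:///tmp/sparkutils-example.txt";
            SparkUtils.writeStringToFile(path, "hello, world", sc);   // written as UTF-8
            String readBack = SparkUtils.readStringFromFile(path, sc);
            System.out.println(readBack);                             // prints: hello, world
        } finally {
            sc.stop();
        }
    }
}
```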
public static void writeObjectToFile(java.lang.String path, java.lang.Object toWrite, org.apache.spark.api.java.JavaSparkContext sc) throws java.io.IOException
Parameters:
path - Path to write the object to
toWrite - Object to write
sc - Spark context
Throws:
java.io.IOException

public static void writeObjectToFile(java.lang.String path, java.lang.Object toWrite, org.apache.spark.SparkContext sc) throws java.io.IOException
Parameters:
path - Path to write the object to
toWrite - Object to write
sc - Spark context
Throws:
java.io.IOException
public static <T> T readObjectFromFile(java.lang.String path, java.lang.Class<T> type, org.apache.spark.api.java.JavaSparkContext sc) throws java.io.IOException
Type Parameters:
T - Type of the object to read
Parameters:
path - File to read
type - Class of the object to read
sc - Spark context
Throws:
java.io.IOException

public static <T> T readObjectFromFile(java.lang.String path, java.lang.Class<T> type, org.apache.spark.SparkContext sc) throws java.io.IOException
Type Parameters:
T - Type of the object to read
Parameters:
path - File to read
type - Class of the object to read
sc - Spark context
Throws:
java.io.IOException
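The object helpers use default Java object serialization, so whatever is written must implement java.io.Serializable. A small sketch follows; the SparkUtils import package and the output path are assumptions.

```java
import java.util.Arrays;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
// Assumed package; adjust to match your DataVec version:
import org.datavec.spark.transform.utils.SparkUtils;

public class ObjectFileExample {
    public static void main(String[] args) throws Exception {
        SparkConf conf = new SparkConf().setAppName("ObjectFileExample").setMaster("local[*]");
        JavaSparkContext sc = new JavaSparkContext(conf);
        try {
            List<String> toWrite = Arrays.asList("a", "b", "c");   // Serializable, so Java serialization can handle it
            String path = "file:///tmp/sparkutils-example.bin";
            SparkUtils.writeObjectToFile(path, toWrite, sc);
            // Pass the expected class so the generic method returns a typed result
            List<?> readBack = SparkUtils.readObjectFromFile(path, List.class, sc);
            System.out.println(readBack);   // prints: [a, b, c]
        } finally {
            sc.stop();
        }
    }
}
```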
public static void writeSchema(java.lang.String outputPath, Schema schema, org.apache.spark.api.java.JavaSparkContext sc) throws java.io.IOException
Parameters:
outputPath - Output path to write to
schema - Schema to write
sc - Spark context
Throws:
java.io.IOException
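A sketch of writing a Schema built with DataVec's Schema.Builder. The SparkUtils import package and the output path are assumptions; the Schema import reflects the usual DataVec package.

```java
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.datavec.api.transform.schema.Schema;
// Assumed package; adjust to match your DataVec version:
import org.datavec.spark.transform.utils.SparkUtils;

public class WriteSchemaExample {
    public static void main(String[] args) throws Exception {
        SparkConf conf = new SparkConf().setAppName("WriteSchemaExample").setMaster("local[*]");
        JavaSparkContext sc = new JavaSparkContext(conf);
        try {
            // A simple three-column schema, written out in human-readable form
            Schema schema = new Schema.Builder()
                    .addColumnString("name")
                    .addColumnInteger("age")
                    .addColumnDouble("height")
                    .build();
            SparkUtils.writeSchema("file:///tmp/schema.txt", schema, sc);
        } finally {
            sc.stop();
        }
    }
}
```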
public static void writeAnalysisHTMLToFile(java.lang.String outputPath, DataAnalysis dataAnalysis, org.apache.spark.api.java.JavaSparkContext sc)
Parameters:
outputPath - Output path
dataAnalysis - Analysis to generate the HTML file for
sc - Spark context

public static void writeWritablesToFile(java.lang.String outputPath, java.lang.String delim, java.util.List<java.util.List<Writable>> writables, org.apache.spark.api.java.JavaSparkContext sc) throws java.io.IOException
Parameters:
outputPath - Path to write the output to
delim - Delimiter
writables - Data to write
sc - Spark context
Throws:
java.io.IOException
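For writeWritablesToFile, each inner List<Writable> becomes one delimited line of output. A sketch follows, using the usual DataVec writable classes (Text, IntWritable); the SparkUtils import package and the output path are assumptions.

```java
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.datavec.api.writable.IntWritable;
import org.datavec.api.writable.Text;
import org.datavec.api.writable.Writable;
// Assumed package; adjust to match your DataVec version:
import org.datavec.spark.transform.utils.SparkUtils;

public class WriteWritablesExample {
    public static void main(String[] args) throws Exception {
        SparkConf conf = new SparkConf().setAppName("WriteWritablesExample").setMaster("local[*]");
        JavaSparkContext sc = new JavaSparkContext(conf);
        try {
            List<List<Writable>> rows = new ArrayList<>();
            rows.add(Arrays.<Writable>asList(new Text("alice"), new IntWritable(30)));
            rows.add(Arrays.<Writable>asList(new Text("bob"), new IntWritable(25)));
            // Expected to produce comma-delimited lines such as "alice,30"
            SparkUtils.writeWritablesToFile("file:///tmp/rows.csv", ",", rows, sc);
        } finally {
            sc.stop();
        }
    }
}
```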
public static void registerKryoClasses(org.apache.spark.SparkConf conf)
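registerKryoClasses registers the DataVec writable classes on a SparkConf, so it should be called before the context is created. Pairing it with the Kryo serializer setting below is an assumption about typical usage, not something this page states.

```java
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
// Assumed package; adjust to match your DataVec version:
import org.datavec.spark.transform.utils.SparkUtils;

public class KryoSetupExample {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf()
                .setAppName("KryoSetupExample")
                .setMaster("local[*]")
                // Switching to Kryo is an assumed companion step to registering the classes
                .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
        SparkUtils.registerKryoClasses(conf);               // register the DataVec writable classes
        JavaSparkContext sc = new JavaSparkContext(conf);   // create the context only after the conf is complete
        sc.stop();
    }
}
```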
public static java.lang.Class<? extends org.apache.hadoop.io.compress.CompressionCodec> getCompressionCodeClass(java.lang.String compressionCodecClass)
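getCompressionCodeClass presumably resolves a fully qualified Hadoop codec class name to its Class object. In the sketch below, GzipCodec is just an illustrative codec, and the SparkUtils import package is again an assumption.

```java
import org.apache.hadoop.io.compress.CompressionCodec;
// Assumed package; adjust to match your DataVec version:
import org.datavec.spark.transform.utils.SparkUtils;

public class CodecLookupExample {
    public static void main(String[] args) {
        // Look up the codec class by its fully qualified name
        Class<? extends CompressionCodec> codecClass =
                SparkUtils.getCompressionCodeClass("org.apache.hadoop.io.compress.GzipCodec");
        System.out.println(codecClass.getName());
    }
}
```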