public class WordVectorSerializer
extends java.lang.Object
| Modifier and Type | Class and Description |
|---|---|
protected static class |
WordVectorSerializer.BinaryReader |
protected static class |
WordVectorSerializer.CSVReader |
protected static interface |
WordVectorSerializer.Reader |
| Modifier and Type | Method and Description |
|---|---|
static java.lang.String |
decodeB64(java.lang.String word) |
static java.lang.String |
encodeB64(java.lang.String word) |
static Word2Vec |
fromPair(Pair<InMemoryLookupTable,VocabCache> pair)
Load word vectors from the given pair
|
static WordVectors |
fromTableAndVocab(WeightLookupTable table,
VocabCache vocab)
Load word vectors for the given vocab and table
|
static float |
getFloat(byte[] b)
Read a float from a byte array Credit to:
https://github.com/NLPchina/Word2VEC_java/blob/master/src/com/ansj/vec/Word2VEC.java
|
protected static TokenizerFactory |
getTokenizerFactory(VectorsConfiguration configuration) |
static Word2Vec |
loadFullModel(java.lang.String path)
Deprecated.
|
static Word2Vec |
loadGoogleModel(java.io.File modelFile,
boolean binary)
Deprecated.
|
static Word2Vec |
loadGoogleModel(java.io.File modelFile,
boolean binary,
boolean lineBreaks)
Deprecated.
|
static WordVectors |
loadGoogleModelNonNormalized(java.io.File modelFile,
boolean binary,
boolean lineBreaks)
Deprecated.
|
static WordVectors |
loadStaticModel(java.io.File file)
This method restores previously saved w2v model.
|
static Pair<InMemoryLookupTable,VocabCache> |
loadTxt(java.io.File vectorsFile)
Loads an in memory cache from the given path (sets syn0 and the vocab)
|
static WordVectors |
loadTxtVectors(java.io.File vectorsFile)
Deprecated.
|
static WordVectors |
loadTxtVectors(java.io.InputStream stream,
boolean skipFirstLine)
Deprecated.
|
static void |
printOutProjectedMemoryUse(long numWords,
int vectorLength,
int numTables) |
static float |
readFloat(java.io.InputStream is)
Read a float from a data input stream Credit to:
https://github.com/NLPchina/Word2VEC_java/blob/master/src/com/ansj/vec/Word2VEC.java
|
static ParagraphVectors |
readParagraphVectors(java.io.File file)
This method restores ParagraphVectors model previously saved with writeParagraphVectors()
|
static ParagraphVectors |
readParagraphVectors(java.io.InputStream stream)
This method restores ParagraphVectors model previously saved with writeParagraphVectors()
|
static ParagraphVectors |
readParagraphVectors(java.lang.String path)
This method restores ParagraphVectors model previously saved with writeParagraphVectors()
|
static ParagraphVectors |
readParagraphVectorsFromText(java.io.File file)
Deprecated.
|
static ParagraphVectors |
readParagraphVectorsFromText(java.io.InputStream stream)
Deprecated.
|
static ParagraphVectors |
readParagraphVectorsFromText(java.lang.String path)
Deprecated.
|
static <T extends SequenceElement> |
readSequenceVectors(SequenceElementFactory<T> factory,
java.io.File file)
This method loads previously saved SequenceVectors model from File
|
static <T extends SequenceElement> |
readSequenceVectors(SequenceElementFactory<T> factory,
java.io.InputStream stream)
This method loads previously saved SequenceVectors model from InputStream
|
static java.lang.String |
readString(java.io.DataInputStream dis)
Read a string from a data input stream Credit to:
https://github.com/NLPchina/Word2VEC_java/blob/master/src/com/ansj/vec/Word2VEC.java
|
static VocabCache<VocabWord> |
readVocabCache(java.io.File file)
This method reads vocab cache from provided file.
|
static VocabCache<VocabWord> |
readVocabCache(java.io.InputStream stream)
This method reads vocab cache from provided InputStream.
|
static Word2Vec |
readWord2Vec(java.io.File file)
Deprecated.
|
static Word2Vec |
readWord2VecFromText(java.io.File vectors,
java.io.File hs,
java.io.File h_codes,
java.io.File h_points,
VectorsConfiguration configuration)
This method allows you to read ParagraphVectors from externally originated vectors and syn1.
|
static Word2Vec |
readWord2VecModel(java.io.File file)
This method loads a Word2Vec model from the given file. Supported formats include:
1) Binary model, either compressed or not.
|
static Word2Vec |
readWord2VecModel(java.io.File file,
boolean extendedModel)
This method loads a Word2Vec model from the given file. Supported formats include:
1) Binary model, either compressed or not.
|
static Word2Vec |
readWord2VecModel(java.lang.String path)
This method loads a Word2Vec model from the given path. Supported formats include:
1) Binary model, either compressed or not.
|
static Word2Vec |
readWord2VecModel(java.lang.String path,
boolean extendedModel)
This method loads a Word2Vec model from the given path. Supported formats include:
1) Binary model, either compressed or not.
|
static void |
writeFullModel(Word2Vec vec,
java.lang.String path)
Deprecated.
|
static void |
writeParagraphVectors(ParagraphVectors vectors,
java.io.File file)
This method saves ParagraphVectors model into compressed zip file
|
static void |
writeParagraphVectors(ParagraphVectors vectors,
java.io.OutputStream stream)
This method saves ParagraphVectors model into compressed zip file and sends it to output stream
|
static void |
writeParagraphVectors(ParagraphVectors vectors,
java.lang.String path)
This method saves ParagraphVectors model into compressed zip file located at path
|
static <T extends SequenceElement> |
writeSequenceVectors(SequenceVectors<T> vectors,
SequenceElementFactory<T> factory,
java.io.File file)
This method saves specified SequenceVectors model to target file
|
static <T extends SequenceElement> |
writeSequenceVectors(SequenceVectors<T> vectors,
SequenceElementFactory<T> factory,
java.io.OutputStream stream)
This method saves specified SequenceVectors model to target OutputStream
|
static <T extends SequenceElement> |
writeSequenceVectors(SequenceVectors<T> vectors,
SequenceElementFactory<T> factory,
java.lang.String path)
This method saves specified SequenceVectors model to target file path
|
static void |
writeTsneFormat(Glove vec,
org.nd4j.linalg.api.ndarray.INDArray tsne,
java.io.File csv)
Write the tsne format
|
static void |
writeTsneFormat(Word2Vec vec,
org.nd4j.linalg.api.ndarray.INDArray tsne,
java.io.File csv)
Write the tsne format
|
static void |
writeVocabCache(VocabCache<VocabWord> vocabCache,
java.io.File file)
This method saves vocab cache to provided File.
|
static void |
writeVocabCache(VocabCache<VocabWord> vocabCache,
java.io.OutputStream stream)
This method saves vocab cache to provided OutputStream.
|
static void |
writeWord2VecModel(Word2Vec vectors,
java.io.File file)
This method saves Word2Vec model into compressed zip file and sends it to output stream
PLEASE NOTE: This method saves FULL model, including syn0 AND syn1
|
static void |
writeWord2VecModel(Word2Vec vectors,
java.io.OutputStream stream)
This method saves Word2Vec model into compressed zip file and sends it to output stream
PLEASE NOTE: This method saves FULL model, including syn0 AND syn1
|
static void |
writeWord2VecModel(Word2Vec vectors,
java.lang.String path)
This method saves Word2Vec model into compressed zip file and sends it to output stream
PLEASE NOTE: This method saves FULL model, including syn0 AND syn1
|
static void |
writeWordVectors(Glove vectors,
java.io.File file)
This method saves GloVe model to the given file.
|
static void |
writeWordVectors(Glove vectors,
java.io.OutputStream stream)
This method saves GloVe model to the given OutputStream
|
static void |
writeWordVectors(Glove vectors,
java.lang.String path)
This method saves GloVe model to the given path.
|
static void |
writeWordVectors(InMemoryLookupTable lookupTable,
InMemoryLookupCache cache,
java.lang.String path)
Deprecated.
|
static void |
writeWordVectors(ParagraphVectors vectors,
java.io.File path)
Deprecated.
|
static void |
writeWordVectors(ParagraphVectors vectors,
java.io.OutputStream stream)
Deprecated.
|
static void |
writeWordVectors(ParagraphVectors vectors,
java.lang.String path)
Deprecated.
|
static <T extends SequenceElement> |
writeWordVectors(WeightLookupTable<T> lookupTable,
java.io.File file)
This method writes word vectors to the given file.
|
static <T extends SequenceElement> |
writeWordVectors(WeightLookupTable<T> lookupTable,
java.io.OutputStream stream)
This method writes word vectors to the given OutputStream.
|
static <T extends SequenceElement> |
writeWordVectors(WeightLookupTable<T> lookupTable,
java.lang.String path)
This method writes word vectors to the given path.
|
static void |
writeWordVectors(Word2Vec vec,
java.io.BufferedWriter writer)
Deprecated.
|
static void |
writeWordVectors(Word2Vec vec,
java.io.File file)
Deprecated.
|
static void |
writeWordVectors(Word2Vec vec,
java.io.OutputStream outputStream)
Deprecated.
|
static void |
writeWordVectors(Word2Vec vec,
java.lang.String path)
Deprecated.
|
@Deprecated public static Word2Vec loadGoogleModel(java.io.File modelFile, boolean binary) throws java.io.IOException
modelFile - the path to the google model
binary - read from binary file format (if set to true) or from text file format.
Throws: java.io.IOException
@Deprecated public static Word2Vec loadGoogleModel(java.io.File modelFile, boolean binary, boolean lineBreaks) throws java.io.IOException
modelFile - the input file
binary - read from binary or text file format
lineBreaks - if true, the input file is expected to terminate each line with a line break. This
is typically the case for files created with recent versions of Word2Vec, but not
for the downloadable model files.
Returns: Word2Vec object
Throws: java.io.IOException
@Deprecated public static WordVectors loadGoogleModelNonNormalized(java.io.File modelFile, boolean binary, boolean lineBreaks) throws java.io.IOException
modelFile - binary - lineBreaks - java.io.IOExceptionpublic static float readFloat(java.io.InputStream is)
throws java.io.IOException
is - java.io.IOExceptionpublic static float getFloat(byte[] b)
b - java.io.IOExceptionpublic static java.lang.String readString(java.io.DataInputStream dis)
throws java.io.IOException
dis - java.io.IOExceptionpublic static <T extends SequenceElement> void writeWordVectors(WeightLookupTable<T> lookupTable, java.lang.String path) throws java.io.IOException
T - lookupTable - path - java.io.IOExceptionpublic static <T extends SequenceElement> void writeWordVectors(WeightLookupTable<T> lookupTable, java.io.File file) throws java.io.IOException
T - lookupTable - file - java.io.IOExceptionpublic static <T extends SequenceElement> void writeWordVectors(WeightLookupTable<T> lookupTable, java.io.OutputStream stream) throws java.io.IOException
T - lookupTable - stream - java.io.IOException@Deprecated
public static void writeWordVectors(@NonNull
ParagraphVectors vectors,
@NonNull
java.io.File path)
vectors - path - @Deprecated
public static void writeWordVectors(@NonNull
ParagraphVectors vectors,
@NonNull
java.lang.String path)
vectors - path - public static void writeParagraphVectors(ParagraphVectors vectors, java.io.File file)
file - public static void writeParagraphVectors(ParagraphVectors vectors, java.lang.String path)
path - public static void writeWord2VecModel(Word2Vec vectors, java.io.File file)
public static void writeWord2VecModel(Word2Vec vectors, java.lang.String path)
public static void writeWord2VecModel(Word2Vec vectors, java.io.OutputStream stream) throws java.io.IOException
java.io.IOExceptionpublic static void writeParagraphVectors(ParagraphVectors vectors, java.io.OutputStream stream) throws java.io.IOException
java.io.IOExceptionpublic static ParagraphVectors readParagraphVectors(java.lang.String path) throws java.io.IOException
java.io.IOExceptionpublic static ParagraphVectors readParagraphVectors(java.io.File file) throws java.io.IOException
java.io.IOException@Deprecated public static Word2Vec readWord2Vec(java.io.File file) throws java.io.IOException
file - java.io.IOExceptionpublic static ParagraphVectors readParagraphVectors(java.io.InputStream stream) throws java.io.IOException
java.io.IOExceptionpublic static Word2Vec readWord2VecFromText(@NonNull java.io.File vectors, @NonNull java.io.File hs, @NonNull java.io.File h_codes, @NonNull java.io.File h_points, @NonNull VectorsConfiguration configuration) throws java.io.IOException
vectors - text file with words and their weights, aka Syn0
hs - text file HS layers, aka Syn1
h_codes - text file with Huffman tree codes
h_points - text file with Huffman tree points
Throws: java.io.IOException
@Deprecated public static ParagraphVectors readParagraphVectorsFromText(@NonNull java.lang.String path)
path - Path to file that contains previously serialized model@Deprecated public static ParagraphVectors readParagraphVectorsFromText(@NonNull java.io.File file)
file - File that contains previously serialized model@Deprecated public static ParagraphVectors readParagraphVectorsFromText(@NonNull java.io.InputStream stream)
stream - InputStream that contains previously serialized modelpublic static void writeWordVectors(@NonNull
Glove vectors,
@NonNull
java.io.File file)
vectors - GloVe model to be savedfile - path where model should be saved topublic static void writeWordVectors(@NonNull
Glove vectors,
@NonNull
java.lang.String path)
vectors - GloVe model to be savedpath - path where model should be saved topublic static void writeWordVectors(@NonNull
Glove vectors,
@NonNull
java.io.OutputStream stream)
vectors - GloVe model to be savedstream - OutputStream where model should be saved to@Deprecated public static void writeWordVectors(ParagraphVectors vectors, java.io.OutputStream stream)
vectors - stream - @Deprecated public static void writeWordVectors(InMemoryLookupTable lookupTable, InMemoryLookupCache cache, java.lang.String path) throws java.io.IOException
lookupTable - cache - path - the path to writejava.io.IOException@Deprecated
public static void writeFullModel(@NonNull
Word2Vec vec,
@NonNull
java.lang.String path)
vec - The Word2Vec instance to be saved
path - the path for json to be saved
@Deprecated public static Word2Vec loadFullModel(@NonNull java.lang.String path) throws java.io.FileNotFoundException
path - path to previously stored w2v json model
Throws: java.io.FileNotFoundException
@Deprecated
public static void writeWordVectors(@NonNull
Word2Vec vec,
@NonNull
java.lang.String path)
throws java.io.IOException
vec - the word2vec to writepath - the path to writejava.io.IOException@Deprecated
public static void writeWordVectors(@NonNull
Word2Vec vec,
@NonNull
java.io.File file)
throws java.io.IOException
vec - the word2vec to writefile - the file to writejava.io.IOException@Deprecated
public static void writeWordVectors(@NonNull
Word2Vec vec,
@NonNull
java.io.OutputStream outputStream)
throws java.io.IOException
vec - the word2vec to write
outputStream - OutputStream, where all data should be sent to
Throws: java.io.IOException
@Deprecated
public static void writeWordVectors(@NonNull
Word2Vec vec,
@NonNull
java.io.BufferedWriter writer)
throws java.io.IOException
vec - the word2vec to write
writer - BufferedWriter, where all data should be written to
Throws: java.io.IOException
public static WordVectors fromTableAndVocab(WeightLookupTable table, VocabCache vocab)
table - the weights to usevocab - the vocab to usepublic static Word2Vec fromPair(Pair<InMemoryLookupTable,VocabCache> pair)
pair - the given pair@Deprecated public static WordVectors loadTxtVectors(java.io.File vectorsFile) throws java.io.FileNotFoundException, java.io.UnsupportedEncodingException
vectorsFile - the path of the file to load
Throws: java.io.FileNotFoundException - if the file does not exist
Throws: java.io.UnsupportedEncodingException
public static Pair<InMemoryLookupTable,VocabCache> loadTxt(java.io.File vectorsFile) throws java.io.FileNotFoundException, java.io.UnsupportedEncodingException
vectorsFile - the path of the file to loadjava.io.FileNotFoundException - if the input file does not existjava.io.UnsupportedEncodingException@Deprecated public static WordVectors loadTxtVectors(@NonNull java.io.InputStream stream, boolean skipFirstLine) throws java.io.IOException
stream - InputStream that contains previously serialized modelskipFirstLine - Set this TRUE if first line contains csv header, FALSE otherwisejava.io.IOExceptionpublic static void writeTsneFormat(Glove vec, org.nd4j.linalg.api.ndarray.INDArray tsne, java.io.File csv) throws java.lang.Exception
vec - the word vectors to use for labelingtsne - the tsne array to writecsv - the file to usejava.lang.Exceptionpublic static void writeTsneFormat(Word2Vec vec, org.nd4j.linalg.api.ndarray.INDArray tsne, java.io.File csv) throws java.lang.Exception
vec - the word vectors to use for labelingtsne - the tsne array to writecsv - the file to usejava.lang.Exceptionpublic static <T extends SequenceElement> void writeSequenceVectors(@NonNull SequenceVectors<T> vectors, @NonNull SequenceElementFactory<T> factory, @NonNull java.lang.String path) throws java.io.IOException
T - vectors - SequenceVectors modelfactory - SequenceElementFactory implementation for your objectspath - Target output file pathjava.io.IOExceptionpublic static <T extends SequenceElement> void writeSequenceVectors(@NonNull SequenceVectors<T> vectors, @NonNull SequenceElementFactory<T> factory, @NonNull java.io.File file) throws java.io.IOException
T - vectors - SequenceVectors modelfactory - SequenceElementFactory implementation for your objectsfile - Target output filejava.io.IOExceptionpublic static <T extends SequenceElement> void writeSequenceVectors(@NonNull SequenceVectors<T> vectors, @NonNull SequenceElementFactory<T> factory, @NonNull java.io.OutputStream stream) throws java.io.IOException
T - vectors - SequenceVectors modelfactory - SequenceElementFactory implementation for your objectsstream - Target output streamjava.io.IOExceptionpublic static <T extends SequenceElement> SequenceVectors<T> readSequenceVectors(@NonNull SequenceElementFactory<T> factory, @NonNull java.io.File file) throws java.io.IOException
T - factory - file - java.io.IOExceptionpublic static <T extends SequenceElement> SequenceVectors<T> readSequenceVectors(@NonNull SequenceElementFactory<T> factory, @NonNull java.io.InputStream stream) throws java.io.IOException
T - factory - stream - java.io.IOExceptionpublic static void writeVocabCache(@NonNull
VocabCache<VocabWord> vocabCache,
@NonNull
java.io.File file)
throws java.io.IOException
vocabCache - file - java.io.UnsupportedEncodingExceptionjava.io.IOExceptionpublic static void writeVocabCache(@NonNull
VocabCache<VocabWord> vocabCache,
@NonNull
java.io.OutputStream stream)
throws java.io.IOException
vocabCache - stream - java.io.UnsupportedEncodingExceptionjava.io.IOExceptionpublic static VocabCache<VocabWord> readVocabCache(@NonNull java.io.File file) throws java.io.IOException
file - java.io.IOExceptionpublic static VocabCache<VocabWord> readVocabCache(@NonNull java.io.InputStream stream) throws java.io.IOException
stream - java.io.IOExceptionpublic static Word2Vec readWord2VecModel(@NonNull java.io.File file)
file - public static Word2Vec readWord2VecModel(java.lang.String path)
path - public static Word2Vec readWord2VecModel(java.lang.String path, boolean extendedModel)
path - extendedModel - if TRUE, we'll try to load HS states & Huffman tree info, if FALSE, only weights will be loadedpublic static Word2Vec readWord2VecModel(@NonNull java.io.File file, boolean extendedModel)
file - extendedModel - if TRUE, we'll try to load HS states & Huffman tree info, if FALSE, only weights will be loadedprotected static TokenizerFactory getTokenizerFactory(VectorsConfiguration configuration)
public static WordVectors loadStaticModel(java.io.File file)
file - File should point to previously saved w2v modelpublic static java.lang.String encodeB64(java.lang.String word)
public static java.lang.String decodeB64(java.lang.String word)
public static void printOutProjectedMemoryUse(long numWords,
int vectorLength,
int numTables)