public class Word2Vec extends WordVectorsImpl<VocabWord> implements java.io.Serializable
Modifier and Type | Class and Description |
---|---|
static class |
Word2Vec.Builder |
Modifier and Type | Field and Description |
---|---|
protected VectorsConfiguration |
configuration |
batchSize, DEFAULT_UNK, layerSize, learningRate, learningRateDecayWords, lookupTable, minLearningRate, minWordFrequency, modelUtils, negative, numEpochs, numIterations, resetModel, sampling, stopWords, trainElementsVectors, trainSequenceVectors, useAdeGrad, variableWindows, vocab, window, workers
Modifier | Constructor and Description |
---|---|
protected |
Word2Vec() |
protected |
Word2Vec(org.nd4j.linalg.api.ndarray.INDArray trainedSyn1) |
Modifier and Type | Method and Description |
---|---|
java.util.Map<java.lang.String,java.lang.Object> |
getTokenizerVarMap() |
java.util.Map<java.lang.String,java.lang.Object> |
getWord2vecVarMap() |
protected double[] |
initExpTable() |
void |
train(org.apache.spark.api.java.JavaRDD<java.lang.String> corpusRDD)
Trains the word2vec model on the given text corpus.
|
accuracy, getLayerSize, getWordVector, getWordVectorMatrix, getWordVectorMatrixNormalized, getWordVectors, getWordVectorsMean, hasWord, indexOf, lookupTable, setLookupTable, setModelUtils, setVocab, similarity, similarWordsInVocabTo, update, update, vocab, wordsNearest, wordsNearest, wordsNearest, wordsNearestSum, wordsNearestSum, wordsNearestSum
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
getUNK, setUNK
protected VectorsConfiguration configuration
protected Word2Vec(org.nd4j.linalg.api.ndarray.INDArray trainedSyn1)
protected Word2Vec()
protected double[] initExpTable()
public java.util.Map<java.lang.String,java.lang.Object> getTokenizerVarMap()
public java.util.Map<java.lang.String,java.lang.Object> getWord2vecVarMap()
public void train(org.apache.spark.api.java.JavaRDD<java.lang.String> corpusRDD) throws java.lang.Exception
Parameters:
corpusRDD - training corpus
Throws:
java.lang.Exception