public class ParagraphVectors extends Word2Vec
| Modifier and Type | Class and Description |
|---|---|
class |
ParagraphVectors.BlindInferenceCallable |
static class |
ParagraphVectors.Builder |
class |
ParagraphVectors.InferenceCallable |
SequenceVectors.AsyncSequencer
| Modifier and Type | Field and Description |
|---|---|
protected java.util.concurrent.atomic.AtomicLong |
countFinished |
protected java.util.concurrent.atomic.AtomicLong |
countSubmitted |
protected java.util.concurrent.ExecutorService |
inferenceExecutor |
protected java.lang.Object |
inferenceLocker |
protected LabelAwareIterator |
labelAwareIterator |
protected java.util.List<VocabWord> |
labelsList |
protected org.nd4j.linalg.api.ndarray.INDArray |
labelsMatrix |
protected LabelsSource |
labelsSource |
protected boolean |
normalizedLabels |
sentenceIter, tokenizerFactory
configuration, configured, elementsLearningAlgorithm, enableScavenger, eventListeners, existingModel, iterator, log, scoreElements, scoreSequences, sequenceLearningAlgorithm, unknownElement
batchSize, DEFAULT_UNK, layerSize, learningRate, learningRateDecayWords, lookupTable, minLearningRate, minWordFrequency, modelUtils, negative, numEpochs, numIterations, resetModel, sampling, seed, stopWords, trainElementsVectors, trainSequenceVectors, useAdeGrad, useUnknown, variableWindows, vocab, window, workers
| Modifier | Constructor and Description |
|---|---|
protected |
ParagraphVectors() |
| Modifier and Type | Method and Description |
|---|---|
void |
extractLabels() |
void |
fit()
Starts training over
|
org.nd4j.linalg.api.ndarray.INDArray |
inferVector(LabelledDocument document)
This method calculates inferred vector for given document, with default parameters for learning rate and iterations
|
org.nd4j.linalg.api.ndarray.INDArray |
inferVector(LabelledDocument document,
double learningRate,
double minLearningRate,
int iterations)
This method calculates inferred vector for given document
|
org.nd4j.linalg.api.ndarray.INDArray |
inferVector(java.util.List<VocabWord> document)
This method calculates inferred vector for given list of words, with default parameters for learning rate and iterations
|
org.nd4j.linalg.api.ndarray.INDArray |
inferVector(java.util.List<VocabWord> document,
double learningRate,
double minLearningRate,
int iterations)
This method calculates inferred vector for given document
|
org.nd4j.linalg.api.ndarray.INDArray |
inferVector(java.lang.String text)
This method calculates inferred vector for given text, with default parameters for learning rate and iterations
|
org.nd4j.linalg.api.ndarray.INDArray |
inferVector(java.lang.String text,
double learningRate,
double minLearningRate,
int iterations)
This method calculates inferred vector for given text
|
java.util.concurrent.Future<Pair<java.lang.String,org.nd4j.linalg.api.ndarray.INDArray>> |
inferVectorBatched(LabelledDocument document)
This method implements batched inference, based on Java Future parallelism model.
|
java.util.List<org.nd4j.linalg.api.ndarray.INDArray> |
inferVectorBatched(java.util.List<java.lang.String> documents)
This method does inference on a given List<String>
|
java.util.concurrent.Future<org.nd4j.linalg.api.ndarray.INDArray> |
inferVectorBatched(java.lang.String document)
This method implements batched inference, based on Java Future parallelism model.
|
protected void |
initInference() |
java.util.Collection<java.lang.String> |
nearestLabels(java.util.Collection<VocabWord> document,
int topN)
This method returns top N labels nearest to specified set of vocab words
|
java.util.Collection<java.lang.String> |
nearestLabels(org.nd4j.linalg.api.ndarray.INDArray labelVector,
int topN)
This method returns top N labels nearest to specified features vector
|
java.util.Collection<java.lang.String> |
nearestLabels(LabelledDocument document,
int topN)
This method returns top N labels nearest to specified document
|
java.util.Collection<java.lang.String> |
nearestLabels(java.lang.String rawText,
int topN)
This method returns top N labels nearest to specified text
|
java.lang.String |
predict(LabelledDocument document)
Deprecated.
|
java.lang.String |
predict(java.util.List<VocabWord> document)
Deprecated.
|
java.lang.String |
predict(java.lang.String rawText)
Deprecated.
|
java.util.Collection<java.lang.String> |
predictSeveral(LabelledDocument document,
int limit)
Deprecated.
|
java.util.Collection<java.lang.String> |
predictSeveral(java.util.List<VocabWord> document,
int limit)
Deprecated.
|
java.util.Collection<java.lang.String> |
predictSeveral(java.lang.String rawText,
int limit)
Deprecated.
|
protected void |
reassignExistingModel() |
void |
setSequenceIterator(SequenceIterator<VocabWord> iterator)
This method defines SequenceIterator instance, that will be used as training corpus source.
|
double |
similarityToLabel(LabelledDocument document,
java.lang.String label)
Deprecated.
|
double |
similarityToLabel(java.util.List<VocabWord> document,
java.lang.String label)
Deprecated.
|
double |
similarityToLabel(java.lang.String rawText,
java.lang.String label)
Deprecated.
|
setSentenceIterator, setTokenizerFactory
buildVocab, getElementsScore, getSequencesScore, getUNK, getWordVectorMatrix, initLearners, trainSequence
accuracy, getLayerSize, getWordVector, getWordVectorMatrixNormalized, getWordVectors, getWordVectorsMean, hasWord, indexOf, lookupTable, setLookupTable, setModelUtils, setVocab, similarity, similarWordsInVocabTo, update, update, vocab, wordsNearest, wordsNearest, wordsNearest, wordsNearestSum, wordsNearestSum, wordsNearestSum
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
accuracy, getWordVector, getWordVectorMatrixNormalized, getWordVectors, getWordVectorsMean, hasWord, indexOf, lookupTable, setModelUtils, setUNK, similarity, similarWordsInVocabTo, vocab, wordsNearest, wordsNearest, wordsNearest, wordsNearestSum, wordsNearestSum, wordsNearestSum
protected LabelsSource labelsSource
protected transient LabelAwareIterator labelAwareIterator
protected org.nd4j.linalg.api.ndarray.INDArray labelsMatrix
protected java.util.List<VocabWord> labelsList
protected boolean normalizedLabels
protected final transient java.lang.Object inferenceLocker
protected transient java.util.concurrent.ExecutorService inferenceExecutor
protected transient java.util.concurrent.atomic.AtomicLong countSubmitted
protected transient java.util.concurrent.atomic.AtomicLong countFinished
protected void initInference()
@Deprecated public java.lang.String predict(java.lang.String rawText)
rawText -
public void setSequenceIterator(@NonNull
SequenceIterator<VocabWord> iterator)
setSequenceIterator in class Word2Vec
iterator -
@Deprecated public java.lang.String predict(LabelledDocument document)
document - the document
public void extractLabels()
public org.nd4j.linalg.api.ndarray.INDArray inferVector(java.lang.String text,
double learningRate,
double minLearningRate,
int iterations)
text -
protected void reassignExistingModel()
public org.nd4j.linalg.api.ndarray.INDArray inferVector(LabelledDocument document, double learningRate, double minLearningRate, int iterations)
document -
public org.nd4j.linalg.api.ndarray.INDArray inferVector(@NonNull
java.util.List<VocabWord> document,
double learningRate,
double minLearningRate,
int iterations)
document -
public org.nd4j.linalg.api.ndarray.INDArray inferVector(java.lang.String text)
text -
public org.nd4j.linalg.api.ndarray.INDArray inferVector(LabelledDocument document)
document -
public org.nd4j.linalg.api.ndarray.INDArray inferVector(@NonNull
java.util.List<VocabWord> document)
document -
public java.util.concurrent.Future<Pair<java.lang.String,org.nd4j.linalg.api.ndarray.INDArray>> inferVectorBatched(@NonNull LabelledDocument document)
document -
public java.util.concurrent.Future<org.nd4j.linalg.api.ndarray.INDArray> inferVectorBatched(@NonNull
java.lang.String document)
document -
public java.util.List<org.nd4j.linalg.api.ndarray.INDArray> inferVectorBatched(@NonNull
java.util.List<java.lang.String> documents)
documents -
@Deprecated public java.lang.String predict(java.util.List<VocabWord> document)
document - the document
@Deprecated
public java.util.Collection<java.lang.String> predictSeveral(@NonNull
LabelledDocument document,
int limit)
document - the document
@Deprecated
public java.util.Collection<java.lang.String> predictSeveral(java.lang.String rawText,
int limit)
rawText - raw text of the document
@Deprecated public java.util.Collection<java.lang.String> predictSeveral(java.util.List<VocabWord> document, int limit)
document - the document
public java.util.Collection<java.lang.String> nearestLabels(LabelledDocument document, int topN)
document -
topN -
public java.util.Collection<java.lang.String> nearestLabels(@NonNull
java.lang.String rawText,
int topN)
rawText -
topN -
public java.util.Collection<java.lang.String> nearestLabels(@NonNull
java.util.Collection<VocabWord> document,
int topN)
document -
topN -
public java.util.Collection<java.lang.String> nearestLabels(org.nd4j.linalg.api.ndarray.INDArray labelVector,
int topN)
labelVector -
topN -
@Deprecated
public double similarityToLabel(java.lang.String rawText,
java.lang.String label)
rawText -
label -
public void fit()
SequenceVectors
fit in class SequenceVectors<VocabWord>
@Deprecated public double similarityToLabel(LabelledDocument document, java.lang.String label)
document -
label -
@Deprecated public double similarityToLabel(java.util.List<VocabWord> document, java.lang.String label)
document -
label -