public class ParagraphVectors extends Word2Vec
Modifier and Type | Class and Description |
---|---|
class |
ParagraphVectors.BlindInferenceCallable |
static class |
ParagraphVectors.Builder |
class |
ParagraphVectors.InferenceCallable |
SequenceVectors.AsyncSequencer
Modifier and Type | Field and Description |
---|---|
protected java.util.concurrent.atomic.AtomicLong |
countFinished |
protected java.util.concurrent.atomic.AtomicLong |
countSubmitted |
protected java.util.concurrent.ExecutorService |
inferenceExecutor |
protected java.lang.Object |
inferenceLocker |
protected LabelAwareIterator |
labelAwareIterator |
protected java.util.List<VocabWord> |
labelsList |
protected org.nd4j.linalg.api.ndarray.INDArray |
labelsMatrix |
protected LabelsSource |
labelsSource |
protected boolean |
normalizedLabels |
sentenceIter, tokenizerFactory
configuration, configured, elementsLearningAlgorithm, enableScavenger, eventListeners, existingModel, iterator, log, scoreElements, scoreSequences, sequenceLearningAlgorithm, unknownElement
batchSize, DEFAULT_UNK, layerSize, learningRate, learningRateDecayWords, lookupTable, minLearningRate, minWordFrequency, modelUtils, negative, numEpochs, numIterations, resetModel, sampling, seed, stopWords, trainElementsVectors, trainSequenceVectors, useAdeGrad, useUnknown, variableWindows, vocab, window, workers
Modifier | Constructor and Description |
---|---|
protected |
ParagraphVectors() |
Modifier and Type | Method and Description |
---|---|
void |
extractLabels() |
void |
fit()
Starts training over
|
org.nd4j.linalg.api.ndarray.INDArray |
inferVector(LabelledDocument document)
This method calculates the inferred vector for the given document, using default parameters for learning rate and iterations
|
org.nd4j.linalg.api.ndarray.INDArray |
inferVector(LabelledDocument document,
double learningRate,
double minLearningRate,
int iterations)
This method calculates the inferred vector for the given document
|
org.nd4j.linalg.api.ndarray.INDArray |
inferVector(java.util.List<VocabWord> document)
This method calculates the inferred vector for the given list of words, using default parameters for learning rate and iterations
|
org.nd4j.linalg.api.ndarray.INDArray |
inferVector(java.util.List<VocabWord> document,
double learningRate,
double minLearningRate,
int iterations)
This method calculates the inferred vector for the given document
|
org.nd4j.linalg.api.ndarray.INDArray |
inferVector(java.lang.String text)
This method calculates the inferred vector for the given text, using default parameters for learning rate and iterations
|
org.nd4j.linalg.api.ndarray.INDArray |
inferVector(java.lang.String text,
double learningRate,
double minLearningRate,
int iterations)
This method calculates the inferred vector for the given text
|
java.util.concurrent.Future<Pair<java.lang.String,org.nd4j.linalg.api.ndarray.INDArray>> |
inferVectorBatched(LabelledDocument document)
This method implements batched inference, based on the Java Future parallelism model.
|
java.util.List<org.nd4j.linalg.api.ndarray.INDArray> |
inferVectorBatched(java.util.List<java.lang.String> documents)
This method does inference on a given List<String>
|
java.util.concurrent.Future<org.nd4j.linalg.api.ndarray.INDArray> |
inferVectorBatched(java.lang.String document)
This method implements batched inference, based on the Java Future parallelism model.
|
protected void |
initInference() |
java.util.Collection<java.lang.String> |
nearestLabels(java.util.Collection<VocabWord> document,
int topN)
This method returns the top N labels nearest to the specified set of vocab words
|
java.util.Collection<java.lang.String> |
nearestLabels(org.nd4j.linalg.api.ndarray.INDArray labelVector,
int topN)
This method returns the top N labels nearest to the specified feature vector
|
java.util.Collection<java.lang.String> |
nearestLabels(LabelledDocument document,
int topN)
This method returns the top N labels nearest to the specified document
|
java.util.Collection<java.lang.String> |
nearestLabels(java.lang.String rawText,
int topN)
This method returns the top N labels nearest to the specified text
|
java.lang.String |
predict(LabelledDocument document)
Deprecated.
|
java.lang.String |
predict(java.util.List<VocabWord> document)
Deprecated.
|
java.lang.String |
predict(java.lang.String rawText)
Deprecated.
|
java.util.Collection<java.lang.String> |
predictSeveral(LabelledDocument document,
int limit)
Deprecated.
|
java.util.Collection<java.lang.String> |
predictSeveral(java.util.List<VocabWord> document,
int limit)
Deprecated.
|
java.util.Collection<java.lang.String> |
predictSeveral(java.lang.String rawText,
int limit)
Deprecated.
|
protected void |
reassignExistingModel() |
void |
setSequenceIterator(SequenceIterator<VocabWord> iterator)
This method defines the SequenceIterator instance that will be used as the training corpus source.
|
double |
similarityToLabel(LabelledDocument document,
java.lang.String label)
Deprecated.
|
double |
similarityToLabel(java.util.List<VocabWord> document,
java.lang.String label)
Deprecated.
|
double |
similarityToLabel(java.lang.String rawText,
java.lang.String label)
Deprecated.
|
setSentenceIterator, setTokenizerFactory
buildVocab, getElementsScore, getSequencesScore, getUNK, getWordVectorMatrix, initLearners, trainSequence
accuracy, getLayerSize, getWordVector, getWordVectorMatrixNormalized, getWordVectors, getWordVectorsMean, hasWord, indexOf, lookupTable, setLookupTable, setModelUtils, setVocab, similarity, similarWordsInVocabTo, update, update, vocab, wordsNearest, wordsNearest, wordsNearest, wordsNearestSum, wordsNearestSum, wordsNearestSum
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
accuracy, getWordVector, getWordVectorMatrixNormalized, getWordVectors, getWordVectorsMean, hasWord, indexOf, lookupTable, setModelUtils, setUNK, similarity, similarWordsInVocabTo, vocab, wordsNearest, wordsNearest, wordsNearest, wordsNearestSum, wordsNearestSum, wordsNearestSum
protected LabelsSource labelsSource
protected transient LabelAwareIterator labelAwareIterator
protected org.nd4j.linalg.api.ndarray.INDArray labelsMatrix
protected java.util.List<VocabWord> labelsList
protected boolean normalizedLabels
protected final transient java.lang.Object inferenceLocker
protected transient java.util.concurrent.ExecutorService inferenceExecutor
protected transient java.util.concurrent.atomic.AtomicLong countSubmitted
protected transient java.util.concurrent.atomic.AtomicLong countFinished
protected void initInference()
@Deprecated public java.lang.String predict(java.lang.String rawText)
rawText
- public void setSequenceIterator(@NonNull SequenceIterator<VocabWord> iterator)
setSequenceIterator
in class Word2Vec
iterator
- @Deprecated public java.lang.String predict(LabelledDocument document)
document
- the document
public void extractLabels()
public org.nd4j.linalg.api.ndarray.INDArray inferVector(java.lang.String text, double learningRate, double minLearningRate, int iterations)
text
- protected void reassignExistingModel()
public org.nd4j.linalg.api.ndarray.INDArray inferVector(LabelledDocument document, double learningRate, double minLearningRate, int iterations)
document
- public org.nd4j.linalg.api.ndarray.INDArray inferVector(@NonNull java.util.List<VocabWord> document, double learningRate, double minLearningRate, int iterations)
document
- public org.nd4j.linalg.api.ndarray.INDArray inferVector(java.lang.String text)
text
- public org.nd4j.linalg.api.ndarray.INDArray inferVector(LabelledDocument document)
document
- public org.nd4j.linalg.api.ndarray.INDArray inferVector(@NonNull java.util.List<VocabWord> document)
document
- public java.util.concurrent.Future<Pair<java.lang.String,org.nd4j.linalg.api.ndarray.INDArray>> inferVectorBatched(@NonNull LabelledDocument document)
document
- public java.util.concurrent.Future<org.nd4j.linalg.api.ndarray.INDArray> inferVectorBatched(@NonNull java.lang.String document)
document
- public java.util.List<org.nd4j.linalg.api.ndarray.INDArray> inferVectorBatched(@NonNull java.util.List<java.lang.String> documents)
documents
- @Deprecated public java.lang.String predict(java.util.List<VocabWord> document)
document
- the document
@Deprecated public java.util.Collection<java.lang.String> predictSeveral(@NonNull LabelledDocument document, int limit)
document
- raw text of the document
@Deprecated public java.util.Collection<java.lang.String> predictSeveral(java.lang.String rawText, int limit)
rawText
- raw text of the document
@Deprecated public java.util.Collection<java.lang.String> predictSeveral(java.util.List<VocabWord> document, int limit)
document
- the document
public java.util.Collection<java.lang.String> nearestLabels(LabelledDocument document, int topN)
document
- topN
- public java.util.Collection<java.lang.String> nearestLabels(@NonNull java.lang.String rawText, int topN)
rawText
- topN
- public java.util.Collection<java.lang.String> nearestLabels(@NonNull java.util.Collection<VocabWord> document, int topN)
document
- topN
- public java.util.Collection<java.lang.String> nearestLabels(org.nd4j.linalg.api.ndarray.INDArray labelVector, int topN)
labelVector
- topN
- @Deprecated public double similarityToLabel(java.lang.String rawText, java.lang.String label)
rawText
- label
- public void fit()
SequenceVectors
fit
in class SequenceVectors<VocabWord>
@Deprecated public double similarityToLabel(LabelledDocument document, java.lang.String label)
document
- label
- @Deprecated public double similarityToLabel(java.util.List<VocabWord> document, java.lang.String label)
document
- label
-