public class WordVectorsImpl<T extends SequenceElement> extends java.lang.Object implements WordVectors
| Modifier and Type | Field and Description |
|---|---|
protected int |
batchSize |
static java.lang.String |
DEFAULT_UNK |
protected int |
layerSize |
protected com.google.common.util.concurrent.AtomicDouble |
learningRate |
protected int |
learningRateDecayWords |
protected WeightLookupTable<T> |
lookupTable |
protected double |
minLearningRate |
protected int |
minWordFrequency |
protected ModelUtils<T> |
modelUtils |
protected double |
negative |
protected int |
numEpochs |
protected int |
numIterations |
protected boolean |
resetModel |
protected double |
sampling |
protected long |
seed |
protected java.util.Collection<java.lang.String> |
stopWords |
protected boolean |
trainElementsVectors |
protected boolean |
trainSequenceVectors |
protected boolean |
useAdeGrad |
protected boolean |
useUnknown |
protected int[] |
variableWindows |
protected VocabCache<T> |
vocab |
protected int |
window |
protected int |
workers |
| Constructor and Description |
|---|
WordVectorsImpl() |
| Modifier and Type | Method and Description |
|---|---|
java.util.Map<java.lang.String,java.lang.Double> |
accuracy(java.util.List<java.lang.String> questions)
Accuracy based on questions, each of which is a space-separated list of strings
where the first word is the query word, the next 2 words are negative,
and the last word is the word predicted to be nearest
|
int |
getLayerSize()
This method returns the word vector size
|
double[] |
getWordVector(java.lang.String word)
Get the word vector for a given word
|
org.nd4j.linalg.api.ndarray.INDArray |
getWordVectorMatrix(java.lang.String word)
Get the word vector for a given word
|
org.nd4j.linalg.api.ndarray.INDArray |
getWordVectorMatrixNormalized(java.lang.String word)
Returns the word vector divided by the norm2 of the array
|
org.nd4j.linalg.api.ndarray.INDArray |
getWordVectors(java.util.Collection<java.lang.String> labels)
This method returns a 2D array, where each row represents the corresponding label
|
org.nd4j.linalg.api.ndarray.INDArray |
getWordVectorsMean(java.util.Collection<java.lang.String> labels)
This method returns the mean vector, built from the words/labels passed in
|
boolean |
hasWord(java.lang.String word)
Returns true if the model has this word in the vocab
|
int |
indexOf(java.lang.String word) |
WeightLookupTable |
lookupTable()
Lookup table for the vectors
|
void |
setLookupTable(WeightLookupTable lookupTable) |
void |
setModelUtils(ModelUtils modelUtils)
Specifies ModelUtils to be used to access model
|
void |
setVocab(VocabCache vocab) |
double |
similarity(java.lang.String word,
java.lang.String word2)
Returns similarity of two elements, provided by ModelUtils
|
java.util.List<java.lang.String> |
similarWordsInVocabTo(java.lang.String word,
double accuracy)
Find all words with similar characters
in the vocab
|
protected void |
update() |
protected void |
update(org.nd4j.linalg.heartbeat.reports.Environment env,
org.nd4j.linalg.heartbeat.reports.Event event) |
VocabCache<T> |
vocab()
Vocab for the vectors
|
java.util.Collection<java.lang.String> |
wordsNearest(java.util.Collection<java.lang.String> positive,
java.util.Collection<java.lang.String> negative,
int top)
Words nearest based on positive and negative words
|
java.util.Collection<java.lang.String> |
wordsNearest(org.nd4j.linalg.api.ndarray.INDArray words,
int top)
Words nearest based on positive and negative words
top - the top n words
|
java.util.Collection<java.lang.String> |
wordsNearest(java.lang.String word,
int n)
Get the top n words most similar to the given word
|
java.util.Collection<java.lang.String> |
wordsNearestSum(java.util.Collection<java.lang.String> positive,
java.util.Collection<java.lang.String> negative,
int top)
Words nearest based on positive and negative words
|
java.util.Collection<java.lang.String> |
wordsNearestSum(org.nd4j.linalg.api.ndarray.INDArray words,
int top)
Words nearest based on positive and negative words
top - the top n words
|
java.util.Collection<java.lang.String> |
wordsNearestSum(java.lang.String word,
int n)
Get the top n words most similar to the given word
|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
getUNK, setUNK
protected int minWordFrequency
protected WeightLookupTable<T extends SequenceElement> lookupTable
protected VocabCache<T extends SequenceElement> vocab
protected int layerSize
protected transient ModelUtils<T extends SequenceElement> modelUtils
protected int numIterations
protected int numEpochs
protected double negative
protected double sampling
protected com.google.common.util.concurrent.AtomicDouble learningRate
protected double minLearningRate
protected int window
protected int batchSize
protected int learningRateDecayWords
protected boolean resetModel
protected boolean useAdeGrad
protected int workers
protected boolean trainSequenceVectors
protected boolean trainElementsVectors
protected long seed
protected boolean useUnknown
protected int[] variableWindows
public static final java.lang.String DEFAULT_UNK
protected java.util.Collection<java.lang.String> stopWords
public int getLayerSize()
public boolean hasWord(java.lang.String word)
hasWord in interface WordVectors
word - the word to test for
public java.util.Collection<java.lang.String> wordsNearestSum(java.util.Collection<java.lang.String> positive,
java.util.Collection<java.lang.String> negative,
int top)
wordsNearestSum in interface WordVectors
positive - the positive words
negative - the negative words
top - the top n words
public java.util.Collection<java.lang.String> wordsNearestSum(org.nd4j.linalg.api.ndarray.INDArray words,
int top)
wordsNearestSum in interface WordVectors
public java.util.Collection<java.lang.String> wordsNearest(org.nd4j.linalg.api.ndarray.INDArray words,
int top)
wordsNearest in interface WordVectors
public java.util.Collection<java.lang.String> wordsNearestSum(java.lang.String word,
int n)
wordsNearestSum in interface WordVectors
word - the word to compare
n - the n to get
public java.util.Map<java.lang.String,java.lang.Double> accuracy(java.util.List<java.lang.String> questions)
accuracy in interface WordVectors
questions - the questions to ask
public int indexOf(java.lang.String word)
indexOf in interface WordVectors
public java.util.List<java.lang.String> similarWordsInVocabTo(java.lang.String word,
double accuracy)
similarWordsInVocabTo in interface WordVectors
word - the word to compare
accuracy - the accuracy: 0 to 1
public double[] getWordVector(java.lang.String word)
getWordVector in interface WordVectors
word - the word to get the matrix for
public org.nd4j.linalg.api.ndarray.INDArray getWordVectorMatrixNormalized(java.lang.String word)
getWordVectorMatrixNormalized in interface WordVectors
word - the word to get the matrix for
public org.nd4j.linalg.api.ndarray.INDArray getWordVectorMatrix(java.lang.String word)
Description copied from interface: WordVectors
getWordVectorMatrix in interface WordVectors
word - the word to get the matrix for
public java.util.Collection<java.lang.String> wordsNearest(java.util.Collection<java.lang.String> positive,
java.util.Collection<java.lang.String> negative,
int top)
wordsNearest in interface WordVectors
positive - the positive words
negative - the negative words
top - the top n words
public org.nd4j.linalg.api.ndarray.INDArray getWordVectors(@NonNull
java.util.Collection<java.lang.String> labels)
getWordVectors in interface WordVectors
labels -
public org.nd4j.linalg.api.ndarray.INDArray getWordVectorsMean(java.util.Collection<java.lang.String> labels)
getWordVectorsMean in interface WordVectors
labels -
public java.util.Collection<java.lang.String> wordsNearest(java.lang.String word,
int n)
wordsNearest in interface WordVectors
word - the word to compare
n - the n to get
public double similarity(java.lang.String word,
java.lang.String word2)
similarity in interface WordVectors
word - the first word
word2 - the second word
public VocabCache<T> vocab()
Description copied from interface: WordVectors
vocab in interface WordVectors
public WeightLookupTable lookupTable()
Description copied from interface: WordVectors
lookupTable in interface WordVectors
public void setModelUtils(@NonNull
ModelUtils modelUtils)
Description copied from interface: WordVectors
setModelUtils in interface WordVectors
public void setLookupTable(@NonNull
WeightLookupTable lookupTable)
public void setVocab(VocabCache vocab)
protected void update()
protected void update(org.nd4j.linalg.heartbeat.reports.Environment env,
org.nd4j.linalg.heartbeat.reports.Event event)