public class WordVectorsImpl<T extends SequenceElement> extends java.lang.Object implements WordVectors
Modifier and Type | Field and Description |
---|---|
protected int |
batchSize |
static java.lang.String |
DEFAULT_UNK |
protected int |
layerSize |
protected com.google.common.util.concurrent.AtomicDouble |
learningRate |
protected int |
learningRateDecayWords |
protected WeightLookupTable<T> |
lookupTable |
protected double |
minLearningRate |
protected int |
minWordFrequency |
protected ModelUtils<T> |
modelUtils |
protected double |
negative |
protected int |
numEpochs |
protected int |
numIterations |
protected boolean |
resetModel |
protected double |
sampling |
protected long |
seed |
protected java.util.Collection<java.lang.String> |
stopWords |
protected boolean |
trainElementsVectors |
protected boolean |
trainSequenceVectors |
protected boolean |
useAdeGrad |
protected boolean |
useUnknown |
protected int[] |
variableWindows |
protected VocabCache<T> |
vocab |
protected int |
window |
protected int |
workers |
Constructor and Description |
---|
WordVectorsImpl() |
Modifier and Type | Method and Description |
---|---|
java.util.Map<java.lang.String,java.lang.Double> |
accuracy(java.util.List<java.lang.String> questions)
Accuracy based on questions which are a space separated list of strings
where the first word is the query word, the next 2 words are negative,
and the last word is the predicted word to be nearest
|
int |
getLayerSize()
This method returns word vector size
|
double[] |
getWordVector(java.lang.String word)
Get the word vector for a given word
|
org.nd4j.linalg.api.ndarray.INDArray |
getWordVectorMatrix(java.lang.String word)
Get the word vector for a given word, as an INDArray
|
org.nd4j.linalg.api.ndarray.INDArray |
getWordVectorMatrixNormalized(java.lang.String word)
Returns the word vector divided by the norm2 of the array
|
org.nd4j.linalg.api.ndarray.INDArray |
getWordVectors(java.util.Collection<java.lang.String> labels)
This method returns 2D array, where each row represents corresponding label
|
org.nd4j.linalg.api.ndarray.INDArray |
getWordVectorsMean(java.util.Collection<java.lang.String> labels)
This method returns mean vector, built from words/labels passed in
|
boolean |
hasWord(java.lang.String word)
Returns true if the model has this word in the vocab
|
int |
indexOf(java.lang.String word) |
WeightLookupTable |
lookupTable()
Lookup table for the vectors
|
void |
setLookupTable(WeightLookupTable lookupTable) |
void |
setModelUtils(ModelUtils modelUtils)
Specifies ModelUtils to be used to access model
|
void |
setVocab(VocabCache vocab) |
double |
similarity(java.lang.String word,
java.lang.String word2)
Returns similarity of two elements, provided by ModelUtils
|
java.util.List<java.lang.String> |
similarWordsInVocabTo(java.lang.String word,
double accuracy)
Find all words with similar characters
in the vocab
|
protected void |
update() |
protected void |
update(org.nd4j.linalg.heartbeat.reports.Environment env,
org.nd4j.linalg.heartbeat.reports.Event event) |
VocabCache<T> |
vocab()
Vocab for the vectors
|
java.util.Collection<java.lang.String> |
wordsNearest(java.util.Collection<java.lang.String> positive,
java.util.Collection<java.lang.String> negative,
int top)
Words nearest based on positive and negative words
|
java.util.Collection<java.lang.String> |
wordsNearest(org.nd4j.linalg.api.ndarray.INDArray words,
int top)
Words nearest based on positive and negative words
top - the top n words
|
java.util.Collection<java.lang.String> |
wordsNearest(java.lang.String word,
int n)
Get the top n words most similar to the given word
|
java.util.Collection<java.lang.String> |
wordsNearestSum(java.util.Collection<java.lang.String> positive,
java.util.Collection<java.lang.String> negative,
int top)
Words nearest based on positive and negative words
|
java.util.Collection<java.lang.String> |
wordsNearestSum(org.nd4j.linalg.api.ndarray.INDArray words,
int top)
Words nearest based on positive and negative words
top - the top n words
|
java.util.Collection<java.lang.String> |
wordsNearestSum(java.lang.String word,
int n)
Get the top n words most similar to the given word
|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
getUNK, setUNK
protected int minWordFrequency
protected WeightLookupTable<T extends SequenceElement> lookupTable
protected VocabCache<T extends SequenceElement> vocab
protected int layerSize
protected transient ModelUtils<T extends SequenceElement> modelUtils
protected int numIterations
protected int numEpochs
protected double negative
protected double sampling
protected com.google.common.util.concurrent.AtomicDouble learningRate
protected double minLearningRate
protected int window
protected int batchSize
protected int learningRateDecayWords
protected boolean resetModel
protected boolean useAdeGrad
protected int workers
protected boolean trainSequenceVectors
protected boolean trainElementsVectors
protected long seed
protected boolean useUnknown
protected int[] variableWindows
public static final java.lang.String DEFAULT_UNK
protected java.util.Collection<java.lang.String> stopWords
public int getLayerSize()
public boolean hasWord(java.lang.String word)
hasWord
in interface WordVectors
word
- the word to test for
public java.util.Collection<java.lang.String> wordsNearestSum(java.util.Collection<java.lang.String> positive, java.util.Collection<java.lang.String> negative, int top)
wordsNearestSum
in interface WordVectors
positive
- the positive words
negative
- the negative words
top
- the top n words
public java.util.Collection<java.lang.String> wordsNearestSum(org.nd4j.linalg.api.ndarray.INDArray words, int top)
wordsNearestSum
in interface WordVectors
public java.util.Collection<java.lang.String> wordsNearest(org.nd4j.linalg.api.ndarray.INDArray words, int top)
wordsNearest
in interface WordVectors
public java.util.Collection<java.lang.String> wordsNearestSum(java.lang.String word, int n)
wordsNearestSum
in interface WordVectors
word
- the word to compare
n
- the n to get
public java.util.Map<java.lang.String,java.lang.Double> accuracy(java.util.List<java.lang.String> questions)
accuracy
in interface WordVectors
questions
- the questions to ask
public int indexOf(java.lang.String word)
indexOf
in interface WordVectors
public java.util.List<java.lang.String> similarWordsInVocabTo(java.lang.String word, double accuracy)
similarWordsInVocabTo
in interface WordVectors
word
- the word to compare
accuracy
- the accuracy: 0 to 1
public double[] getWordVector(java.lang.String word)
getWordVector
in interface WordVectors
word
- the word to get the matrix for
public org.nd4j.linalg.api.ndarray.INDArray getWordVectorMatrixNormalized(java.lang.String word)
getWordVectorMatrixNormalized
in interface WordVectors
word
- the word to get the matrix for
public org.nd4j.linalg.api.ndarray.INDArray getWordVectorMatrix(java.lang.String word)
WordVectors
getWordVectorMatrix
in interface WordVectors
word
- the word to get the matrix for
public java.util.Collection<java.lang.String> wordsNearest(java.util.Collection<java.lang.String> positive, java.util.Collection<java.lang.String> negative, int top)
wordsNearest
in interface WordVectors
positive
- the positive words
negative
- the negative words
top
- the top n words
public org.nd4j.linalg.api.ndarray.INDArray getWordVectors(@NonNull java.util.Collection<java.lang.String> labels)
getWordVectors
in interface WordVectors
labels
- the labels to return vectors for, one row per label
public org.nd4j.linalg.api.ndarray.INDArray getWordVectorsMean(java.util.Collection<java.lang.String> labels)
getWordVectorsMean
in interface WordVectors
labels
- the words/labels the mean vector is built from
public java.util.Collection<java.lang.String> wordsNearest(java.lang.String word, int n)
wordsNearest
in interface WordVectors
word
- the word to compare
n
- the n to get
public double similarity(java.lang.String word, java.lang.String word2)
similarity
in interface WordVectors
word
- the first word
word2
- the second word
public VocabCache<T> vocab()
WordVectors
vocab
in interface WordVectors
public WeightLookupTable lookupTable()
WordVectors
lookupTable
in interface WordVectors
public void setModelUtils(@NonNull ModelUtils modelUtils)
WordVectors
setModelUtils
in interface WordVectors
public void setLookupTable(@NonNull WeightLookupTable lookupTable)
public void setVocab(VocabCache vocab)
protected void update()
protected void update(org.nd4j.linalg.heartbeat.reports.Environment env, org.nd4j.linalg.heartbeat.reports.Event event)