public class SparkSequenceVectors<T extends SequenceElement> extends SequenceVectors<T>
Modifier and Type | Class and Description |
---|---|
static class |
SparkSequenceVectors.Builder<T extends SequenceElement> |
SequenceVectors.AsyncSequencer
Modifier and Type | Field and Description |
---|---|
protected org.apache.spark.broadcast.Broadcast<VectorsConfiguration> |
configurationBroadcast |
protected SparkElementsLearningAlgorithm |
ela |
protected org.apache.spark.Accumulator<Counter<java.lang.Long>> |
elementsFreqAccum |
protected org.apache.spark.Accumulator<ExtraCounter<java.lang.Long>> |
elementsFreqAccumExtra |
protected SparkModelExporter<T> |
exporter |
protected boolean |
isAutoDiscoveryMode |
protected boolean |
isEnvironmentReady |
protected org.nd4j.parameterserver.distributed.conf.VoidConfiguration |
paramServerConfiguration |
protected VocabCache<ShallowSequenceElement> |
shallowVocabCache |
protected org.apache.spark.broadcast.Broadcast<VocabCache<ShallowSequenceElement>> |
shallowVocabCacheBroadcast |
protected SparkSequenceLearningAlgorithm |
sla |
protected org.apache.spark.storage.StorageLevel |
storageLevel |
protected org.apache.spark.broadcast.Broadcast<VocabCache<T>> |
vocabCacheBroadcast |
configuration, configured, elementsLearningAlgorithm, enableScavenger, eventListeners, existingModel, iterator, log, scoreElements, scoreSequences, sequenceLearningAlgorithm, unknownElement
batchSize, DEFAULT_UNK, layerSize, learningRate, learningRateDecayWords, lookupTable, minLearningRate, minWordFrequency, modelUtils, negative, numEpochs, numIterations, resetModel, sampling, seed, stopWords, trainElementsVectors, trainSequenceVectors, useAdeGrad, useUnknown, variableWindows, vocab, window, workers
Modifier | Constructor and Description |
---|---|
protected |
SparkSequenceVectors() |
protected |
SparkSequenceVectors(VectorsConfiguration configuration) |
Modifier and Type | Method and Description |
---|---|
protected void |
broadcastEnvironment(org.apache.spark.api.java.JavaSparkContext context) |
protected VocabCache<ShallowSequenceElement> |
buildShallowVocabCache(Counter<java.lang.Long> counter)
This method builds the shallow vocabulary and its Huffman tree.
|
void |
fit()
Deprecated.
|
void |
fitLists(org.apache.spark.api.java.JavaRDD<java.util.List<T>> corpus)
Utility method.
|
void |
fitSequences(org.apache.spark.api.java.JavaRDD<Sequence<T>> corpus)
Base training entry point
|
protected Counter<java.lang.Long> |
getCounter() |
protected VocabCache<ShallowSequenceElement> |
getShallowVocabCache() |
protected void |
validateConfiguration() |
buildVocab, getElementsScore, getSequencesScore, getUNK, getWordVectorMatrix, initLearners, trainSequence
accuracy, getLayerSize, getWordVector, getWordVectorMatrixNormalized, getWordVectors, getWordVectorsMean, hasWord, indexOf, lookupTable, setLookupTable, setModelUtils, setVocab, similarity, similarWordsInVocabTo, update, update, vocab, wordsNearest, wordsNearest, wordsNearest, wordsNearestSum, wordsNearestSum, wordsNearestSum
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
accuracy, getWordVector, getWordVectorMatrixNormalized, getWordVectors, getWordVectorsMean, hasWord, indexOf, lookupTable, setModelUtils, setUNK, similarity, similarWordsInVocabTo, vocab, wordsNearest, wordsNearest, wordsNearest, wordsNearestSum, wordsNearestSum, wordsNearestSum
protected org.apache.spark.Accumulator<Counter<java.lang.Long>> elementsFreqAccum
protected org.apache.spark.Accumulator<ExtraCounter<java.lang.Long>> elementsFreqAccumExtra
protected org.apache.spark.storage.StorageLevel storageLevel
protected org.apache.spark.broadcast.Broadcast<VocabCache<T>> vocabCacheBroadcast
protected org.apache.spark.broadcast.Broadcast<VocabCache<ShallowSequenceElement>> shallowVocabCacheBroadcast
protected org.apache.spark.broadcast.Broadcast<VectorsConfiguration> configurationBroadcast
protected transient boolean isEnvironmentReady
protected transient VocabCache<ShallowSequenceElement> shallowVocabCache
protected boolean isAutoDiscoveryMode
protected SparkModelExporter<T> exporter
protected SparkElementsLearningAlgorithm ela
protected SparkSequenceLearningAlgorithm sla
protected org.nd4j.parameterserver.distributed.conf.VoidConfiguration paramServerConfiguration
protected SparkSequenceVectors()
protected SparkSequenceVectors(@NonNull VectorsConfiguration configuration)
protected VocabCache<ShallowSequenceElement> getShallowVocabCache()
@Deprecated public void fit()
Overrides: fit in class SequenceVectors<T extends SequenceElement>
protected void validateConfiguration()
protected void broadcastEnvironment(org.apache.spark.api.java.JavaSparkContext context)
public void fitLists(org.apache.spark.api.java.JavaRDD<java.util.List<T>> corpus)
Parameters: corpus
public void fitSequences(org.apache.spark.api.java.JavaRDD<Sequence<T>> corpus)
Parameters: corpus
protected VocabCache<ShallowSequenceElement> buildShallowVocabCache(Counter<java.lang.Long> counter)
Parameters: counter
protected Counter<java.lang.Long> getCounter()