public class VocabularyHolder
extends java.lang.Object
implements java.io.Serializable
| Modifier and Type | Class and Description | 
|---|---|
| static class  | VocabularyHolder.Builder | 
| Modifier | Constructor and Description | 
|---|---|
| protected  | VocabularyHolder()Default constructor | 
| protected  | VocabularyHolder(VocabCache<? extends SequenceElement> cache,
                boolean markAsSpecial)Builds VocabularyHolder from VocabCache. | 
| Modifier and Type | Method and Description | 
|---|---|
| protected void | activateScavenger()This method removes low-frequency words based on their frequency change between activations. | 
| void | addWord(java.lang.String word)Adds new word to vocabulary | 
| void | addWord(VocabularyWord word) | 
| static java.util.List<java.lang.Byte> | arrayToList(byte[] array,
           int codeLen)This method is used only for VocabCache compatibility purposes | 
| static java.util.List<java.lang.Integer> | arrayToList(int[] array,
           int codeLen)This method is used only for VocabCache compatibility purposes | 
| static HuffmanNode | buildNode(java.util.List<java.lang.Byte> codes,
         java.util.List<java.lang.Integer> points,
         int codeLen,
         int index) | 
| void | consumeVocabulary(VocabularyHolder holder) | 
| boolean | containsWord(java.lang.String word)Checks vocabulary for the word existence | 
| java.util.Collection<VocabularyWord> | getVocabulary() | 
| VocabularyWord | getVocabularyWordByIdx(java.lang.Integer id) | 
| VocabularyWord | getVocabularyWordByString(java.lang.String word) | 
| void | incrementWordCounter(java.lang.String word)Increments by one the number of occurrences of the word in the corpus | 
| int | indexOf(java.lang.String word)This method returns index of word in sorted list. | 
| static byte[] | listToArray(java.util.List<java.lang.Byte> code) | 
| static int[] | listToArray(java.util.List<java.lang.Integer> points,
           int codeLen) | 
| int | numWords() | 
| void | resetWordCounters()This method resets counters for all words in the vocabulary | 
| protected void | setScavengerActivationThreshold(int threshold)This method is needed ONLY for unit tests and should NOT be available in public scope. | 
| long | totalWordsBeyondLimit() | 
| void | transferBackToVocabCache() | 
| void | transferBackToVocabCache(VocabCache cache) | 
| void | transferBackToVocabCache(VocabCache cache,
                        boolean emptyHolder)This method is required for compatibility purposes. | 
| void | truncateVocabulary()The same as truncateVocabulary(this.minWordFrequency) | 
| void | truncateVocabulary(int threshold)All words with frequency below threshold will be removed | 
| java.util.List<VocabularyWord> | updateHuffmanCodes()Builds binary tree ordered by counter. | 
| java.util.List<VocabularyWord> | words()Returns sorted list of words in vocabulary. | 
protected VocabularyHolder()
protected VocabularyHolder(@NonNull
                           VocabCache<? extends SequenceElement> cache,
                           boolean markAsSpecial)
cache -
public static HuffmanNode buildNode(java.util.List<java.lang.Byte> codes, java.util.List<java.lang.Integer> points, int codeLen, int index)
public void transferBackToVocabCache()
public void transferBackToVocabCache(VocabCache cache)
public void transferBackToVocabCache(VocabCache cache, boolean emptyHolder)
cache -
protected void setScavengerActivationThreshold(int threshold)
threshold -
public static java.util.List<java.lang.Byte> arrayToList(byte[] array,
                                                         int codeLen)
array -
codeLen -
public static byte[] listToArray(java.util.List<java.lang.Byte> code)
public static int[] listToArray(java.util.List<java.lang.Integer> points,
                                int codeLen)
public static java.util.List<java.lang.Integer> arrayToList(int[] array,
                                                            int codeLen)
array -
codeLen -
public java.util.Collection<VocabularyWord> getVocabulary()
public VocabularyWord getVocabularyWordByString(java.lang.String word)
public VocabularyWord getVocabularyWordByIdx(java.lang.Integer id)
public boolean containsWord(java.lang.String word)
word - to be looked for
public void incrementWordCounter(java.lang.String word)
word - whose counter is to be incremented
public void addWord(java.lang.String word)
word - to be added
public void addWord(VocabularyWord word)
public void consumeVocabulary(VocabularyHolder holder)
protected void activateScavenger()
public void resetWordCounters()
public int numWords()
public void truncateVocabulary()
public void truncateVocabulary(int threshold)
threshold - exclusive threshold for removal
public java.util.List<VocabularyWord> updateHuffmanCodes()
public int indexOf(java.lang.String word)
word -
public java.util.List<VocabularyWord> words()
public long totalWordsBeyondLimit()