public class VocabularyHolder
extends java.lang.Object
implements java.io.Serializable
Modifier and Type | Class and Description |
---|---|
static class |
VocabularyHolder.Builder |
Modifier | Constructor and Description |
---|---|
protected |
VocabularyHolder()
Default constructor
|
protected |
VocabularyHolder(VocabCache<? extends SequenceElement> cache,
boolean markAsSpecial)
Builds VocabularyHolder from VocabCache.
|
Modifier and Type | Method and Description |
---|---|
protected void |
activateScavenger()
This method removes low-frequency words based on their frequency change between activations.
|
void |
addWord(java.lang.String word)
Adds new word to vocabulary
|
void |
addWord(VocabularyWord word) |
static java.util.List<java.lang.Byte> |
arrayToList(byte[] array,
int codeLen)
This method is used only for VocabCache compatibility purposes
|
static java.util.List<java.lang.Integer> |
arrayToList(int[] array,
int codeLen)
This method is used only for VocabCache compatibility purposes
|
static HuffmanNode |
buildNode(java.util.List<java.lang.Byte> codes,
java.util.List<java.lang.Integer> points,
int codeLen,
int index) |
void |
consumeVocabulary(VocabularyHolder holder) |
boolean |
containsWord(java.lang.String word)
Checks vocabulary for the word existence
|
java.util.Collection<VocabularyWord> |
getVocabulary() |
VocabularyWord |
getVocabularyWordByIdx(java.lang.Integer id) |
VocabularyWord |
getVocabularyWordByString(java.lang.String word) |
void |
incrementWordCounter(java.lang.String word)
Increments by one the number of occurrences of the word in the corpus
|
int |
indexOf(java.lang.String word)
This method returns index of word in sorted list.
|
static byte[] |
listToArray(java.util.List<java.lang.Byte> code) |
static int[] |
listToArray(java.util.List<java.lang.Integer> points,
int codeLen) |
int |
numWords() |
void |
resetWordCounters()
This method resets counters for all words in the vocabulary
|
protected void |
setScavengerActivationThreshold(int threshold)
This method is needed ONLY for unit tests and should NOT be available in public scope.
|
long |
totalWordsBeyondLimit() |
void |
transferBackToVocabCache() |
void |
transferBackToVocabCache(VocabCache cache) |
void |
transferBackToVocabCache(VocabCache cache,
boolean emptyHolder)
This method is required for compatibility purposes.
|
void |
truncateVocabulary()
The same as truncateVocabulary(this.minWordFrequency)
|
void |
truncateVocabulary(int threshold)
All words with frequency below threshold will be removed
|
java.util.List<VocabularyWord> |
updateHuffmanCodes()
Builds binary tree ordered by counter.
|
java.util.List<VocabularyWord> |
words()
Returns sorted list of words in vocabulary.
|
protected VocabularyHolder()
protected VocabularyHolder(@NonNull VocabCache<? extends SequenceElement> cache, boolean markAsSpecial)
cache
- public static HuffmanNode buildNode(java.util.List<java.lang.Byte> codes, java.util.List<java.lang.Integer> points, int codeLen, int index)
public void transferBackToVocabCache()
public void transferBackToVocabCache(VocabCache cache)
public void transferBackToVocabCache(VocabCache cache, boolean emptyHolder)
cache
- protected void setScavengerActivationThreshold(int threshold)
threshold
- public static java.util.List<java.lang.Byte> arrayToList(byte[] array, int codeLen)
array
- codeLen
- public static byte[] listToArray(java.util.List<java.lang.Byte> code)
public static int[] listToArray(java.util.List<java.lang.Integer> points, int codeLen)
public static java.util.List<java.lang.Integer> arrayToList(int[] array, int codeLen)
array
- codeLen
- public java.util.Collection<VocabularyWord> getVocabulary()
public VocabularyWord getVocabularyWordByString(java.lang.String word)
public VocabularyWord getVocabularyWordByIdx(java.lang.Integer id)
public boolean containsWord(java.lang.String word)
word
- to be looked for
public void incrementWordCounter(java.lang.String word)
word
- whose counter is to be incremented
public void addWord(java.lang.String word)
word
- to be added
public void addWord(VocabularyWord word)
public void consumeVocabulary(VocabularyHolder holder)
protected void activateScavenger()
public void resetWordCounters()
public int numWords()
public void truncateVocabulary()
public void truncateVocabulary(int threshold)
threshold
- exclusive threshold for removal
public java.util.List<VocabularyWord> updateHuffmanCodes()
public int indexOf(java.lang.String word)
word
- public java.util.List<VocabularyWord> words()
public long totalWordsBeyondLimit()