public class AbstractCache<T extends SequenceElement> extends java.lang.Object implements VocabCache<T>
| Modifier and Type | Class and Description |
|---|---|
static class |
AbstractCache.Builder<T extends SequenceElement> |
| Constructor and Description |
|---|
AbstractCache() |
| Modifier and Type | Method and Description |
|---|---|
void |
addToken(T element)
This method adds specified SequenceElement to vocabulary
|
void |
addWordToIndex(int index,
long elementId) |
void |
addWordToIndex(int index,
java.lang.String label)
This method inserts the specified label at the specified Huffman tree position.
|
boolean |
containsElement(T element)
Checks whether the specified element exists in the vocabulary
|
boolean |
containsWord(java.lang.String word)
Checks whether the specified label exists in the vocabulary
|
int |
docAppearedIn(java.lang.String word)
Returns number of documents (if applicable) the label was observed in.
|
T |
elementAtIndex(int index)
Returns SequenceElement at specified index
|
boolean |
hasToken(java.lang.String label)
Checks whether the specified label already exists in the vocabulary.
|
void |
importVocabulary(VocabCache<T> vocabCache)
This method imports all elements from the VocabCache passed as argument.
If element already exists,
|
void |
incrementDocCount(java.lang.String word,
long howMuch)
Increment number of documents the label was observed in
Please note: this method is NOT thread-safe
|
void |
incrementTotalDocCount()
Increment total number of documents observed by 1
|
void |
incrementTotalDocCount(long by)
Increment total number of documents observed by specified value
|
void |
incrementWordCount(java.lang.String word)
Increment frequency for specified label by 1
|
void |
incrementWordCount(java.lang.String word,
int increment)
Increment frequency for specified label by specified value
|
int |
indexOf(java.lang.String label)
Returns Huffman index for specified label
|
void |
loadVocab()
Deserialize vocabulary from specified path
|
int |
numWords()
Returns number of elements in this vocabulary
|
void |
putVocabWord(java.lang.String word)
Deprecated.
|
void |
removeElement(java.lang.String label)
Removes element with specified label from vocabulary
Please note: Huffman index should be updated after element removal
|
void |
removeElement(T element)
Removes specified element from vocabulary
Please note: Huffman index should be updated after element removal
|
void |
saveVocab()
Serialize vocabulary to specified path
|
void |
setCountForDoc(java.lang.String word,
long count)
Set exact number of observed documents that contain specified word
Please note: this method is NOT thread-safe
|
T |
tokenFor(long id) |
T |
tokenFor(java.lang.String label)
Returns SequenceElement for specified label.
|
java.util.Collection<T> |
tokens()
Returns collection of SequenceElements from this vocabulary.
|
long |
totalNumberOfDocs()
Returns total number of documents observed (if applicable)
|
long |
totalWordOccurrences()
Returns total number of elements observed
|
void |
updateWordsOccurencies()
Updates counters
|
boolean |
vocabExists()
Returns true if the number of elements in the vocabulary is greater than 0, false otherwise
|
java.util.Collection<T> |
vocabWords()
Returns collection of SequenceElements stored in this vocabulary
|
java.lang.String |
wordAtIndex(int index)
Returns the label of the element at specified Huffman index
|
T |
wordFor(long id) |
T |
wordFor(java.lang.String label)
Returns SequenceElement for specified label
|
int |
wordFrequency(java.lang.String word)
Returns the SequenceElement's frequency over training corpus
|
java.util.Collection<java.lang.String> |
words()
Returns collection of labels available in this vocabulary
|
public void loadVocab()
loadVocab in interface VocabCache<T extends SequenceElement>
public boolean vocabExists()
vocabExists in interface VocabCache<T extends SequenceElement>
public void saveVocab()
saveVocab in interface VocabCache<T extends SequenceElement>
public java.util.Collection<java.lang.String> words()
words in interface VocabCache<T extends SequenceElement>
public void incrementWordCount(java.lang.String word)
incrementWordCount in interface VocabCache<T extends SequenceElement>
word - the word to increment the count for
public void incrementWordCount(java.lang.String word,
int increment)
incrementWordCount in interface VocabCache<T extends SequenceElement>
word - the word to increment the count for
increment - the amount to increment by
public int wordFrequency(@NonNull
java.lang.String word)
wordFrequency in interface VocabCache<T extends SequenceElement>
word - the word to retrieve the occurrence frequency for
public boolean containsWord(java.lang.String word)
containsWord in interface VocabCache<T extends SequenceElement>
word - the word to check for
public boolean containsElement(T element)
element - the element to check for
public java.lang.String wordAtIndex(int index)
wordAtIndex in interface VocabCache<T extends SequenceElement>
index - the index of the word to get
public T elementAtIndex(int index)
elementAtIndex in interface VocabCache<T extends SequenceElement>
index - the index of the element to get
public int indexOf(java.lang.String label)
indexOf in interface VocabCache<T extends SequenceElement>
label - the label to get index for
public java.util.Collection<T> vocabWords()
vocabWords in interface VocabCache<T extends SequenceElement>
public long totalWordOccurrences()
totalWordOccurrences in interface VocabCache<T extends SequenceElement>
public T wordFor(@NonNull java.lang.String label)
wordFor in interface VocabCache<T extends SequenceElement>
label - the label to fetch the element for
public T wordFor(long id)
wordFor in interface VocabCache<T extends SequenceElement>
public void addWordToIndex(int index,
java.lang.String label)
addWordToIndex in interface VocabCache<T extends SequenceElement>
index - the Huffman tree position to insert at
label - the label to insert
public void addWordToIndex(int index,
long elementId)
addWordToIndex in interface VocabCache<T extends SequenceElement>
@Deprecated public void putVocabWord(java.lang.String word)
Description copied from interface: VocabCache
putVocabWord in interface VocabCache<T extends SequenceElement>
word - the word to add to the vocab
public int numWords()
numWords in interface VocabCache<T extends SequenceElement>
public int docAppearedIn(java.lang.String word)
docAppearedIn in interface VocabCache<T extends SequenceElement>
word - the word to return the number of documents for
public void incrementDocCount(java.lang.String word,
long howMuch)
incrementDocCount in interface VocabCache<T extends SequenceElement>
word - the word to increment the document count for
howMuch - the amount to increment by
public void setCountForDoc(java.lang.String word,
long count)
setCountForDoc in interface VocabCache<T extends SequenceElement>
word - the word to set the count for
count - the count of the word
public long totalNumberOfDocs()
totalNumberOfDocs in interface VocabCache<T extends SequenceElement>
public void incrementTotalDocCount()
incrementTotalDocCount in interface VocabCache<T extends SequenceElement>
public void incrementTotalDocCount(long by)
incrementTotalDocCount in interface VocabCache<T extends SequenceElement>
by - the number to increment by
public java.util.Collection<T> tokens()
tokens in interface VocabCache<T extends SequenceElement>
public void addToken(T element)
addToken in interface VocabCache<T extends SequenceElement>
element - the word to add
public T tokenFor(java.lang.String label)
tokenFor in interface VocabCache<T extends SequenceElement>
label - the label to get the token for
public T tokenFor(long id)
tokenFor in interface VocabCache<T extends SequenceElement>
public boolean hasToken(java.lang.String label)
hasToken in interface VocabCache<T extends SequenceElement>
label - the token to test
public void importVocabulary(@NonNull
VocabCache<T> vocabCache)
importVocabulary in interface VocabCache<T extends SequenceElement>
vocabCache - 
public void updateWordsOccurencies()
Description copied from interface: VocabCache
updateWordsOccurencies in interface VocabCache<T extends SequenceElement>
public void removeElement(java.lang.String label)
Description copied from interface: VocabCache
removeElement in interface VocabCache<T extends SequenceElement>
label - label of the element to be removed
public void removeElement(T element)
Description copied from interface: VocabCache
removeElement in interface VocabCache<T extends SequenceElement>
element - SequenceElement to be removed