public class AbstractCache<T extends SequenceElement> extends java.lang.Object implements VocabCache<T>
Modifier and Type | Class and Description |
---|---|
static class |
AbstractCache.Builder<T extends SequenceElement> |
Constructor and Description |
---|
AbstractCache() |
Modifier and Type | Method and Description |
---|---|
void |
addToken(T element)
This method adds specified SequenceElement to vocabulary
|
void |
addWordToIndex(int index,
long elementId) |
void |
addWordToIndex(int index,
java.lang.String label)
This method inserts the specified label at the specified Huffman tree position.
|
boolean |
containsElement(T element)
Checks, if specified element exists in vocabulary
|
boolean |
containsWord(java.lang.String word)
Checks, if specified label exists in vocabulary
|
int |
docAppearedIn(java.lang.String word)
Returns number of documents (if applicable) the label was observed in.
|
T |
elementAtIndex(int index)
Returns SequenceElement at specified index
|
boolean |
hasToken(java.lang.String label)
Checks, if specified label already exists in vocabulary.
|
void |
importVocabulary(VocabCache<T> vocabCache)
This method imports all elements from VocabCache passed as argument
If element already exists,
|
void |
incrementDocCount(java.lang.String word,
long howMuch)
Increment number of documents the label was observed in
Please note: this method is NOT thread-safe
|
void |
incrementTotalDocCount()
Increment total number of documents observed by 1
|
void |
incrementTotalDocCount(long by)
Increment total number of documents observed by specified value
|
void |
incrementWordCount(java.lang.String word)
Increment frequency for specified label by 1
|
void |
incrementWordCount(java.lang.String word,
int increment)
Increment frequency for specified label by specified value
|
int |
indexOf(java.lang.String label)
Returns Huffman index for specified label
|
void |
loadVocab()
Deserialize vocabulary from specified path
|
int |
numWords()
Returns number of elements in this vocabulary
|
void |
putVocabWord(java.lang.String word)
Deprecated.
|
void |
removeElement(java.lang.String label)
Removes element with specified label from vocabulary
Please note: Huffman index should be updated after element removal
|
void |
removeElement(T element)
Removes specified element from vocabulary
Please note: Huffman index should be updated after element removal
|
void |
saveVocab()
Serialize vocabulary to specified path
|
void |
setCountForDoc(java.lang.String word,
long count)
Set exact number of observed documents that contain specified word
Please note: this method is NOT thread-safe
|
T |
tokenFor(long id) |
T |
tokenFor(java.lang.String label)
Returns SequenceElement for specified label.
|
java.util.Collection<T> |
tokens()
Returns collection of SequenceElements from this vocabulary.
|
long |
totalNumberOfDocs()
Returns total number of documents observed (if applicable)
|
long |
totalWordOccurrences()
Returns total number of elements observed
|
void |
updateWordsOccurencies()
Updates counters
|
boolean |
vocabExists()
Returns true, if number of elements in vocabulary > 0, false otherwise
|
java.util.Collection<T> |
vocabWords()
Returns collection of SequenceElements stored in this vocabulary
|
java.lang.String |
wordAtIndex(int index)
Returns the label of the element at specified Huffman index
|
T |
wordFor(long id) |
T |
wordFor(java.lang.String label)
Returns SequenceElement for specified label
|
int |
wordFrequency(java.lang.String word)
Returns the SequenceElement's frequency over training corpus
|
java.util.Collection<java.lang.String> |
words()
Returns collection of labels available in this vocabulary
|
public void loadVocab()
loadVocab
in interface VocabCache<T extends SequenceElement>
public boolean vocabExists()
vocabExists
in interface VocabCache<T extends SequenceElement>
public void saveVocab()
saveVocab
in interface VocabCache<T extends SequenceElement>
public java.util.Collection<java.lang.String> words()
words
in interface VocabCache<T extends SequenceElement>
public void incrementWordCount(java.lang.String word)
incrementWordCount
in interface VocabCache<T extends SequenceElement>
word
- the word to increment the count for
public void incrementWordCount(java.lang.String word, int increment)
incrementWordCount
in interface VocabCache<T extends SequenceElement>
word
- the word to increment the count for
increment
- the amount to increment by
public int wordFrequency(@NonNull java.lang.String word)
wordFrequency
in interface VocabCache<T extends SequenceElement>
word
- the word to retrieve the occurrence frequency for
public boolean containsWord(java.lang.String word)
containsWord
in interface VocabCache<T extends SequenceElement>
word
- the word to check for
public boolean containsElement(T element)
element
- the element to check for
public java.lang.String wordAtIndex(int index)
wordAtIndex
in interface VocabCache<T extends SequenceElement>
index
- the index of the word to get
public T elementAtIndex(int index)
elementAtIndex
in interface VocabCache<T extends SequenceElement>
index
- the index of the element to get
public int indexOf(java.lang.String label)
indexOf
in interface VocabCache<T extends SequenceElement>
label
- the label to get index for
public java.util.Collection<T> vocabWords()
vocabWords
in interface VocabCache<T extends SequenceElement>
public long totalWordOccurrences()
totalWordOccurrences
in interface VocabCache<T extends SequenceElement>
public T wordFor(@NonNull java.lang.String label)
wordFor
in interface VocabCache<T extends SequenceElement>
label
- the label to fetch the element for
public T wordFor(long id)
wordFor
in interface VocabCache<T extends SequenceElement>
public void addWordToIndex(int index, java.lang.String label)
addWordToIndex
in interface VocabCache<T extends SequenceElement>
index
- the Huffman tree position to insert the label at
label
- the label to insert
public void addWordToIndex(int index, long elementId)
addWordToIndex
in interface VocabCache<T extends SequenceElement>
@Deprecated public void putVocabWord(java.lang.String word)
VocabCache
putVocabWord
in interface VocabCache<T extends SequenceElement>
word
- the word to add to the vocab
public int numWords()
numWords
in interface VocabCache<T extends SequenceElement>
public int docAppearedIn(java.lang.String word)
docAppearedIn
in interface VocabCache<T extends SequenceElement>
word
- the word to look up (the method returns the number of documents the word appeared in)
public void incrementDocCount(java.lang.String word, long howMuch)
incrementDocCount
in interface VocabCache<T extends SequenceElement>
word
- the word whose document count is incremented
howMuch
- the amount to increment the document count by
public void setCountForDoc(java.lang.String word, long count)
setCountForDoc
in interface VocabCache<T extends SequenceElement>
word
- the word to set the count for
count
- the count of the word
public long totalNumberOfDocs()
totalNumberOfDocs
in interface VocabCache<T extends SequenceElement>
public void incrementTotalDocCount()
incrementTotalDocCount
in interface VocabCache<T extends SequenceElement>
public void incrementTotalDocCount(long by)
incrementTotalDocCount
in interface VocabCache<T extends SequenceElement>
by
- the number to increment by
public java.util.Collection<T> tokens()
tokens
in interface VocabCache<T extends SequenceElement>
public void addToken(T element)
addToken
in interface VocabCache<T extends SequenceElement>
element
- the word to add
public T tokenFor(java.lang.String label)
tokenFor
in interface VocabCache<T extends SequenceElement>
label
- the label to get the token for
public T tokenFor(long id)
tokenFor
in interface VocabCache<T extends SequenceElement>
public boolean hasToken(java.lang.String label)
hasToken
in interface VocabCache<T extends SequenceElement>
label
- the token to test
public void importVocabulary(@NonNull VocabCache<T> vocabCache)
importVocabulary
in interface VocabCache<T extends SequenceElement>
vocabCache
- the VocabCache to import elements from
public void updateWordsOccurencies()
VocabCache
updateWordsOccurencies
in interface VocabCache<T extends SequenceElement>
public void removeElement(java.lang.String label)
VocabCache
removeElement
in interface VocabCache<T extends SequenceElement>
label
- label of the element to be removed
public void removeElement(T element)
VocabCache
removeElement
in interface VocabCache<T extends SequenceElement>
element
- SequenceElement to be removed