public class DefaultVocabCache extends java.lang.Object implements VocabCache
Constructor and Description |
---|
DefaultVocabCache() |
DefaultVocabCache(int minWordFrequency)
Instantiate with a given min word frequency
|
Modifier and Type | Method and Description |
---|---|
int |
getMinWordFrequency() |
double |
idf(java.lang.String word)
Number of documents word has occurred in
|
void |
incrementCount(java.lang.String word)
Increment a word count by 1
|
void |
incrementCount(java.lang.String word,
double by)
Increment count for a word
|
void |
incrementDocCount(java.lang.String word)
Increment the doc count for a word by 1
|
void |
incrementDocCount(java.lang.String word,
double by)
Increment the document count for a particular word
|
void |
incrementNumDocs(double by)
Increment the number of documents
|
void |
initialize(Configuration conf)
Configuration for initializing
|
int |
minWordFrequency()
The min word frequency
needed to be included in the vocab
(default 5)
|
double |
numDocs()
Number of documents
|
void |
setMinWordFrequency(int minWordFrequency) |
double |
tfidf(java.lang.String word,
double frequency)
Calculate the tfidf of the word given the document frequency
|
Index |
vocabWords()
All of the vocab words (ordered)
note that these are not all the possible tokens
|
java.lang.String |
wordAt(int i)
Returns a word in the vocab at a particular index
|
double |
wordFrequency(java.lang.String word)
Get the word frequency for a word
|
public DefaultVocabCache(int minWordFrequency)
minWordFrequency
- the min word frequency needed to be included in the vocab
public DefaultVocabCache()
public void incrementNumDocs(double by)
VocabCache
incrementNumDocs
in interface VocabCache
public double numDocs()
VocabCache
numDocs
in interface VocabCache
public java.lang.String wordAt(int i)
VocabCache
wordAt
in interface VocabCache
i
- the index to get
public void initialize(Configuration conf)
VocabCache
initialize
in interface VocabCache
conf
- the configuration to initialize with
public double wordFrequency(java.lang.String word)
VocabCache
wordFrequency
in interface VocabCache
word
- the word to get frequency for
public int minWordFrequency()
VocabCache
minWordFrequency
in interface VocabCache
public Index vocabWords()
VocabCache
vocabWords
in interface VocabCache
public void incrementDocCount(java.lang.String word)
VocabCache
incrementDocCount
in interface VocabCache
word
- the word to increment the count for
public void incrementDocCount(java.lang.String word, double by)
VocabCache
incrementDocCount
in interface VocabCache
word
- the word to increment the count for
by
- the amount to increment by
public void incrementCount(java.lang.String word)
VocabCache
incrementCount
in interface VocabCache
word
- the word to increment the count for
public void incrementCount(java.lang.String word, double by)
VocabCache
incrementCount
in interface VocabCache
word
- the word to increment the count for
by
- the amount to increment by
public double idf(java.lang.String word)
VocabCache
idf
in interface VocabCache
word
- the word to get the idf for
public double tfidf(java.lang.String word, double frequency)
VocabCache
tfidf
in interface VocabCache
word
- the word to get frequency for
frequency
- the frequency
public int getMinWordFrequency()
public void setMinWordFrequency(int minWordFrequency)