public interface InvertedIndex<T extends SequenceElement>
extends java.io.Serializable
Modifier and Type | Method and Description |
---|---|
void |
addLabelForDoc(int doc,
java.lang.String label)
Adds a label to the given document
|
void |
addLabelForDoc(int doc,
T word)
Add word to a document
|
void |
addLabelsForDoc(int doc,
java.util.Collection<java.lang.String> label)
Adds labels to the given document
|
void |
addLabelsForDoc(int doc,
java.util.List<T> word)
Add word to a document
|
void |
addWordsToDoc(int doc,
java.util.List<T> words)
Adds words to the given document
|
void |
addWordsToDoc(int doc,
java.util.List<T> words,
java.util.Collection<java.lang.String> label)
Adds words to the given document
|
void |
addWordsToDoc(int doc,
java.util.List<T> words,
java.lang.String label)
Adds words to the given document
|
void |
addWordsToDoc(int doc,
java.util.List<T> words,
T label)
Adds words to the given document
|
void |
addWordsToDocVocabWord(int doc,
java.util.List<T> words,
java.util.Collection<T> label)
Adds words to the given document
|
void |
addWordToDoc(int doc,
T word)
Add word to a document
|
int[] |
allDocs()
Returns a list of all documents
|
java.util.Iterator<java.util.List<java.util.List<T>>> |
batchIter(int batchSize)
Iterate over batches
|
int |
batchSize()
For word vectors, this is the batch size to train on
|
void |
cleanup()
Cleanup any resources used
|
java.util.Iterator<java.util.List<T>> |
docs()
Iterate over documents
|
java.util.List<T> |
document(int index)
Returns a list of words for a document
|
int[] |
documents(T vocabWord)
Returns the list of documents a vocab word is in
|
Pair<java.util.List<T>,java.lang.String> |
documentWithLabel(int index)
Returns a list of words for a document
and the associated label
|
Pair<java.util.List<T>,java.util.Collection<java.lang.String>> |
documentWithLabels(int index)
Returns a list of words associated with the document
and the associated labels
|
void |
eachDoc(com.google.common.base.Function<java.util.List<T>,java.lang.Void> func,
java.util.concurrent.ExecutorService exec)
Iterate over each document
|
void |
eachDocWithLabel(com.google.common.base.Function<Pair<java.util.List<T>,java.lang.String>,java.lang.Void> func,
java.util.concurrent.ExecutorService exec)
Iterate over each document with a label
|
void |
eachDocWithLabels(com.google.common.base.Function<Pair<java.util.List<T>,java.util.Collection<java.lang.String>>,java.lang.Void> func,
java.util.concurrent.ExecutorService exec)
Iterate over each document with its labels
|
void |
finish()
Finishes saving data
|
java.util.Iterator<java.util.List<T>> |
miniBatches()
Iterates over mini batches
|
int |
numDocuments()
Returns the number of documents
|
double |
sample()
Sampling for creating mini batches
|
long |
totalWords()
Total number of words in the index
|
void |
unlock()
Unlock the index
|
java.util.Iterator<java.util.List<java.util.List<T>>> batchIter(int batchSize)
java.util.Iterator<java.util.List<T>> docs()
void unlock()
void cleanup()
double sample()
java.util.Iterator<java.util.List<T>> miniBatches()
java.util.List<T> document(int index)
index - the index of the document
Pair<java.util.List<T>,java.lang.String> documentWithLabel(int index)
index - the index of the document
Pair<java.util.List<T>,java.util.Collection<java.lang.String>> documentWithLabels(int index)
index - the index of the document
int[] documents(T vocabWord)
vocabWord - the vocab word to get documents for
int numDocuments()
int[] allDocs()
void addWordToDoc(int doc, T word)
doc - the document to add to
word - the word to add
void addWordsToDoc(int doc, java.util.List<T> words)
doc - the document to add to
words - the words to add
void addLabelForDoc(int doc, T word)
doc - the document to add to
word - the word to add
void addLabelForDoc(int doc, java.lang.String label)
doc - the document to add to
void addWordsToDoc(int doc, java.util.List<T> words, java.lang.String label)
doc - the document to add to
words - the words to add
label - the label for the document
void addWordsToDoc(int doc, java.util.List<T> words, T label)
doc - the document to add to
words - the words to add
label - the label for the document
void addLabelsForDoc(int doc, java.util.List<T> word)
doc - the document to add to
word - the word to add
void addLabelsForDoc(int doc, java.util.Collection<java.lang.String> label)
doc - the document to add to
label - the labels to add
void addWordsToDoc(int doc, java.util.List<T> words, java.util.Collection<java.lang.String> label)
doc - the document to add to
words - the words to add
label - the label for the document
void addWordsToDocVocabWord(int doc, java.util.List<T> words, java.util.Collection<T> label)
doc - the document to add to
words - the words to add
label - the label for the document
void finish()
long totalWords()
int batchSize()
void eachDocWithLabels(com.google.common.base.Function<Pair<java.util.List<T>,java.util.Collection<java.lang.String>>,java.lang.Void> func, java.util.concurrent.ExecutorService exec)
func - the function to apply
exec - executor service for execution
void eachDocWithLabel(com.google.common.base.Function<Pair<java.util.List<T>,java.lang.String>,java.lang.Void> func, java.util.concurrent.ExecutorService exec)
func - the function to apply
exec - executor service for execution
void eachDoc(com.google.common.base.Function<java.util.List<T>,java.lang.Void> func, java.util.concurrent.ExecutorService exec)
func - the function to apply
exec - executor service for execution