public interface InvertedIndex<T extends SequenceElement>
extends java.io.Serializable
| Modifier and Type | Method and Description |
|---|---|
void |
addLabelForDoc(int doc,
java.lang.String label)
Adds a label to the given document
|
void |
addLabelForDoc(int doc,
T word)
Add word to a document
|
void |
addLabelsForDoc(int doc,
java.util.Collection<java.lang.String> label)
Adds labels to the given document
|
void |
addLabelsForDoc(int doc,
java.util.List<T> word)
Add word to a document
|
void |
addWordsToDoc(int doc,
java.util.List<T> words)
Adds words to the given document
|
void |
addWordsToDoc(int doc,
java.util.List<T> words,
java.util.Collection<java.lang.String> label)
Adds words to the given document
|
void |
addWordsToDoc(int doc,
java.util.List<T> words,
java.lang.String label)
Adds words to the given document
|
void |
addWordsToDoc(int doc,
java.util.List<T> words,
T label)
Adds words to the given document
|
void |
addWordsToDocVocabWord(int doc,
java.util.List<T> words,
java.util.Collection<T> label)
Adds words to the given document
|
void |
addWordToDoc(int doc,
T word)
Add word to a document
|
int[] |
allDocs()
Returns a list of all documents
|
java.util.Iterator<java.util.List<java.util.List<T>>> |
batchIter(int batchSize)
Iterate over batches
|
int |
batchSize()
For word vectors, this is the batch size to train on
|
void |
cleanup()
Cleanup any resources used
|
java.util.Iterator<java.util.List<T>> |
docs()
Iterate over documents
|
java.util.List<T> |
document(int index)
Returns a list of words for a document
|
int[] |
documents(T vocabWord)
Returns the list of documents a vocab word is in
|
Pair<java.util.List<T>,java.lang.String> |
documentWithLabel(int index)
Returns a list of words for a document
and the associated label
|
Pair<java.util.List<T>,java.util.Collection<java.lang.String>> |
documentWithLabels(int index)
Returns a list of words associated with the document
and the associated labels
|
void |
eachDoc(com.google.common.base.Function<java.util.List<T>,java.lang.Void> func,
java.util.concurrent.ExecutorService exec)
Iterate over each document
|
void |
eachDocWithLabel(com.google.common.base.Function<Pair<java.util.List<T>,java.lang.String>,java.lang.Void> func,
java.util.concurrent.ExecutorService exec)
Iterate over each document with a label
|
void |
eachDocWithLabels(com.google.common.base.Function<Pair<java.util.List<T>,java.util.Collection<java.lang.String>>,java.lang.Void> func,
java.util.concurrent.ExecutorService exec)
Iterate over each document with its labels
|
void |
finish()
Finishes saving data
|
java.util.Iterator<java.util.List<T>> |
miniBatches()
Iterates over mini batches
|
int |
numDocuments()
Returns the number of documents
|
double |
sample()
Sampling for creating mini batches
|
long |
totalWords()
Total number of words in the index
|
void |
unlock()
Unlock the index
|
java.util.Iterator<java.util.List<java.util.List<T>>> batchIter(int batchSize)
java.util.Iterator<java.util.List<T>> docs()
void unlock()
void cleanup()
double sample()
java.util.Iterator<java.util.List<T>> miniBatches()
java.util.List<T> document(int index)
index - the index of the document
Pair<java.util.List<T>,java.lang.String> documentWithLabel(int index)
index - the index of the document
Pair<java.util.List<T>,java.util.Collection<java.lang.String>> documentWithLabels(int index)
index - the index of the document
int[] documents(T vocabWord)
vocabWord - the vocab word to get documents for
int numDocuments()
int[] allDocs()
void addWordToDoc(int doc,
T word)
doc - the document to add to
word - the word to add
void addWordsToDoc(int doc,
java.util.List<T> words)
doc - the document to add to
words - the words to add
void addLabelForDoc(int doc,
T word)
doc - the document to add to
word - the word to add
void addLabelForDoc(int doc,
java.lang.String label)
doc - the document to add to
void addWordsToDoc(int doc,
java.util.List<T> words,
java.lang.String label)
doc - the document to add to
words - the words to add
label - the label for the document
void addWordsToDoc(int doc,
java.util.List<T> words,
T label)
doc - the document to add to
words - the words to add
label - the label for the document
void addLabelsForDoc(int doc,
java.util.List<T> word)
doc - the document to add to
word - the word to add
void addLabelsForDoc(int doc,
java.util.Collection<java.lang.String> label)
doc - the document to add to
label - the labels to add
void addWordsToDoc(int doc,
java.util.List<T> words,
java.util.Collection<java.lang.String> label)
doc - the document to add to
words - the words to add
label - the label for the document
void addWordsToDocVocabWord(int doc,
java.util.List<T> words,
java.util.Collection<T> label)
doc - the document to add to
words - the words to add
label - the label for the document
void finish()
long totalWords()
int batchSize()
void eachDocWithLabels(com.google.common.base.Function<Pair<java.util.List<T>,java.util.Collection<java.lang.String>>,java.lang.Void> func, java.util.concurrent.ExecutorService exec)
func - the function to apply
exec - executor service for execution
void eachDocWithLabel(com.google.common.base.Function<Pair<java.util.List<T>,java.lang.String>,java.lang.Void> func, java.util.concurrent.ExecutorService exec)
func - the function to apply
exec - executor service for execution
void eachDoc(com.google.common.base.Function<java.util.List<T>,java.lang.Void> func, java.util.concurrent.ExecutorService exec)
func - the function to apply
exec - executor service for execution