public class DocumentSequenceConvertFunction extends BaseTokenizerFunction implements org.apache.spark.api.java.function.Function<LabelledDocument,Sequence<VocabWord>>
Fields inherited from class BaseTokenizerFunction: configurationBroadcast, tokenizerFactory, tokenPreprocessor
Constructor and Description |
---|
DocumentSequenceConvertFunction(org.apache.spark.broadcast.Broadcast<VectorsConfiguration> configurationBroadcast) |
Modifier and Type | Method and Description |
---|---|
Sequence<VocabWord> | call(LabelledDocument document) |
Methods inherited from class BaseTokenizerFunction: instantiateTokenizerFactory
public DocumentSequenceConvertFunction(@NonNull org.apache.spark.broadcast.Broadcast<VectorsConfiguration> configurationBroadcast)
public Sequence<VocabWord> call(LabelledDocument document) throws java.lang.Exception
Specified by: call in interface org.apache.spark.api.java.function.Function<LabelledDocument,Sequence<VocabWord>>
Throws: java.lang.Exception