public class CategoryColumn extends AbstractColumn implements CategoryFilters, CategoryColumnUtils, java.lang.Iterable<java.lang.String>
Modifier and Type | Field and Description |
---|---|
static java.lang.String |
MISSING_VALUE |
it.unimi.dsi.fastutil.ints.IntComparator |
rowComparator |
isMissing, isNotMissing
Constructor and Description |
---|
CategoryColumn(ColumnMetadata metadata) |
CategoryColumn(java.lang.String name,
int size) |
Modifier and Type | Method and Description |
---|---|
void |
add(java.lang.String stringValue) |
void |
addAll(java.util.List<java.lang.String> stringValues)
Add all the strings in the list to this column
|
void |
addCell(java.lang.String object) |
void |
append(Column column) |
CategoryColumn |
appendString(CategoryColumn append)
Return a copy of this column
with the given string appended
|
CategoryColumn |
appendString(java.lang.String append)
Return a copy of this column
with the given string appended
|
byte[] |
asBytes(int rowNumber)
Returns the contents of the cell at rowNumber as a byte[]
|
java.util.Set<java.lang.String> |
asSet() |
java.util.List<java.lang.String> |
bottom(int n)
Returns the smallest ("bottom") n values in the column
|
int |
byteSize()
Returns the width of a cell in this column, in bytes
|
void |
clear() |
boolean |
contains(java.lang.String aString)
Returns true if this column contains a cell with the given string, and false otherwise
|
static java.lang.String |
convert(java.lang.String stringValue) |
CategoryColumn |
copy()
Returns a deep copy of the receiver
|
Table |
countByCategory() |
int |
countMissing()
Returns the count of missing values in this column
|
int |
countUnique()
Returns the count of unique values in this column
|
static CategoryColumn |
create(java.lang.String name) |
static CategoryColumn |
create(java.lang.String name,
int size) |
static CategoryColumn |
create(java.lang.String name,
java.util.List<java.lang.String> categories) |
it.unimi.dsi.fastutil.ints.IntArrayList |
data()
Returns the integers that back this column
|
DictionaryMap |
dictionaryMap() |
CategoryColumn |
emptyCopy()
Returns a copy of the receiver with no data.
|
CategoryColumn |
emptyCopy(int rowSize)
Returns an empty copy of the receiver, with its internal storage initialized to the given row size
|
java.lang.String |
get(int rowIndex)
Returns the value at rowIndex in this column.
|
java.util.List<BooleanColumn> |
getDummies()
Returns a list of boolean columns suitable for use as dummy variables in, for example, regression analysis,
where a column of categorical data must be encoded as a list of columns, such that each column represents
a single category and indicates whether it is present (1) or not present (0)
|
int |
getInt(int rowNumber) |
java.lang.String |
getString(int row)
Returns a string representation of the value at the given row
|
it.unimi.dsi.fastutil.ints.IntArrayList |
getValues(it.unimi.dsi.fastutil.ints.IntArrayList indexes)
Returns all the values associated with the given indexes
|
int[] |
indexes()
Return the raw indexes
that this column contains.
|
void |
initializeWith(it.unimi.dsi.fastutil.ints.IntArrayList list,
DictionaryMap map)
Initializes this Column with the given values for performance
|
boolean |
isEmpty()
Returns true if the column has no data
|
Selection |
isEqualTo(java.lang.String string) |
Selection |
isIn(java.lang.String... strings) |
Selection |
isInSet(java.util.Collection<java.lang.String> values2) |
Selection |
isMissing() |
Selection |
isNotEqualTo(java.lang.String string) |
Selection |
isNotInSet(java.util.Collection<java.lang.String> values2) |
Selection |
isNotMissing() |
java.util.Iterator<java.lang.String> |
iterator() |
java.lang.String |
print() |
CategoryColumn |
replaceAll(java.lang.String[] regexArray,
java.lang.String replacement)
Return a copy of this column
with the given regular expressions array
applied to a find and replace
|
it.unimi.dsi.fastutil.ints.IntComparator |
rowComparator() |
Selection |
select(StringBiPredicate predicate,
java.lang.String value) |
Selection |
select(StringPredicate predicate) |
CategoryColumn |
selectIf(StringPredicate predicate) |
void |
set(int rowIndex,
java.lang.String stringValue) |
int |
size()
Returns the number of elements (a.k.a. rows or cells) in the column
|
void |
sortAscending() |
void |
sortDescending() |
Table |
summary() |
IntColumn |
toIntColumn() |
CategoryColumn |
tokenizeAndRemoveDuplicates() |
CategoryColumn |
tokenizeAndSort()
Splits on whitespace and returns the lexicographically sorted result
|
CategoryColumn |
tokenizeAndSort(java.lang.String separator) |
java.util.List<java.lang.String> |
toList() |
java.util.List<java.lang.String> |
top(int n)
Returns the largest ("top") n values in the column
|
java.lang.String |
toString() |
ColumnType |
type()
Returns this column's ColumnType
|
CategoryColumn |
unique()
Returns a column of the same type as the receiver, containing only the unique values of the receiver
|
it.unimi.dsi.fastutil.ints.IntArrayList |
values()
Returns the integer encoded value of each cell in this column.
|
columnMetadata, columnWidth, comment, difference, id, metadata, name, setComment, setName
clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
empty, endsWith, equalToIgnoringCase, hasLengthEqualTo, isAlpha, isAlphaNumeric, isLongerThan, isLowerCase, isNumeric, isShorterThan, isUpperCase, matchesRegex, startsWith, stringContains
abbreviate, commonPrefix, commonSuffix, distance, join, lowerCase, padEnd, padStart, replaceAll, replaceFirst, substring, substring, trim, upperCase
appendAll, appendAll
columnMetadata, columnWidth, comment, difference, first, first, id, last, last, metadata, name, setComment, setName, subset, title, toDoubleArray
public static final java.lang.String MISSING_VALUE
public final it.unimi.dsi.fastutil.ints.IntComparator rowComparator
public CategoryColumn(ColumnMetadata metadata)
public CategoryColumn(java.lang.String name, int size)
public static CategoryColumn create(java.lang.String name)
public static CategoryColumn create(java.lang.String name, int size)
public static CategoryColumn create(java.lang.String name, java.util.List<java.lang.String> categories)
public ColumnType type()
Column
public java.lang.String getString(int row)
Column
public CategoryColumn emptyCopy()
Column
public CategoryColumn emptyCopy(int rowSize)
Column
public void sortAscending()
sortAscending
in interface Column
public void sortDescending()
sortDescending
in interface Column
public int size()
size
in interface Column
size
in interface CategoryReduceUtils
public java.lang.String get(int rowIndex)
java.lang.IndexOutOfBoundsException
- if the given rowIndex is not in the column
public java.util.List<java.lang.String> toList()
public Table countByCategory()
public void set(int rowIndex, java.lang.String stringValue)
public int countUnique()
Column
countUnique
in interface Column
public java.util.List<java.lang.String> top(int n)
n
- The maximum number of records to return. The actual number will be smaller if n is greater than the
number of observations in the column
public java.util.List<java.lang.String> bottom(int n)
n
- The maximum number of records to return. The actual number will be smaller if n is greater than the
number of observations in the column
public void add(java.lang.String stringValue)
public void initializeWith(it.unimi.dsi.fastutil.ints.IntArrayList list, DictionaryMap map)
public boolean contains(java.lang.String aString)
public it.unimi.dsi.fastutil.ints.IntArrayList getValues(it.unimi.dsi.fastutil.ints.IntArrayList indexes)
public void addAll(java.util.List<java.lang.String> stringValues)
public static java.lang.String convert(java.lang.String stringValue)
public void addCell(java.lang.String object)
addCell
in interface Column
addCell
in class AbstractColumn
public it.unimi.dsi.fastutil.ints.IntComparator rowComparator()
rowComparator
in interface Column
public boolean isEmpty()
Column
public Selection isInSet(java.util.Collection<java.lang.String> values2)
public Selection isEqualTo(java.lang.String string)
public Selection isNotInSet(java.util.Collection<java.lang.String> values2)
public Selection isNotEqualTo(java.lang.String string)
public java.util.List<BooleanColumn> getDummies()
public int getInt(int rowNumber)
public CategoryColumn unique()
Column
public it.unimi.dsi.fastutil.ints.IntArrayList data()
public IntColumn toIntColumn()
public DictionaryMap dictionaryMap()
dictionaryMap
in interface CategoryColumnUtils
public java.lang.String toString()
toString
in class java.lang.Object
public int[] indexes()
public CategoryColumn appendString(CategoryColumn append)
append
- the string to append
public CategoryColumn appendString(java.lang.String append)
append
- the string to append
public CategoryColumn replaceAll(java.lang.String[] regexArray, java.lang.String replacement)
regexArray
- the regex array to replace
replacement
- the replacement string
public CategoryColumn tokenizeAndSort(java.lang.String separator)
public CategoryColumn tokenizeAndSort()
public CategoryColumn tokenizeAndRemoveDuplicates()
public Selection isNotMissing()
isNotMissing
in interface Column
public Selection select(StringPredicate predicate)
public Selection select(StringBiPredicate predicate, java.lang.String value)
public CategoryColumn copy()
Column
public int countMissing()
countMissing
in interface Column
public java.util.Iterator<java.lang.String> iterator()
iterator
in interface java.lang.Iterable<java.lang.String>
public CategoryColumn selectIf(StringPredicate predicate)
public java.util.Set<java.lang.String> asSet()
public it.unimi.dsi.fastutil.ints.IntArrayList values()
values
in interface CategoryColumnUtils
public int byteSize()
Column
public byte[] asBytes(int rowNumber)
public Selection isIn(java.lang.String... strings)