public class MathUtils
extends java.lang.Object
Modifier and Type | Field and Description |
---|---|
static double |
log2
The natural logarithm of 2.
|
static double |
SMALL
The small deviation allowed in double comparisons.
|
Constructor and Description |
---|
MathUtils() |
Modifier and Type | Method and Description |
---|---|
static double |
adjustedrSquared(double rSquared,
int numRegressors,
int numDataPoints)
This calculates the adjusted r^2 including degrees of freedom.
|
static double |
bernoullis(double n,
double k,
double successProb)
This will return the bernoulli trial for the given event.
|
static int |
binomial(org.apache.commons.math3.random.RandomGenerator rng,
int n,
double p)
Generates a binomial distributed number using
the given rng
|
org.apache.commons.math3.linear.CholeskyDecomposition |
choleskyFromMatrix(org.apache.commons.math3.linear.RealMatrix m)
This will return the cholesky decomposition of
the given matrix
|
static int |
clamp(int value,
int min,
int max)
Clamps the value to a discrete value
|
static double |
combination(double n,
double r)
This returns the combination of n choose r
|
static java.util.List<double[]> |
coordSplit(double[] vector)
This returns the coordinate split in a list of coordinates
such that the values for ret[0] are the x values
and ret[1] are the y values
|
static java.util.List<double[]> |
coordSplit(java.util.List<java.lang.Double> vector)
This returns the coordinate split in a list of coordinates
such that the values for ret[0] are the x values
and ret[1] are the y values
|
static double |
correlation(double[] residuals,
double[] targetAttribute)
Returns the correlation coefficient of two double vectors.
|
static double |
determinationCoefficient(double[] y1,
double[] y2,
int n)
This returns the determination coefficient of two vectors given a length
|
static int |
discretize(double value,
double min,
double max,
int binCount)
Discretize the given value
|
static int |
distanceFinderZValue(double[] vector)
This will translate a vector into an equivalent integer
|
static double |
entropy(double[] vector)
This returns the entropy (information gain, or uncertainty of a random variable).
|
static double |
errorFor(double actual,
double prediction) |
static double |
euclideanDistance(double[] p,
double[] q)
This returns the distance of two vectors
sum(i=1,n) (q_i - p_i)^2
|
static double |
euclideanDistance(float[] p,
float[] q)
This returns the distance of two vectors
sum(i=1,n) (q_i - p_i)^2
|
static double |
factorial(double n)
This will return the factorial of the given number n.
|
static double[] |
fromString(java.lang.String data,
java.lang.String separator)
This will take a given string and separator and convert it to an equivalent
double array.
|
static double[] |
generateUniform(int l)
This will generate a series of l uniformly distributed
numbers
|
static boolean |
gr(double a,
double b)
Tests if a is greater than b.
|
static double |
hypotenuse(double a,
double b)
sqrt(a^2 + b^2) without under/overflow.
|
static double |
idf(double totalDocs,
double numTimesWordAppearedInADocument)
Inverse document frequency: the total docs divided by the number of times the word
appeared in a document
|
static double |
information(double[] probabilities)
This returns the entropy for a given vector of probabilities.
|
static int |
kroneckerDelta(double i,
double j)
This returns the kronecker delta of two doubles.
|
static double |
log2(double a)
Returns the logarithm of a for base 2.
|
static double[] |
logs2probs(double[] a)
Converts an array containing the natural logarithms of
probabilities stored in a vector back into probabilities.
|
static double |
manhattanDistance(double[] p,
double[] q)
This will calculate the Manhattan distance between two sets of points.
|
static double |
max(double[] doubles) |
static int |
maxIndex(double[] doubles)
Returns index of maximum element in a given
array of doubles.
|
static double |
mean(double[] vector)
Computes the mean for an array of doubles.
|
static double[] |
mergeCoords(double[] x,
double[] y)
This will merge the coordinates of the given coordinate system.
|
static java.util.List<java.lang.Double> |
mergeCoords(java.util.List<java.lang.Double> x,
java.util.List<java.lang.Double> y)
This will merge the coordinates of the given coordinate system.
|
static double |
min(double[] doubles) |
static long |
nextPowOf2(long v)
See: http://stackoverflow.com/questions/466204/rounding-off-to-nearest-power-of-2
|
static void |
normalize(double[] doubles,
double sum)
Normalizes the doubles in the array using the given value.
|
static double |
normalize(double val,
double min,
double max)
Normalize a value
(val - min) / (max - min)
|
static double[] |
normalizeToOne(double[] doubles) |
static java.util.List<java.util.List<java.lang.Double>> |
partitionVariable(java.util.List<java.lang.Double> arr,
int chunk)
This will partition the given whole variable data set into the specified chunk number.
|
static double |
permutation(double n,
double r)
This returns the permutation of n choose r.
|
static int |
probRound(double value,
java.util.Random rand)
Rounds a double to the next nearest integer value in a probabilistic
fashion (e.g. 0.8 has a 20% chance of being rounded down to 0 and an
80% chance of being rounded up to 1).
|
static double |
probToLogOdds(double prob)
Returns the log-odds for a given probability.
|
static double |
randomDoubleBetween(double begin,
double end) |
static float |
randomFloatBetween(float begin,
float end) |
static int |
randomNumberBetween(double begin,
double end)
Generates a random integer between the specified numbers
|
static int |
randomNumberBetween(double begin,
double end,
org.apache.commons.math3.random.RandomGenerator rng)
Generates a random integer between the specified numbers
|
static double |
rootMeansSquaredError(double[] real,
double[] predicted)
This returns the root mean squared error of two data sets
|
static int |
round(double value)
Rounds a double to the next nearest integer value.
|
static double |
roundDouble(double value,
int afterDecimalPoint)
Rounds a double to the given number of decimal places.
|
static float |
roundFloat(float value,
int afterDecimalPoint)
Rounds a float to the given number of decimal places.
|
static double[] |
sampleDoublesInInterval(double[][] doubles,
int l) |
static void |
shuffleArray(int[] array,
long rngSeed) |
static void |
shuffleArray(int[] array,
java.util.Random rng) |
static double |
sigmoid(double x)
1 / (1 + exp(-x))
|
double |
slope(double x1,
double x2,
double y1,
double y2)
This returns the slope of the given points.
|
static boolean |
sm(double a,
double b)
Tests if a is smaller than b.
|
static double |
squaredLoss(double[] x,
double[] y,
double w_0,
double w_1)
This will return the squared loss of the given
points
|
static double |
ssError(double[] predictedValues,
double[] targetAttribute)
How much of the variance is NOT explained by the regression
|
static double |
ssReg(double[] residuals,
double[] targetAttribute)
How much of the variance is explained by the regression
|
static double |
ssTotal(double[] residuals,
double[] targetAttribute)
Total variance in target attribute
|
static double |
stringSimilarity(java.lang.String... strings)
Calculate string similarity with tfidf weights relative to each character
frequency and how many times a character appears in a given string
|
static double |
sum(double[] nums)
This returns the sum of the given array.
|
static double |
sumOfMeanDifferences(double[] vector,
double[] vector2)
Used for calculating top part of simple regression for
beta 1
|
static double |
sumOfMeanDifferencesOnePoint(double[] vector)
Used for calculating top part of simple regression for
beta 1
|
static double |
sumOfProducts(double[]... nums)
This returns the sum of products for the given
numbers.
|
static double |
sumOfSquares(double[] vector)
This returns the sum of squares for the given vector.
|
static double |
tf(int count,
int documentLength)
Term frequency: 1+ log10(count)
|
static double |
tfidf(double tf,
double idf)
Return tf * idf
|
static double |
times(double[] nums)
This returns the product of all numbers in the given array.
|
static int |
toDecimal(java.lang.String binary)
This will convert the given binary string to a decimal based
integer
|
static double |
uniform(java.util.Random rng,
double min,
double max)
Generate a uniform random number from the given rng
|
static double |
variance(double[] vector) |
static double |
vectorLength(double[] vector)
Returns the vector length (sqrt(sum(x_i^2)))
|
static double |
w_0(double[] x,
double[] y,
int n) |
static double |
w_1(double[] x,
double[] y,
int n) |
static double[] |
weightsFor(double[] vector)
This returns the minimized loss values for a given vector.
|
static double[] |
weightsFor(java.util.List<java.lang.Double> vector)
This returns the minimized loss values for a given vector.
|
static double[] |
xVals(double[] vector)
This returns the x values of the given vector.
|
static double[] |
yVals(double[] vector)
This returns the odd indexed values for the given vector
|
public static double log2
public static double SMALL
public static double normalize(double val, double min, double max)
val
- value to normalizemax
- max valuemin
- min valuepublic static int clamp(int value, int min, int max)
value
- the value to clampmin
- min for the probability distributionmax
- max for the probability distributionpublic static int discretize(double value, double min, double max, int binCount)
value
- the value to discretizemin
- the min of the distributionmax
- the max of the distributionbinCount
- the number of binspublic static long nextPowOf2(long v)
v
- the number to get the next power of 2 for
public static int binomial(org.apache.commons.math3.random.RandomGenerator rng, int n, double p)
rng
- n
- p
- public static double uniform(java.util.Random rng, double min, double max)
rng
- the rng to usemin
- the min nummax
- the max numpublic static double correlation(double[] residuals, double[] targetAttribute)
residuals
- residualstargetAttribute
- target attribute vectorpublic static double sigmoid(double x)
x
- public static double ssReg(double[] residuals, double[] targetAttribute)
residuals
- errortargetAttribute
- data for target attributepublic static double ssError(double[] predictedValues, double[] targetAttribute)
predictedValues
- predicted valuestargetAttribute
- data for target attributepublic static double stringSimilarity(java.lang.String... strings)
strings
- the strings to calculate similarity forpublic static double vectorLength(double[] vector)
vector
- the vector to return the vector length forpublic static double idf(double totalDocs, double numTimesWordAppearedInADocument)
totalDocs
- the total documents for the data set
numTimesWordAppearedInADocument
- the number of times the word occurred in a document
public static double tf(int count, int documentLength)
count
- the count of a word or character in a given string or documentpublic static double tfidf(double tf, double idf)
tf
- the term frequency (assumed calculated)idf
- inverse document frequency (assumed calculated)public static double ssTotal(double[] residuals, double[] targetAttribute)
residuals
- errortargetAttribute
- data for target attributepublic static double sum(double[] nums)
nums
- the array of numbers to sumpublic static double[] mergeCoords(double[] x, double[] y)
x
- the x coordinatesy
- the y coordinatespublic static java.util.List<java.lang.Double> mergeCoords(java.util.List<java.lang.Double> x, java.util.List<java.lang.Double> y)
x
- the x coordinatesy
- the y coordinatespublic static double[] weightsFor(java.util.List<java.lang.Double> vector)
vector
- the vector of numbers to get the weights for
public static double squaredLoss(double[] x, double[] y, double w_0, double w_1)
x
- the x coordinates to usey
- the y coordinates to usew_0
- the first weightw_1
- the second weightpublic static double w_1(double[] x, double[] y, int n)
public static double w_0(double[] x, double[] y, int n)
public static double[] weightsFor(double[] vector)
vector
- the vector of numbers to get the weights for
public static double errorFor(double actual, double prediction)
public static double sumOfMeanDifferences(double[] vector, double[] vector2)
vector
- the x coordinatesvector2
- the y coordinatespublic static double sumOfMeanDifferencesOnePoint(double[] vector)
vector
- the x coordinatespublic static double variance(double[] vector)
public static double times(double[] nums)
nums
- the numbers to multiply overpublic static double sumOfProducts(double[]... nums)
nums
- the sum of products for the given numbers
public static java.util.List<double[]> coordSplit(double[] vector)
vector
- the vector to split with x and y values
public static java.util.List<java.util.List<java.lang.Double>> partitionVariable(java.util.List<java.lang.Double> arr, int chunk)
arr
- the data set to pass in
chunk
- the number to separate by
public static java.util.List<double[]> coordSplit(java.util.List<java.lang.Double> vector)
vector
- the vector to split with x and y values
Note that the list will be more stable due to the size operator.
The array version will have extraneous values if not monitored
properly.
public static double[] xVals(double[] vector)
vector
- the vector to get the values for
public static double[] yVals(double[] vector)
vector
- the odd indexed values of the given vector
public static double sumOfSquares(double[] vector)
vector
- the vector to obtain the sum of squares forpublic static double determinationCoefficient(double[] y1, double[] y2, int n)
y1
- the first vectory2
- the second vectorn
- the length of both vectorspublic static double log2(double a)
a
- a doublepublic double slope(double x1, double x2, double y1, double y2)
x1
- the first x to usex2
- the end x to usey1
- the begin y to usey2
- the end y to usepublic static double rootMeansSquaredError(double[] real, double[] predicted)
real
- the real valuespredicted
- the predicted valuespublic static double entropy(double[] vector)
vector
- the vector of values to get the entropy for
public static int kroneckerDelta(double i, double j)
i
- the first number to comparej
- the second number to comparepublic static double adjustedrSquared(double rSquared, int numRegressors, int numDataPoints)
rSquared
- the r squared value to calculate
numRegressors
- number of variables
numDataPoints
- size of the data set
public static double[] normalizeToOne(double[] doubles)
public static double min(double[] doubles)
public static double max(double[] doubles)
public static void normalize(double[] doubles, double sum)
doubles
- the array of double
sum
- the value by which the doubles are to be normalized
java.lang.IllegalArgumentException
- if sum is zero or NaN
public static double[] logs2probs(double[] a)
a
- an array holding the natural logarithms of the probabilitiespublic static double information(double[] probabilities)
probabilities
- the probabilities to get the entropy for
public static int maxIndex(double[] doubles)
doubles
- the array of doublespublic static double factorial(double n)
n
- the number to get the factorial for
public static double probToLogOdds(double prob)
prob
- the probabilitypublic static int round(double value)
value
- the double valuepublic static double permutation(double n, double r)
n
- the n to chooser
- the number of elements to choosepublic static double combination(double n, double r)
n
- the number of elements overallr
- the number of elements to choosepublic static double hypotenuse(double a, double b)
public static int probRound(double value, java.util.Random rand)
value
- the double valuerand
- the random number generatorpublic static double roundDouble(double value, int afterDecimalPoint)
value
- the double valueafterDecimalPoint
- the number of digits after the decimal pointpublic static float roundFloat(float value, int afterDecimalPoint)
value
- the float value
afterDecimalPoint
- the number of digits after the decimal point
public static double bernoullis(double n, double k, double successProb)
n
- the number of trialsk
- the number of times the target event occurssuccessProb
- the probability of the event happeningpublic static boolean sm(double a, double b)
a
- a doubleb
- a doublepublic static boolean gr(double a, double b)
a
- a doubleb
- a doublepublic static double[] fromString(java.lang.String data, java.lang.String separator)
data
- the data to separateseparator
- the separator to usepublic static double mean(double[] vector)
vector
- the arraypublic org.apache.commons.math3.linear.CholeskyDecomposition choleskyFromMatrix(org.apache.commons.math3.linear.RealMatrix m) throws java.lang.Exception
m
- the matrix to convertorg.apache.commons.math3.linear.NonSquareMatrixException
java.lang.Exception
public static int toDecimal(java.lang.String binary)
binary
- the binary string to convertpublic static int distanceFinderZValue(double[] vector)
vector
- the vector to translatepublic static double euclideanDistance(double[] p, double[] q)
p
- the first vectorq
- the second vectorpublic static double euclideanDistance(float[] p, float[] q)
p
- the first vectorq
- the second vectorpublic static double[] generateUniform(int l)
l
- the number of numbers to generatepublic static double manhattanDistance(double[] p, double[] q)
p
- the first point vectorq
- the second point vectorpublic static double[] sampleDoublesInInterval(double[][] doubles, int l)
public static int randomNumberBetween(double begin, double end)
begin
- the begin of the intervalend
- the end of the intervalpublic static int randomNumberBetween(double begin, double end, org.apache.commons.math3.random.RandomGenerator rng)
begin
- the begin of the intervalend
- the end of the intervalpublic static float randomFloatBetween(float begin, float end)
public static double randomDoubleBetween(double begin, double end)
public static void shuffleArray(int[] array, long rngSeed)
public static void shuffleArray(int[] array, java.util.Random rng)