16 #include <Eigen/Dense>
34 double sample_var(
const Ref<const VectorXd> vec);
53 void regressOutLS(VectorXd& signal,
const MatrixXd& X,
const MatrixXd& covInv)
55 VectorXd beta = covInv*X.transpose()*signal;
62 static std::random_device rd;
63 static std::default_random_engine rng(rd());
64 std::normal_distribution<double> rdist(0,1);
66 for(
size_t cc=0; cc<m.cols(); cc++) {
67 for(
size_t rr=0; rr<m.rows(); rr++) {
68 m(rr, cc) = rdist(rng);
88 double gaussianPDF(
double mean,
double sd,
double x);
99 double gaussianCDF(
double mean,
double sd,
double x);
122 double gammaPDF_MS(
double mean,
double sd,
double x);
147 double correlation(
size_t len,
double* a,
double* b);
163 return (sumsqr-sum*sum/count)/(count-1);
180 double sumsq1,
double sumsq2,
double s1s2)
182 return (count*s1s2-sum1*sum2)/
183 sqrt((count*sumsq1-sum1*sum1)*(count*sumsq2-sum2*sum2));
255 StudentsT(
int dof = 2,
double dt = 0.1,
double tmax = 20);
309 double density(
double t)
const;
328 double icdf(
double t)
const;
346 std::vector<double> m_cdf;
347 std::vector<double> m_pdf;
348 std::vector<double> m_tvals;
371 const Ref<const VectorXd> y,
372 const Ref<const MatrixXd> X,
373 const Ref<const VectorXd> covInv,
374 const Ref<const MatrixXd> Xinv,
375 const StudentsT& distrib);
391 const Ref<const VectorXd> y,
392 const Ref<const MatrixXd> X);
411 size_t subsize,
size_t poweriters, MatrixXd& U, VectorXd& E,
415 double tol,
size_t startrank,
size_t maxrank,
size_t poweriters,
416 MatrixXd& U, VectorXd& E, MatrixXd& V);
438 MatrixXd
pca(
const Ref<const MatrixXd> X,
double varth = 1,
int odim = -1);
461 MatrixXd
rpiPCA(
const Ref<const MatrixXd> X,
double varth,
int odim);
492 MatrixXd
symICA(
const Ref<const MatrixXd> Xin, MatrixXd* unmix = NULL);
524 MatrixXd
asymICA(
const Ref<const MatrixXd> Xin, MatrixXd* unmix = NULL);
555 const Ref<const VectorXd> y,
double gamma);
577 const Ref<const VectorXd> y,
double gamma);
592 MatrixXd
pcacov(
const Ref<const MatrixXd> cov,
double varth);
615 void expMax1D(
const Ref<const VectorXd> data,
616 vector<std::function<
double(
double,
double,
double)>> pdfs,
617 Ref<VectorXd> mean, Ref<VectorXd> sd, Ref<VectorXd> prior,
618 std::string plotfile =
"");
636 Ref<VectorXd> mu, Ref<VectorXd> sd, Ref<VectorXd> prior,
637 std::string plotfile);
651 size_t nclass, MatrixXd& means);
675 Eigen::VectorXi
classify(
const Ref<const MatrixXd> samples) = 0;
688 size_t classify(
const Ref<const MatrixXd> samples, Ref<VectorXi> oclass) = 0;
703 int update(
const Ref<const MatrixXd> samples,
bool reinit =
false) = 0;
713 void compute(
const Ref<const MatrixXd> samples)
748 KMeans(
size_t rank,
size_t k = 2);
756 void setk(
size_t ngroups);
764 void updateMeans(
const Ref<const MatrixXd> newmeans);
774 void updateMeans(
const Ref<const MatrixXd> samples,
775 const Ref<const Eigen::VectorXi> classes);
785 VectorXi
classify(
const Ref<const MatrixXd> samples);
795 size_t classify(
const Ref<const MatrixXd> samples, Ref<VectorXi> oclass);
809 int update(
const Ref<const MatrixXd> samples,
bool reinit =
false);
842 ExpMax(
size_t rank,
size_t k = 2);
850 void setk(
size_t ngroups);
864 void updateMeanCovTau(
const Ref<const MatrixXd> newmeans,
const Ref<const MatrixXd> newcovs,
865 const Ref<const VectorXd> tau);
874 void updateMeanCovTau(
const Ref<const MatrixXd> samples, Ref<MatrixXd> prob);
886 double expectation(
const Ref<const MatrixXd> samples, Ref<MatrixXd> prob);
896 Eigen::VectorXi
classify(
const Ref<const MatrixXd> samples);
906 size_t classify(
const Ref<const MatrixXd> samples, Ref<VectorXi> oclass);
920 int update(
const Ref<const MatrixXd> samples,
bool reinit =
false);
990 double outthresh, Eigen::VectorXi& classes,
bool brute =
false);
1006 Eigen::VectorXf& rho, Eigen::VectorXf& delta,
1007 Eigen::VectorXi& parent);
1023 Eigen::VectorXf& rho, Eigen::VectorXf& delta,
1024 Eigen::VectorXi& parent);
1032 #endif // STATISTICS_H
MatrixXd pca(const Ref< const MatrixXd > X, double varth=1, int odim=-1)
Computes the Principal Components of input matrix X.
void setk(size_t ngroups)
Update the number of groups. Note that this invalidates any current information.
double sigmahat
sigma hat - estimate standard deviation of noise
Classifier(size_t rank)
Initializes the classifier.
void setMaxT(double tmax)
Set the maximum t for numerical integration, and recompute the cdf/pdf caches.
ExpMax(size_t rank, size_t k=2)
Constructor for k-means class.
double cumulative(double t) const
Get the cumulative probability at some t value.
double sample_var(const Ref< const VectorXd > vec)
Computes the statistical variance of a column vector.
VectorXd p
Significance of each of the regressors.
double gammaPDF_MS(double mean, double sd, double x)
PDF for the gamma distribution, if mean is negative then it is assumed that x should be negated as we...
MatrixXd asymICA(const Ref< const MatrixXd > Xin, MatrixXd *unmix=NULL)
Computes the Independent Components of input matrix X using sequential component extraction. Note that you should run PCA on X before running ICA.
void regress(RegrResult *out, const Ref< const VectorXd > y, const Ref< const MatrixXd > X, const Ref< const VectorXd > covInv, const Ref< const MatrixXd > Xinv, const StudentsT &distrib)
Computes the Ordinary Least Square predictors, beta for.
double pdf(double t) const
Get the probability density at some t value.
void updateMeanCovTau(const Ref< const MatrixXd > newmeans, const Ref< const MatrixXd > newcovs, const Ref< const VectorXd > tau)
Sets the mean matrix. Each row of the matrix is a ND-mean, where N is the number of columns...
double tthresh(double p) const
Get the T-score that corresponds to a particular p-value. Alias for icdf.
double density(double t) const
Get the probability density at some t value.
double icdf(double t) const
Get the T-score that corresponds to a particular p-value. Alias for tthresh.
int findDensityPeaks(const Eigen::MatrixXf &samples, double thresh, Eigen::VectorXf &rho, Eigen::VectorXf &delta, Eigen::VectorXi &parent)
Computes Density and Peak computation for Fast Search and Find of Density Peaks algorithm.
int fastSearchFindDP(const Eigen::MatrixXf &samples, double thresh, double outthresh, Eigen::VectorXi &classes, bool brute=false)
Algorithm of unsupervised learning (clustering) based on density.
double adj_rsqr
Coefficient of determination, corrected for the number of regressors.
Student's T-distribution. A cache of the Probability Density Function and cumulative density function...
virtual int update(const Ref< const MatrixXd > samples, bool reinit=false)=0
Updates the classifier with new samples, if reinit is true then no prior information will be used...
Expectation Maximization With Gaussian Mixture Model.
void expMax1D(const Ref< const VectorXd > data, vector< std::function< double(double, double, double)>> pdfs, Ref< VectorXd > mean, Ref< VectorXd > sd, Ref< VectorXd > prior, std::string plotfile="")
Computes the mean and standard deviation of multiple distributions based on 1D data. The probability distribution functions should be passed in through a vector of function objects (pdfs) taking mu/sd/x.
KMeans(size_t rank, size_t k=2)
Constructor for k-means class.
double gaussianCDF(double mean, double sd, double x)
1D Gaussian cumulative distribution function
void setDOF(double dof)
Change the degress of freedom, update cache.
double ssres
Sum of square of the residuals.
int findDensityPeaks_brute(const Eigen::MatrixXf &samples, double thresh, Eigen::VectorXf &rho, Eigen::VectorXf &delta, Eigen::VectorXi &parent)
Computes Density and Peak computation for Fast Search and Find of Density Peaks algorithm. This is a slower, non-bin based version.
void setStepT(double dt)
Step in t to use for computing the CDF, smaller means more precision although in reality the distribu...
VectorXd shootingRegr(const Ref< const MatrixXd > X, const Ref< const VectorXd > y, double gamma)
Performs LASSO regression using the 'shooting' algorithm of.
bool m_valid
Whether the classifier has been initialized yet.
VectorXd std_err
Standard errors for each of the regressors.
virtual Eigen::VectorXi classify(const Ref< const MatrixXd > samples)=0
Given a matrix of samples (Samples x Dims, sample on each row), apply the classifier to each sample a...
int update(const Ref< const MatrixXd > samples, bool reinit=false)
Updates the classifier with new samples, if reinit is true then no prior information will be used...
void regressOutLS(VectorXd &signal, const MatrixXd &X, const MatrixXd &covInv)
Removes the effects of X from signal (y). Note that this takes both X and the pseudoinverse of X beca...
double gaussianPDF(double mean, double sd, double x)
1D Gaussian distribution function
double cdf(double t) const
Get the cumulative probability at some t value.
double rsqr
Coefficient of determination (Rsqr)
Eigen::VectorXi classify(const Ref< const MatrixXd > samples)
Given a matrix of samples (Samples x Dims, sample on each row), apply the classifier to each sample a...
void updateMeans(const Ref< const MatrixXd > newmeans)
Sets the mean matrix. Each row of the matrix is a ND-mean, where N is the number of columns...
Base class for all ND classifiers.
void randomizePowerIterationSVD(const Ref< const MatrixXd > A, size_t subsize, size_t poweriters, MatrixXd &U, VectorXd &E, MatrixXd &V)
double mutualInformation(size_t len, double *a, double *b, size_t mbin)
Computes mutual information between signal a and signal b which are of length len. Marginal bins used is mbin.
StudentsT(int dof=2, double dt=0.1, double tmax=20)
Defualt constructor takes the degrees of freedom (Nu), step size for numerical CDF computation and Ma...
double sample_corr(int count, double sum1, double sum2, double sumsq1, double sumsq2, double s1s2)
Computes the sample correlation.
MatrixXd pcacov(const Ref< const MatrixXd > cov, double varth)
Computes the Principal Components of input matrix X using the covariance matrix.
VectorXi classify(const Ref< const MatrixXd > samples)
Given a matrix of samples (Samples x Dims, sample on each row), apply the classifier to each sample a...
VectorXd t
Students t score of each of the regressors.
void compute(const Ref< const MatrixXd > samples)
Alias for updateClasses with reinit = true. This will perform a classification scheme on all the inpu...
double correlation(size_t len, double *a, double *b)
Computes correlation between signal a and signal b which are of length len.
VectorXd bhat
Estimated Beta.
const int ndim
Number of dimensions, must be set at construction. This is the number of columns in input samples...
void setk(size_t ngroups)
Update the number of groups. Note that this invalidates any current information.
MatrixXd symICA(const Ref< const MatrixXd > Xin, MatrixXd *unmix=NULL)
Computes the Independent Components of input matrix X using symmetric decorrlation. Note that you should run PCA on X before running ICA.
int update(const Ref< const MatrixXd > samples, bool reinit=false)
Updates the classifier with new samples, if reinit is true then no prior information will be used...
int maxit
Maximum number of iterations. Set below 0 for infinite.
double expectation(const Ref< const MatrixXd > samples, Ref< MatrixXd > prob)
Given a matrix of samples (Samples x Dims, sample on each row), apply the classifier to each sample a...
MatrixXd pseudoInverse(const Ref< const MatrixXd > X)
Computes the pseudoinverse of the input matrix.
void gaussGammaMixtureModel(const Ref< const VectorXd > data, Ref< VectorXd > mu, Ref< VectorXd > sd, Ref< VectorXd > prior, std::string plotfile)
Computes the mean and standard deviation of multiple distributions based on 1D data. This is a special version in which a negative gamma, a gaussian and positive gamma. The means of the negative and positive gamma are relative to the center of the gaussian.
void fillGaussian(ptr< NDArray > inout)
Fills image with the linear index at each pixel.
const MatrixXd & getCovs()
Returns the current mean matrix.
void approxKMeans(const Ref< const MatrixXd > samples, size_t nclass, MatrixXd &means)
Approximates k-means using the algorithm of:
MatrixXd rpiPCA(const Ref< const MatrixXd > X, double varth, int odim)
Computes the Principal Components of input matrix X using the randomized power iteration SVD algorith...
const MatrixXd & getMeans()
Returns the current mean matrix.
VectorXd activeShootingRegr(const Ref< const MatrixXd > X, const Ref< const VectorXd > y, double gamma)
Performs LASSO regression using the 'activeShooting' algorithm of.
const MatrixXd & getMeans()
Returns the current mean matrix.
double dof
Degrees of freedom in the regression.
VectorXd yhat
Predicted y values, based on estimate of Beta.