RcppGetPCAIndividualsHdf5
C++ Function Reference
1 Signature
void BigDataStatMeth::RcppGetPCAIndividualsHdf5(std::string strPCAgroup, BigDataStatMeth::hdf5Dataset *dsX, BigDataStatMeth::hdf5Dataset *dsd, BigDataStatMeth::hdf5Dataset *dsu, bool overwrite)2 Description
Calculate PCA individuals statistics.
3 Parameters
strPCAgroup(std::string): HDF5 group name for PCA resultsdsX(BigDataStatMeth::hdf5Dataset *): Input data matrix datasetdsd(BigDataStatMeth::hdf5Dataset *): Singular values datasetdsu(BigDataStatMeth::hdf5Dataset *): Left singular vectors datasetoverwrite(bool): Whether to overwrite existing results
4 Details
Computes and stores various PCA statistics for individuals including:DistancesComponent coordinatesIndividual coordinatesCos² quality metricsContributions
5 Call Graph
6 Source Code
NoteImplementation
File: inst/include/hdf5Algebra/matrixPCA.hpp • Lines 188-321
inline void RcppGetPCAIndividualsHdf5( std::string strPCAgroup,
BigDataStatMeth::hdf5Dataset* dsX,
BigDataStatMeth::hdf5Dataset* dsd,
BigDataStatMeth::hdf5Dataset* dsu,
bool overwrite )
{
BigDataStatMeth::hdf5Dataset* dsdist2 = nullptr;
BigDataStatMeth::hdf5Dataset* dsComp = nullptr;
BigDataStatMeth::hdf5Dataset* dscoord = nullptr;
BigDataStatMeth::hdf5Dataset* dscos2 = nullptr;
BigDataStatMeth::hdf5Dataset* dscontrib = nullptr;
try
{
H5::Exception::dontPrint();
// int ielements = 0;
std::vector<hsize_t> stride = {1, 1},
block = {1, 1},
offset = {0, 0},
count_x = {dsX->nrows(), dsX->ncols()},
count_d = {dsd->nrows(), dsd->ncols()},
count_u = {dsu->nrows(), dsu->ncols()};
Eigen::VectorXd dist2;
Eigen::MatrixXd adjust;
double weights = 1/(double)dsX->ncols();
{
std::vector<double> vdX( count_x[0] * count_x[1] );
dsX->readDatasetBlock( {0,0}, count_x, stride, block, vdX.data() );
Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> X (vdX.data(), count_x[0], count_x[1]);
X = RcppNormalizeRowwise(X, true, false);
adjust = Eigen::MatrixXd::Constant(1, X.cols(), weights);
Eigen::RowVectorXd ecart = adjust / adjust.sum() ;
ecart = ecart * (X.unaryExpr([](double d) {return std::pow(d, 2);})).transpose();
ecart = ecart.unaryExpr([](double d) {return std::sqrt(d);});
X = X.array().colwise() / ecart.transpose().array();
dist2 = (X.unaryExpr([](double d) {return std::pow(d, 2);})).colwise().sum();
Eigen::VectorXd dist = dist2.array().sqrt();
dsdist2 = new BigDataStatMeth::hdf5Dataset(dsd->getFileName(), strPCAgroup, "ind.dist" ,overwrite );
dsdist2->createDataset( dist2.rows(), dist2.cols(), "real");
dsdist2->writeDataset( dist.data() );
delete dsdist2; dsdist2 = nullptr;
}
// Load d
std::vector<double> vdd( count_d[0] * count_d[1] );
dsd->readDatasetBlock( offset, count_d, stride, block, vdd.data() );
Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> d (vdd.data(), count_d[0], count_d[1]);
Eigen::MatrixXd var_coord;
{
// Load u
std::vector<double> vdu( count_u[0] * count_u[1] );
dsu->readDatasetBlock( {0, 0}, count_u, stride, block, vdu.data() );
Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> u (vdu.data(), count_u[0], count_u[1]);
u = u * sqrt(1/weights);
// Components
dsComp = new BigDataStatMeth::hdf5Dataset(dsd->getFileName(), strPCAgroup, "components" ,overwrite );
dsComp->createDataset( u.cols(), u.rows(), "real");
dsComp->writeDataset( u.data() );
delete dsComp; dsComp = nullptr;
var_coord = Rcpp_matrixVectorMultiplication_byRow(u, d);
}
// Coord inds
dscoord = new BigDataStatMeth::hdf5Dataset(dsd->getFileName(), strPCAgroup, "ind.coord" ,overwrite );
dscoord->createDataset( var_coord.cols(), var_coord.rows(), "real");
dscoord->writeDataset( Rcpp::wrap(var_coord.transpose()));
delete dscoord; dscoord = nullptr;
// Cos2 inds
Eigen::MatrixXd coord2 = var_coord.unaryExpr([](double d) {return std::pow(d, 2);});
{
Eigen::MatrixXd ind_cos2 = coord2.array().rowwise() / dist2.transpose().array();
dscos2 = new BigDataStatMeth::hdf5Dataset(dsd->getFileName(), strPCAgroup, "ind.cos2" ,overwrite );
dscos2->createDataset( ind_cos2.cols(), ind_cos2.rows(), "real");
dscos2->writeDataset( Rcpp::wrap(ind_cos2.transpose()) );
delete dscos2; dscos2 = nullptr;
}
Eigen::MatrixXd ind_contrib = coord2.array().rowwise() * adjust.row(0).array();
d = d.unaryExpr([](double xd) {return std::pow(xd, 2);});
ind_contrib = ind_contrib.array().colwise() / d.col(0).array();
ind_contrib = ind_contrib.unaryExpr([](double d) {return d*100;});
dscontrib = new BigDataStatMeth::hdf5Dataset(dsd->getFileName(), strPCAgroup, "ind.contrib" ,overwrite );
dscontrib->createDataset( ind_contrib.cols(), ind_contrib.rows(), "real");
dscontrib->writeDataset( Rcpp::wrap(ind_contrib.transpose()) );
delete dscontrib; dscontrib = nullptr;
// Components
// createHardLink(dsu->getFileptr(), dsu->getGroupName() + "/" + dsu->getDatasetName(), strPCAgroup + "/components");
} catch( H5::FileIException& error ) { // catch failure caused by the H5File operations
checkClose_file( dsX, dsd, dsu, dsdist2, dsComp, dscoord, dscos2, dscontrib);
Rcpp::Rcerr<<"\nc++ exception RcppGetPCAIndividualsHdf5 (File IException)\n";
return void();
} catch( H5::DataSetIException& error ) { // catch failure caused by the DataSet operations
checkClose_file( dsX, dsd, dsu, dsdist2, dsComp, dscoord, dscos2, dscontrib);
Rcpp::Rcerr<<"\nc++ exception RcppGetPCAIndividualsHdf5 (DataSet IException)\n";
return void();
} catch( H5::DataSpaceIException& error ) { // catch failure caused by the DataSpace operations
checkClose_file( dsX, dsd, dsu, dsdist2, dsComp, dscoord, dscos2, dscontrib);
Rcpp::Rcerr<<"\nc++ exception RcppGetPCAIndividualsHdf5 (DataSpace IException)\n";
return void();
} catch(std::exception &ex) {
checkClose_file( dsX, dsd, dsu, dsdist2, dsComp, dscoord, dscos2, dscontrib);
Rcpp::Rcerr<<"\nc++ exception RcppGetPCAIndividualsHdf5 \n"<< ex.what();
return void();
} catch (...) {
checkClose_file( dsX, dsd, dsu, dsdist2, dsComp, dscoord, dscos2, dscontrib);
Rcpp::Rcerr<<"\nC++ exception RcppGetPCAIndividualsHdf5 (unknown reason)";
return void();
}
return void();
}7 Usage Example
#include "BigDataStatMeth.hpp"
// Example usage
auto result = RcppGetPCAIndividualsHdf5(...);