RcppGetPCAVariablesHdf5
C++ Function Reference
1 Signature
void BigDataStatMeth::RcppGetPCAVariablesHdf5(std::string strPCAgroup, BigDataStatMeth::hdf5Dataset *dsd, BigDataStatMeth::hdf5Dataset *dsv, bool overwrite)2 Description
Calculate PCA variables statistics.
3 Parameters
strPCAgroup(std::string): HDF5 group name for PCA resultsdsd(BigDataStatMeth::hdf5Dataset *): Singular values datasetdsv(BigDataStatMeth::hdf5Dataset *): Right singular vectors datasetoverwrite(bool): Whether to overwrite existing results
4 Details
Computes and stores various PCA statistics for variables including:Eigenvalues (lambda)Variance contributionsCumulative varianceVariable coordinatesCos² quality metrics
5 Call Graph
6 Source Code
NoteImplementation
File: inst/include/hdf5Algebra/matrixPCA.hpp • Lines 66-168
inline void RcppGetPCAVariablesHdf5( std::string strPCAgroup,
BigDataStatMeth::hdf5Dataset* dsd,
BigDataStatMeth::hdf5Dataset* dsv,
bool overwrite )
{
BigDataStatMeth::hdf5Dataset* dslambda = nullptr;
BigDataStatMeth::hdf5Dataset* dsvar = nullptr;
BigDataStatMeth::hdf5Dataset* dscumvar = nullptr;
BigDataStatMeth::hdf5Dataset* dscoord = nullptr;
BigDataStatMeth::hdf5Dataset* dscos2 = nullptr;
try
{
H5::Exception::dontPrint();
// int ielements = 0;
std::vector<hsize_t> stride = {1, 1},
block = {1, 1},
offset_d = {0, 0},
count_d = {dsd->nrows(), dsd->ncols()},
offset_v = {0, 0},
count_v = {dsv->nrows(), dsv->ncols()};
std::vector<double> vdd( count_d[0] * count_d[1] );
dsd->readDatasetBlock( {offset_d[0], offset_d[1]}, {count_d[0], count_d[1]}, stride, block, vdd.data() );
Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> d (vdd.data(), count_d[0], count_d[1]);
{
// lambda
Eigen::VectorXd vvar = d.array().pow(2);
dslambda = new BigDataStatMeth::hdf5Dataset(dsd->getFileName(), strPCAgroup, "lambda" ,overwrite );
dslambda->createDataset( d.rows(), d.cols(), "real");
dslambda->writeDataset( vvar.data() );
delete dslambda; dslambda = nullptr;
// Variance
vvar = (vvar/vvar.array().sum()) * 100;
dsvar = new BigDataStatMeth::hdf5Dataset(dsd->getFileName(), strPCAgroup, "variance" ,overwrite );
dsvar->createDataset( d.rows(), d.cols(), "real");
dsvar->writeDataset( vvar.data() );
delete dsvar; dsvar = nullptr;
// cumulative variance (max 1000 elements (firsts))
// if(vvar.size()>1000){ ielements = 1000; }
// else{ ielements = vvar.size(); }
dscumvar = new BigDataStatMeth::hdf5Dataset(dsd->getFileName(), strPCAgroup, "cumvar" ,overwrite );
dscumvar->createDataset( d.rows(), d.cols(), "real");
dscumvar->writeDataset( (cumsum(vvar)).data());
delete dscumvar; dscumvar = nullptr;
}
{
Eigen::MatrixXd var_coord;
{
// Coord vars
std::vector<double> vdv( count_v[0] * count_v[1] );
dsv->readDatasetBlock( {offset_v[0], offset_v[1]}, {count_v[0], count_v[1]}, stride, block, vdv.data() );
Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> v (vdv.data(), count_v[0], count_v[1]);
var_coord = Rcpp_matrixVectorMultiplication_byRow(v, d);
}
dscoord = new BigDataStatMeth::hdf5Dataset(dsd->getFileName(), strPCAgroup, "var.coord" ,overwrite );
dscoord->createDataset( var_coord.rows(), var_coord.cols(), "real");
dscoord->writeDataset( var_coord.transpose().data() );
delete dscoord; dscoord = nullptr;
// Cos2
Eigen::MatrixXd var_cos2 = var_coord.unaryExpr([](double d) {return std::pow(d, 2);});
dscos2 = new BigDataStatMeth::hdf5Dataset(dsd->getFileName(), strPCAgroup, "var.cos2" ,overwrite );
dscos2->createDataset( var_cos2.rows(), var_cos2.cols(), "real");
dscos2->writeDataset( var_cos2.transpose().data() );
delete dscos2; dscos2 = nullptr;
}
} catch( H5::FileIException& error ) {
checkClose_file(dsd, dsv, dslambda, dsvar, dscumvar, dscoord, dscos2);
Rcpp::Rcerr<<"\nc++ exception RcppGetPCAVariablesHdf5 (File IException)\n";
return void();
} catch( H5::DataSetIException& error ) {
checkClose_file(dsd, dsv, dslambda, dsvar, dscumvar, dscoord, dscos2);
Rcpp::Rcerr<<"\nc++ exception RcppGetPCAVariablesHdf5 (DataSet IException)\n";
return void();
} catch( H5::DataSpaceIException& error ) {
checkClose_file(dsd, dsv, dslambda, dsvar, dscumvar, dscoord, dscos2);
Rcpp::Rcerr<<"\nc++ exception RcppGetPCAVariablesHdf5 (DataSpace IException)\n";
return void();
} catch(std::exception &ex) {
checkClose_file(dsd, dsv, dslambda, dsvar, dscumvar, dscoord, dscos2);
Rcpp::Rcout<<"c++ exception RcppGetPCAVariablesHdf5 \n"<< ex.what();
return void();
} catch (...) {
checkClose_file(dsd, dsv, dslambda, dsvar, dscumvar, dscoord, dscos2);
Rcpp::Rcout<<"\nC++ exception RcppGetPCAVariablesHdf5 (unknown reason)";
return void();
}
return void();
}7 Usage Example
#include "BigDataStatMeth.hpp"
// Example usage
auto result = RcppGetPCAVariablesHdf5(...);