RcppGetPCAVariablesHdf5
C++ Function Reference
1 Signature
void BigDataStatMeth::RcppGetPCAVariablesHdf5(std::string strPCAgroup, BigDataStatMeth::hdf5Dataset *dsd, BigDataStatMeth::hdf5Dataset *dsv, bool overwrite)2 Description
Calculate PCA variables statistics.
3 Parameters
strPCAgroup(std::string): HDF5 group name for PCA resultsdsd(BigDataStatMeth::hdf5Dataset *): Singular values datasetdsv(BigDataStatMeth::hdf5Dataset *): Right singular vectors datasetoverwrite(bool): Whether to overwrite existing results
4 Details
Computes and stores various PCA statistics for variables including:Eigenvalues (lambda)Variance contributionsCumulative varianceVariable coordinatesCos² quality metrics
5 Call Graph
6 Source Code
NoteImplementation
File: inst/include/hdf5Algebra/matrixPCA.hpp • Lines 68-178
inline void RcppGetPCAVariablesHdf5( std::string strPCAgroup,
BigDataStatMeth::hdf5Dataset* dsd,
BigDataStatMeth::hdf5Dataset* dsv,
bool overwrite )
{
try
{
H5::Exception::dontPrint();
std::unique_ptr<BigDataStatMeth::hdf5Dataset> dslambda(nullptr);
std::unique_ptr<BigDataStatMeth::hdf5Dataset> dsvar(nullptr);
std::unique_ptr<BigDataStatMeth::hdf5Dataset> dscumvar(nullptr);
std::unique_ptr<BigDataStatMeth::hdf5Dataset> dscoord(nullptr);
std::unique_ptr<BigDataStatMeth::hdf5Dataset> dscos2(nullptr);
// int ielements = 0;
std::vector<hsize_t> stride = {1, 1},
block = {1, 1},
offset_d = {0, 0},
count_d = {dsd->nrows(), dsd->ncols()},
offset_v = {0, 0},
count_v = {dsv->nrows(), dsv->ncols()};
std::vector<double> vdd( count_d[0] * count_d[1] );
dsd->readDatasetBlock( {offset_d[0], offset_d[1]}, {count_d[0], count_d[1]}, stride, block, vdd.data() );
Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> d (vdd.data(), count_d[0], count_d[1]);
{
// lambda
Eigen::VectorXd vvar = d.array().pow(2);
// dslambda = new BigDataStatMeth::hdf5Dataset(dsd->getFileName(), strPCAgroup, "lambda" ,overwrite );
dslambda.reset(new BigDataStatMeth::hdf5Dataset(dsd->getFullPath(), strPCAgroup, "lambda" ,overwrite ));
dslambda->inheritCompressionLevel(dsd->getCompressionLevel());
dslambda->createDataset( d.rows(), d.cols(), "real");
dslambda->writeDataset( vvar.data() );
// delete dslambda; dslambda = nullptr;
// Variance
vvar = (vvar/vvar.array().sum()) * 100;
// dsvar = new BigDataStatMeth::hdf5Dataset(dsd->getFileName(), strPCAgroup, "variance" ,overwrite );
dsvar.reset(new BigDataStatMeth::hdf5Dataset(dsd->getFullPath(), strPCAgroup, "variance" ,overwrite ));
dsvar->inheritCompressionLevel(dsd->getCompressionLevel());
dsvar->createDataset( d.rows(), d.cols(), "real");
dsvar->writeDataset( vvar.data() );
// delete dsvar; dsvar = nullptr;
// cumulative variance (max 1000 elements (firsts))
// if(vvar.size()>1000){ ielements = 1000; }
// else{ ielements = vvar.size(); }
// dscumvar = new BigDataStatMeth::hdf5Dataset(dsd->getFileName(), strPCAgroup, "cumvar" ,overwrite );
dscumvar.reset(new BigDataStatMeth::hdf5Dataset(dsd->getFullPath(), strPCAgroup, "cumvar" ,overwrite ));
dscumvar->inheritCompressionLevel(dsd->getCompressionLevel());
dscumvar->createDataset( d.rows(), d.cols(), "real");
dscumvar->writeDataset( (cumsum(vvar)).data());
// delete dscumvar; dscumvar = nullptr;
}
{
Eigen::MatrixXd var_coord;
{
// Coord vars
std::vector<double> vdv( count_v[0] * count_v[1] );
dsv->readDatasetBlock( {offset_v[0], offset_v[1]}, {count_v[0], count_v[1]}, stride, block, vdv.data() );
Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> v (vdv.data(), count_v[0], count_v[1]);
var_coord = Rcpp_matrixVectorMultiplication_byRow(v, d);
}
// dscoord = new BigDataStatMeth::hdf5Dataset(dsd->getFileName(), strPCAgroup, "var.coord" ,overwrite );
dscoord.reset(new BigDataStatMeth::hdf5Dataset(dsd->getFullPath(), strPCAgroup, "var.coord" ,overwrite ));
dscoord->inheritCompressionLevel(dsd->getCompressionLevel());
dscoord->createDataset( var_coord.rows(), var_coord.cols(), "real");
dscoord->writeDataset( var_coord.transpose().data() );
// delete dscoord; dscoord = nullptr;
// Cos2
Eigen::MatrixXd var_cos2 = var_coord.unaryExpr([](double d) {return std::pow(d, 2);});
// dscos2 = new BigDataStatMeth::hdf5Dataset(dsd->getFileName(), strPCAgroup, "var.cos2" ,overwrite );
dscos2.reset(new BigDataStatMeth::hdf5Dataset(dsd->getFullPath(), strPCAgroup, "var.cos2" ,overwrite ));
dscos2->inheritCompressionLevel(dsd->getCompressionLevel());
dscos2->createDataset( var_cos2.rows(), var_cos2.cols(), "real");
dscos2->writeDataset( var_cos2.transpose().data() );
// delete dscos2; dscos2 = nullptr;
}
} catch( H5::FileIException& error ) {
// checkClose_file(dsd, dsv, dslambda, dsvar, dscumvar, dscoord, dscos2);
throw std::runtime_error("c++ exception RcppGetPCAVariablesHdf5 (File IException)");
} catch( H5::DataSetIException& error ) {
// checkClose_file(dsd, dsv, dslambda, dsvar, dscumvar, dscoord, dscos2);
throw std::runtime_error("c++ exception RcppGetPCAVariablesHdf5 (DataSet IException)");
} catch( H5::DataSpaceIException& error ) {
// checkClose_file(dsd, dsv, dslambda, dsvar, dscumvar, dscoord, dscos2);
throw std::runtime_error("c++ exception RcppGetPCAVariablesHdf5 (DataSpace IException)");
} catch(std::exception &ex) {
// checkClose_file(dsd, dsv, dslambda, dsvar, dscumvar, dscoord, dscos2);
throw std::runtime_error(std::string("c++ exception RcppGetPCAVariablesHdf5: ") + ex.what());
} catch (...) {
// checkClose_file(dsd, dsv, dslambda, dsvar, dscumvar, dscoord, dscos2);
throw std::runtime_error("C++ exception RcppGetPCAVariablesHdf5 (unknown reason)");
}
return void();
}7 Usage Example
#include "BigDataStatMeth.hpp"
// Example usage
auto result = RcppGetPCAVariablesHdf5(...);