RcppGetPCAVariablesHdf5

C++ Function Reference

1 Signature

void BigDataStatMeth::RcppGetPCAVariablesHdf5(std::string strPCAgroup, BigDataStatMeth::hdf5Dataset *dsd, BigDataStatMeth::hdf5Dataset *dsv, bool overwrite)

2 Description

Calculate PCA variables statistics.

3 Parameters

  • strPCAgroup (std::string): HDF5 group name for PCA results
  • dsd (BigDataStatMeth::hdf5Dataset *): Singular values dataset
  • dsv (BigDataStatMeth::hdf5Dataset *): Right singular vectors dataset
  • overwrite (bool): Whether to overwrite existing results

4 Details

Computes and stores various PCA statistics for variables including:Eigenvalues (lambda)Variance contributionsCumulative varianceVariable coordinatesCos² quality metrics

5 Call Graph

Function dependencies

6 Source Code

File: inst/include/hdf5Algebra/matrixPCA.hppLines 66-168

inline void RcppGetPCAVariablesHdf5( std::string strPCAgroup, 
                                  BigDataStatMeth::hdf5Dataset* dsd, 
                                  BigDataStatMeth::hdf5Dataset* dsv, 
                                  bool overwrite )
    {
        
        BigDataStatMeth::hdf5Dataset* dslambda = nullptr;
        BigDataStatMeth::hdf5Dataset* dsvar = nullptr;
        BigDataStatMeth::hdf5Dataset* dscumvar = nullptr;
        BigDataStatMeth::hdf5Dataset* dscoord = nullptr;
        BigDataStatMeth::hdf5Dataset* dscos2 = nullptr;
        
        try
        {
            H5::Exception::dontPrint();
            
            // int ielements = 0;
            std::vector<hsize_t> stride = {1, 1},
                                 block = {1, 1},
                                 offset_d = {0, 0},
                                 count_d = {dsd->nrows(), dsd->ncols()},
                                 offset_v = {0, 0},
                                 count_v = {dsv->nrows(), dsv->ncols()};
            
            std::vector<double> vdd( count_d[0] * count_d[1] );
            dsd->readDatasetBlock( {offset_d[0], offset_d[1]}, {count_d[0], count_d[1]}, stride, block, vdd.data() );
            Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> d (vdd.data(), count_d[0], count_d[1]);
            
            {    
                // lambda
                Eigen::VectorXd vvar = d.array().pow(2);
                dslambda = new BigDataStatMeth::hdf5Dataset(dsd->getFileName(), strPCAgroup, "lambda" ,overwrite );
                dslambda->createDataset( d.rows(), d.cols(), "real");
                dslambda->writeDataset( vvar.data() );
                delete dslambda; dslambda = nullptr;
                
                // Variance
                vvar = (vvar/vvar.array().sum()) * 100;
                dsvar = new BigDataStatMeth::hdf5Dataset(dsd->getFileName(), strPCAgroup, "variance" ,overwrite );
                dsvar->createDataset( d.rows(), d.cols(), "real");
                dsvar->writeDataset( vvar.data() );
                delete dsvar; dsvar = nullptr;
                
                // cumulative variance (max 1000 elements (firsts))
                // if(vvar.size()>1000){  ielements = 1000;    }
                // else{ ielements = vvar.size();    }
                
                dscumvar = new BigDataStatMeth::hdf5Dataset(dsd->getFileName(), strPCAgroup, "cumvar" ,overwrite );
                dscumvar->createDataset( d.rows(), d.cols(), "real");
                dscumvar->writeDataset( (cumsum(vvar)).data());
                delete dscumvar; dscumvar = nullptr;
            }
            
            {
                Eigen::MatrixXd var_coord;
                {
                    // Coord vars
                    std::vector<double> vdv( count_v[0] * count_v[1] );
                    dsv->readDatasetBlock( {offset_v[0], offset_v[1]}, {count_v[0], count_v[1]}, stride, block, vdv.data() );
                    Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> v (vdv.data(), count_v[0], count_v[1]);
                    
                    var_coord = Rcpp_matrixVectorMultiplication_byRow(v, d);
                }
                
                dscoord = new BigDataStatMeth::hdf5Dataset(dsd->getFileName(), strPCAgroup, "var.coord" ,overwrite );
                dscoord->createDataset( var_coord.rows(), var_coord.cols(), "real");
                dscoord->writeDataset( var_coord.transpose().data() );
                delete dscoord; dscoord = nullptr;
    
                // Cos2
                Eigen::MatrixXd var_cos2 = var_coord.unaryExpr([](double d) {return std::pow(d, 2);});
                dscos2 = new BigDataStatMeth::hdf5Dataset(dsd->getFileName(), strPCAgroup, "var.cos2" ,overwrite );
                dscos2->createDataset( var_cos2.rows(), var_cos2.cols(), "real");
                dscos2->writeDataset( var_cos2.transpose().data() );
                delete dscos2; dscos2 = nullptr;
                
            }
            
        } catch( H5::FileIException& error ) { 
            checkClose_file(dsd, dsv, dslambda, dsvar, dscumvar, dscoord, dscos2);
            Rcpp::Rcerr<<"\nc++ exception RcppGetPCAVariablesHdf5 (File IException)\n";
            return void();
        } catch( H5::DataSetIException& error ) { 
            checkClose_file(dsd, dsv, dslambda, dsvar, dscumvar, dscoord, dscos2);
            Rcpp::Rcerr<<"\nc++ exception RcppGetPCAVariablesHdf5 (DataSet IException)\n";
            return void();
        } catch( H5::DataSpaceIException& error ) { 
            checkClose_file(dsd, dsv, dslambda, dsvar, dscumvar, dscoord, dscos2);
            Rcpp::Rcerr<<"\nc++ exception RcppGetPCAVariablesHdf5 (DataSpace IException)\n";
            return void();
        } catch(std::exception &ex) {
            checkClose_file(dsd, dsv, dslambda, dsvar, dscumvar, dscoord, dscos2);
            Rcpp::Rcout<<"c++ exception RcppGetPCAVariablesHdf5 \n"<< ex.what();
            return void();
        } catch (...) {
            checkClose_file(dsd, dsv, dslambda, dsvar, dscumvar, dscoord, dscos2);
            Rcpp::Rcout<<"\nC++ exception RcppGetPCAVariablesHdf5 (unknown reason)";
            return void();
        }
        
        
        return void();
    }

7 Usage Example

#include "BigDataStatMeth.hpp"

// Example usage
auto result = RcppGetPCAVariablesHdf5(...);