RcppGetPCAIndividualsHdf5

C++ Function Reference

1 Signature

void BigDataStatMeth::RcppGetPCAIndividualsHdf5(std::string strPCAgroup, BigDataStatMeth::hdf5Dataset *dsX, BigDataStatMeth::hdf5Dataset *dsd, BigDataStatMeth::hdf5Dataset *dsu, bool overwrite)

2 Description

Calculate PCA individuals statistics.

3 Parameters

  • strPCAgroup (std::string): HDF5 group name for PCA results
  • dsX (BigDataStatMeth::hdf5Dataset *): Input data matrix dataset
  • dsd (BigDataStatMeth::hdf5Dataset *): Singular values dataset
  • dsu (BigDataStatMeth::hdf5Dataset *): Left singular vectors dataset
  • overwrite (bool): Whether to overwrite existing results

4 Details

Computes and stores various PCA statistics for individuals including:DistancesComponent coordinatesIndividual coordinatesCos² quality metricsContributions

5 Call Graph

Function dependencies

6 Source Code

File: inst/include/hdf5Algebra/matrixPCA.hppLines 188-321

inline void RcppGetPCAIndividualsHdf5( std::string strPCAgroup, 
                                    BigDataStatMeth::hdf5Dataset* dsX,
                                    BigDataStatMeth::hdf5Dataset* dsd, 
                                    BigDataStatMeth::hdf5Dataset* dsu, 
                                    bool overwrite )
    {
        
        
        BigDataStatMeth::hdf5Dataset* dsdist2 = nullptr;
        BigDataStatMeth::hdf5Dataset* dsComp = nullptr;
        BigDataStatMeth::hdf5Dataset* dscoord = nullptr;
        BigDataStatMeth::hdf5Dataset* dscos2 = nullptr;
        BigDataStatMeth::hdf5Dataset* dscontrib = nullptr;
        
        
        try
        {
            H5::Exception::dontPrint();
            
            // int ielements = 0;
            std::vector<hsize_t> stride = {1, 1},
                block = {1, 1},
                offset = {0, 0},
                count_x = {dsX->nrows(), dsX->ncols()},
                count_d = {dsd->nrows(), dsd->ncols()},
                count_u = {dsu->nrows(), dsu->ncols()};
            
            Eigen::VectorXd dist2;
            Eigen::MatrixXd adjust;
            double weights = 1/(double)dsX->ncols();
            {
                
                std::vector<double> vdX( count_x[0] * count_x[1] );
                dsX->readDatasetBlock( {0,0}, count_x, stride, block, vdX.data() );
                Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> X (vdX.data(), count_x[0], count_x[1]);
    
                X = RcppNormalizeRowwise(X, true, false);
                adjust = Eigen::MatrixXd::Constant(1, X.cols(), weights);
                
                Eigen::RowVectorXd ecart =  adjust / adjust.sum() ;
                ecart = ecart * (X.unaryExpr([](double d) {return std::pow(d, 2);})).transpose();
                ecart = ecart.unaryExpr([](double d) {return std::sqrt(d);});
                
                X = X.array().colwise() / ecart.transpose().array();
    
                dist2 =  (X.unaryExpr([](double d) {return std::pow(d, 2);})).colwise().sum();
                Eigen::VectorXd dist = dist2.array().sqrt();
                
                dsdist2 = new BigDataStatMeth::hdf5Dataset(dsd->getFileName(), strPCAgroup, "ind.dist" ,overwrite );
                dsdist2->createDataset( dist2.rows(), dist2.cols(), "real");
                dsdist2->writeDataset( dist.data() );
                delete dsdist2; dsdist2 = nullptr;
            }
    
            // Load d
            std::vector<double> vdd( count_d[0] * count_d[1] );
            dsd->readDatasetBlock( offset, count_d, stride, block, vdd.data() );
            Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> d (vdd.data(), count_d[0], count_d[1]); 
            
            Eigen::MatrixXd var_coord;
            {
                // Load u
                std::vector<double> vdu( count_u[0] * count_u[1] );
                dsu->readDatasetBlock( {0, 0}, count_u, stride, block, vdu.data() );
                Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> u (vdu.data(), count_u[0], count_u[1]);
                
                u = u * sqrt(1/weights); 
            
                // Components
                dsComp = new BigDataStatMeth::hdf5Dataset(dsd->getFileName(), strPCAgroup, "components" ,overwrite );
                dsComp->createDataset( u.cols(), u.rows(), "real");
                dsComp->writeDataset( u.data() );
                delete dsComp; dsComp = nullptr;
                
                var_coord = Rcpp_matrixVectorMultiplication_byRow(u, d);
            }
            
            // Coord inds
            dscoord = new BigDataStatMeth::hdf5Dataset(dsd->getFileName(), strPCAgroup, "ind.coord" ,overwrite );
            dscoord->createDataset( var_coord.cols(), var_coord.rows(), "real");
            dscoord->writeDataset( Rcpp::wrap(var_coord.transpose()));
            delete dscoord; dscoord = nullptr;
            
            // Cos2 inds
            Eigen::MatrixXd coord2 = var_coord.unaryExpr([](double d) {return std::pow(d, 2);});
            
            {
                Eigen::MatrixXd ind_cos2 = coord2.array().rowwise() / dist2.transpose().array();
                
                dscos2 = new BigDataStatMeth::hdf5Dataset(dsd->getFileName(), strPCAgroup, "ind.cos2" ,overwrite );
                dscos2->createDataset( ind_cos2.cols(), ind_cos2.rows(), "real");
                dscos2->writeDataset( Rcpp::wrap(ind_cos2.transpose()) );
                delete dscos2; dscos2 = nullptr;
            }
            
            Eigen::MatrixXd ind_contrib = coord2.array().rowwise() * adjust.row(0).array();
            
            d = d.unaryExpr([](double xd) {return std::pow(xd, 2);});
            
            ind_contrib = ind_contrib.array().colwise() / d.col(0).array();
            ind_contrib = ind_contrib.unaryExpr([](double d) {return d*100;});
            
            dscontrib = new BigDataStatMeth::hdf5Dataset(dsd->getFileName(), strPCAgroup, "ind.contrib" ,overwrite );
            dscontrib->createDataset( ind_contrib.cols(), ind_contrib.rows(), "real");
            dscontrib->writeDataset( Rcpp::wrap(ind_contrib.transpose()) );
            delete dscontrib; dscontrib = nullptr;
                
            // Components
            // createHardLink(dsu->getFileptr(), dsu->getGroupName() + "/" + dsu->getDatasetName(), strPCAgroup + "/components");
            
        } catch( H5::FileIException& error ) { // catch failure caused by the H5File operations
            checkClose_file( dsX, dsd, dsu, dsdist2, dsComp, dscoord, dscos2, dscontrib);
            Rcpp::Rcerr<<"\nc++ exception RcppGetPCAIndividualsHdf5 (File IException)\n";
            return void();
        } catch( H5::DataSetIException& error ) { // catch failure caused by the DataSet operations
            checkClose_file( dsX, dsd, dsu, dsdist2, dsComp, dscoord, dscos2, dscontrib);
            Rcpp::Rcerr<<"\nc++ exception RcppGetPCAIndividualsHdf5 (DataSet IException)\n";
            return void();
        } catch( H5::DataSpaceIException& error ) { // catch failure caused by the DataSpace operations
            checkClose_file( dsX, dsd, dsu, dsdist2, dsComp, dscoord, dscos2, dscontrib);
            Rcpp::Rcerr<<"\nc++ exception RcppGetPCAIndividualsHdf5 (DataSpace IException)\n";
            return void();
        } catch(std::exception &ex) {
            checkClose_file( dsX, dsd, dsu, dsdist2, dsComp, dscoord, dscos2, dscontrib);
            Rcpp::Rcerr<<"\nc++ exception RcppGetPCAIndividualsHdf5 \n"<< ex.what();
            return void();
        } catch (...) {
            checkClose_file( dsX, dsd, dsu, dsdist2, dsComp, dscoord, dscos2, dscontrib);
            Rcpp::Rcerr<<"\nC++ exception RcppGetPCAIndividualsHdf5 (unknown reason)";
            return void();
        }
        
        return void();
    }

7 Usage Example

#include "BigDataStatMeth.hpp"

// Example usage
auto result = RcppGetPCAIndividualsHdf5(...);