RcppbdSVD_hdf5

C++ Function Reference

1 Signature

void BigDataStatMeth::RcppbdSVD_hdf5(std::string filename, std::string strsubgroup, std::string strdataset, int k, int q, int nev, bool bcenter, bool bscale, double dthreshold, bool bforce, bool asRowMajor, Rcpp::Nullable< Rcpp::CharacterVector > method=R_NilValue, Rcpp::Nullable< int > ithreads=R_NilValue)

2 Description

Main SVD computation function for HDF5 matrices.

3 Parameters

  • filename (std::string): Path to the HDF5 file containing input matrix
  • strsubgroup (std::string): Group path within the HDF5 file
  • strdataset (std::string): Dataset name containing the matrix
  • k (int): Number of local SVDs to concatenate at each level
  • q (int): Number of decomposition levels
  • nev (int): Number of eigenvalues per block
  • bcenter (bool): Whether to center the data before SVD
  • bscale (bool): Whether to scale the data before SVD
  • dthreshold (double): Numerical threshold for computations
  • bforce (bool): Whether to overwrite existing results
  • asRowMajor (bool): Whether to interpret matrix as row-major
  • method (Rcpp::Nullable< Rcpp::CharacterVector >): Computation method (“auto”, “blocks”, “full”)
  • ithreads (Rcpp::Nullable< int >): Number of parallel threads (optional)

4 Details

This is the main interface for computing SVD of matrices stored in HDF5 format. It automatically selects between direct LAPACK computation for small matrices and block-wise decomposition for large matrices.

5 Call Graph

Function dependencies

6 Source Code

File: inst/include/hdf5Algebra/matrixSvd.hppLines 370-475

inline void RcppbdSVD_hdf5( std::string filename, std::string strsubgroup, std::string strdataset,  
                                int k, int q, int nev, bool bcenter, bool bscale, double dthreshold, 
                                bool bforce, bool asRowMajor, 
                                Rcpp::Nullable<Rcpp::CharacterVector> method = R_NilValue,
                                Rcpp::Nullable<int> ithreads = R_NilValue)
    {
        
        BigDataStatMeth::hdf5Dataset* dsA = nullptr;
        BigDataStatMeth::hdf5Dataset* dsu = nullptr;
        BigDataStatMeth::hdf5Dataset* dsv = nullptr;
        BigDataStatMeth::hdf5Dataset* dsd = nullptr;
        
        try {
            
            std::string strMethod;
            
            std::vector<hsize_t> stride = {1, 1},
                block = {1, 1},
                offset = {0, 0},
                count = {0, 0};
            
            std::vector<std::string> strMethods = {"auto", "blocks", "full"};
            
            if(method.isNull())  strMethod = "auto" ;
            else    strMethod = Rcpp::as<std::string>(method);
            
            dsA = new BigDataStatMeth::hdf5Dataset(filename, strsubgroup, strdataset, false);
            dsA->openDataset();
            
            if( dsA->getDatasetptr() != nullptr ) { 
                
                // Create results folder
                std::string stroutgroup = "SVD/"+ strdataset;
                
                std::vector<hsize_t> dims_out = {dsA->nrows(), dsA->ncols()};;
                count = { dims_out[0], dims_out[1]};
                
                // Small matrices ==> Direct SVD (lapack)
                if( (dims_out[0] * dims_out[1] < (MAXELEMSINBLOCK / 20) && strMethod == "auto") || strMethod == "full" ) {
                    
                    // Rcpp::Rcout<<"\nEste, aquí - 1";
                    Eigen::MatrixXd X;
                    svdeig retsvd;
                    
                    std::vector<double> vdA( count[0] * count[1] ); 
                    dsA->readDatasetBlock( {offset[0], offset[1]}, {count[0], count[1]}, stride, block, vdA.data() );
                    
                    if(asRowMajor == true) {
                        Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> X (vdA.data(), count[0], count[1] );    
                        retsvd = RcppbdSVD_lapack(X, bcenter, bscale, false);
                    } else {
                        Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>> X (vdA.data(), count[1], count[0] );
                        retsvd = RcppbdSVD_lapack(X, bcenter, bscale, false);
                    }
                    
                    dsu = new hdf5Dataset(filename, stroutgroup, "u", bforce);
                    dsu->createDataset( retsvd.u.rows(), retsvd.u.cols(), "real");
                    dsu->writeDataset( Rcpp::wrap(retsvd.u) );
                    
                    dsv = new hdf5Dataset(filename, stroutgroup, "v", bforce);
                    dsv->createDataset( retsvd.v.rows(), retsvd.v.cols(), "real");
                    dsv->writeDataset( Rcpp::wrap(retsvd.v) );
                    
                    dsd = new hdf5Dataset(filename, stroutgroup, "d", bforce);
                    //.. COMENTAT 2025-02-06 ..// dsd->createDataset( retsvd.d.size(), 1, "real");
                    dsd->createDataset( 1, retsvd.d.size(), "real");
                    dsd->writeDataset( Rcpp::wrap(retsvd.d) );
                    
                } else {
                    // Rcpp::Rcout<<"\nEste, aquí - 2";
                    dsu = new BigDataStatMeth::hdf5Dataset(filename, stroutgroup, "u", true);
                    dsv = new BigDataStatMeth::hdf5Dataset(filename, stroutgroup, "v", true);
                    dsd = new BigDataStatMeth::hdf5Dataset(filename, stroutgroup, "d", true);
                    
                    if( dsA->getDatasetptr() != nullptr) {
                        RcppbdSVD_hdf5_Block( dsA, dsu, dsv, dsd, k, q, nev, bcenter, bscale, count[1], count[0], dthreshold, ithreads );
                    }
                    
                } 
            } 
            
            delete dsu; dsu = nullptr;
            delete dsv; dsv = nullptr;
            delete dsd; dsd = nullptr;
            delete dsA; dsA = nullptr;
            
        }  catch( H5::FileIException& error ) { // catch failure caused by the H5File operations
            checkClose_file(dsA, dsd, dsu, dsv);
            Rcpp::Rcerr<<"\nc++ exception RcppbdSVD_hdf5 (File IException)\n";
            return void();
        } catch( H5::DataSetIException& error ) { // catch failure caused by the DataSet operations
            checkClose_file(dsA, dsd, dsu, dsv);
            Rcpp::Rcerr<<"\nc++ exception RcppbdSVD_hdf5 (DataSet IException)\n";
            return void();
        } catch(std::exception &ex) {
            checkClose_file(dsA, dsd, dsu, dsv);
            Rcpp::Rcerr<<"c++ exception RcppbdSVD_hdf5 \n"<< ex.what();
            return void();
        } catch (...) {
            checkClose_file(dsA, dsd, dsu, dsv);
            Rcpp::Rcerr<<"\nC++ exception RcppbdSVD_hdf5 (unknown reason)";
            return void();
        }
        
        return void();
    }

7 Usage Example

#include "BigDataStatMeth.hpp"

// Example usage
auto result = RcppbdSVD_hdf5(...);