RcppPseudoinvHdf5

C++ Function Reference

1 Signature

void BigDataStatMeth::RcppPseudoinvHdf5(BigDataStatMeth::hdf5Dataset *dsA, BigDataStatMeth::hdf5Dataset *dsR, Rcpp::Nullable< int > threads=R_NilValue)

2 Description

Compute pseudoinverse of HDF5 matrix.

3 Parameters

  • dsA (BigDataStatMeth::hdf5Dataset *): Input matrix dataset
  • dsR (BigDataStatMeth::hdf5Dataset *): Output pseudoinverse dataset
  • threads (Rcpp::Nullable< int >): Number of threads for parallel processing (optional)

4 Details

Computes the Moore-Penrose pseudoinverse of a matrix stored in HDF5 format using SVD decomposition with parallel processing support.

5 Call Graph

Function dependencies

6 Source Code

File: inst/include/hdf5Algebra/matrixPseudoinverse.hppLines 141-198

inline void RcppPseudoinvHdf5( BigDataStatMeth::hdf5Dataset* dsA, 
                                      BigDataStatMeth::hdf5Dataset* dsR, 
                                      Rcpp::Nullable<int> threads = R_NilValue )
{
    
    char Schar='S';
    char Cchar='C';
    int ione = 1; 
    double done = 1.0;
    double dzero = 0.0;
    
    int n = dsA->nrows();
    int m = dsA->ncols();
    int lda = m;
    int ldu = std::max(1,m);
    int ldvt =  std::max(1, std::min(m, n));
    int k = std::min(m,n);
    int lwork = std::max( 1, 4*std::min(m,n)* std::min(m,n) + 7*std::min(m, n) );
    int info = 0;
    
    std::vector<hsize_t> offset = {0,0},
        count = {dsA->nrows(), dsA->ncols()},
        stride = {1,1},
        block = {1,1};
    
    Eigen::VectorXd s = Eigen::VectorXd::Zero(k);
    Eigen::MatrixXd u = Eigen::MatrixXd::Zero(ldu,k);
    Eigen::MatrixXd vt = Eigen::MatrixXd::Zero(ldvt,n);
    
    {
        Eigen::VectorXd work = Eigen::VectorXd::Zero(lwork);
        std::vector<double> vdA( count[0] * count[1] );
        
        dsA->readDatasetBlock( {offset[0], offset[1]}, {count[0], count[1]}, stride, block, vdA.data() );
        Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> A (vdA.data(), count[0], count[1]);
        
        dgesvd_( &Schar, &Schar, &m, &n, A.transpose().data() , &lda, s.data(), u.data(), &ldu, vt.data(), &ldvt, work.data(), &lwork, &info);
    }
    
    Eigen::MatrixXd pinv = Eigen::MatrixXd::Zero(n,m);
    dsR->createDataset( n, m, "real" );
    
#pragma omp parallel for num_threads(get_number_threads(threads, R_NilValue))
    for (int i = 0; i < k; i++){
        double tempS;
        if(s[i] > 1.0e-9)
            tempS = 1.0/s[i];
        else
            tempS = s[i];
        
        dscal_( &m, &tempS, &(u(i*ldu)), &ione );
    }
    
    dgemm_( &Cchar, &Cchar, &n, &m, &k, &done, vt.data(), &k, u.data(), &m, &dzero, pinv.data(), &n );
    dsR->writeDataset(Rcpp::wrap(pinv));

    return void();
}

7 Usage Example

#include "BigDataStatMeth.hpp"

// Example usage
auto result = RcppPseudoinvHdf5(...);