Next_level_SvdBlock_decomposition_hdf5

C++ Function Reference

1 Signature

void BigDataStatMeth::Next_level_SvdBlock_decomposition_hdf5(T *dsA, std::string strGroupName, int k, int q, double dthreshold, Rcpp::Nullable< int > threads=R_NilValue)

2 Parameters

  • dsA (T *)
  • strGroupName (std::string)
  • k (int)
  • q (int)
  • dthreshold (double)
  • threads (Rcpp::Nullable< int >)

3 Returns

Type: class T

4 Call Graph

Function dependencies

5 Source Code

File: inst/include/hdf5Algebra/matrixSvdBlock.hppLines 474-637

inline void Next_level_SvdBlock_decomposition_hdf5( T* dsA, std::string strGroupName, int k, int q, 
                                                           double dthreshold, Rcpp::Nullable<int> threads = R_NilValue)
    {
    
    
    static_assert(std::is_same<T*, BigDataStatMeth::hdf5Dataset* >::value || 
                  std::is_same<T*, BigDataStatMeth::hdf5DatasetInternal* >::value,
                  "Error - type not allowed");


    BigDataStatMeth::hdf5Dataset* unlimDataset = nullptr;    
    BigDataStatMeth::hdf5Dataset* dsCur = nullptr;
    
    try {
        
        int cummoffset = 0, M;
            // ithreads,  M;
        
        std::vector<hsize_t> stride = {1, 1},
            block = {1, 1},
            offset = {0, 0},
            count = {0, 0};
        
        Rcpp::CharacterVector strvmatnames = {"A","B","C","D","E","F","G","H",
                                              "I","J","K","L","M","N","O","P",
                                              "Q","R","S","T","U","V","W","X",
                                              "Y","Z"};

        // Get dataset names
        Rcpp::StringVector joindata =  dsA->getDatasetNames(strGroupName, (std::string)strvmatnames[q-1], "");
        M = joindata.size();
        
        // ithreads = get_number_threads(threads, R_NilValue);
        
        #pragma omp parallel num_threads( get_number_threads(threads, R_NilValue) )

        // Get data from M blocks in initial matrix
        #pragma omp for ordered schedule (dynamic)
        for( int i = 0; i< M ; i++)
        {
            
            std::string strDatasetName = strGroupName + "/" + strvmatnames[q] + std::to_string(i/(M/k));
            
            Eigen::MatrixXd restmp;
            Eigen::MatrixXd X;

            #pragma omp critical(accessFile)
            {
                //    a) Get dataset
                dsCur = new BigDataStatMeth::hdf5Dataset(dsA->getFullPath(), strGroupName + "/" + joindata[i], false);
                dsCur->openDataset();
                hsize_t* dims_out = dsCur->dim();
                
                std::vector<double> vdCurDataset( dims_out[0] * dims_out[1] ); 
                dsCur->readDatasetBlock( {0, 0}, {dims_out[0], dims_out[1]}, stride, block, vdCurDataset.data() );
                X = Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> (vdCurDataset.data(), dims_out[0], dims_out[1] );
                
                delete dsCur; dsCur = nullptr;
            }

            {
                //    b) SVD for each block
                svdeig retsvd;
                retsvd = RcppbdSVD_lapack(X, false, false, false);
            
                int size_d = (retsvd.d).size();
                int nzeros = 0;
            
                if( (retsvd.d)[size_d - 1] <= dthreshold ) {
                    nzeros = 1;
                    for( int j = (size_d - 2); ( j>1 && (retsvd.d)[i] <= dthreshold ); j-- ) {
                        nzeros++;
                    }
                }
            
                //    c)  U*d
                // Create diagonal matrix from svd decomposition d
                int isize = (retsvd.d).size() - nzeros;
                if( isize < 2 ) {
                    isize = 2;
                }
            
                Eigen::MatrixXd d = Eigen::MatrixXd::Zero(isize, isize);
                d.diagonal() = (retsvd.d).head(isize);
            
                Eigen::MatrixXd u = (retsvd.u).block(0, 0, (retsvd.u).rows(), isize);
            
                // if( u.size() < MAXELEMSINBLOCK ) {
                if (static_cast<hsize_t>(u.size()) < MAXELEMSINBLOCK) {
                    restmp = u*d;
                } else{
                    restmp = Rcpp_block_matrix_mul( u, d, R_NilValue);    
                } 
            }
            
            //    d) Write results to dataset
            count[0] = restmp.rows();
            count[1] = restmp.cols();

            #pragma omp ordered
            {
                #pragma omp critical(accessFile)
                {
                    
                    if( i%(M/k) == 0 || ( (i%(M/k) > 0 &&  !BigDataStatMeth::exists_HDF5_element(dsA->getFileptr(),  strDatasetName)) ) ) {
                        // Create unlimited dataset in hdf5 file
                        unlimDataset = new BigDataStatMeth::hdf5DatasetInternal(dsA->getFullPath(), strDatasetName, true );
                        unlimDataset->createUnlimitedDataset(count[0], count[1], "real");
                        delete unlimDataset; unlimDataset = nullptr;
                        
                        cummoffset = 0;
                    }
                    
                    unlimDataset = new BigDataStatMeth::hdf5DatasetInternal(dsA->getFullPath(), strDatasetName, true );
                    unlimDataset->openDataset();
                    
                    // Get write position
                    offset[1] = cummoffset;
                    cummoffset = cummoffset + restmp.cols();
                    
                    // Extend dataset before put data
                    if((i%(M/k)) != 0 && cummoffset > 0) {
                        unlimDataset->extendUnlimitedDataset(0, count[1] );
                    }
                    
                    unlimDataset->writeDatasetBlock( Rcpp::wrap(restmp), offset, count, stride, block, false);
                    delete unlimDataset; unlimDataset = nullptr;
                }
            }
        }

    } catch( H5::FileIException& error ) { 
        checkClose_file(dsA, unlimDataset, dsCur);
        Rcpp::Rcerr<<"\nc++ exception Next_level_SvdBlock_decomposition_hdf5 (File IException)\n";
        return void();
    } catch( H5::DataSetIException& error ) { 
        checkClose_file(dsA, unlimDataset, dsCur);
        Rcpp::Rcerr<<"\nc++ exception Next_level_SvdBlock_decomposition_hdf5 (DataSet IException)\n";
        return void();
    } catch( H5::GroupIException& error ) { 
        checkClose_file(dsA, unlimDataset, dsCur);
        Rcpp::Rcerr<<"\nc++ exception Next_level_SvdBlock_decomposition_hdf5 (Group IException)\n";
        return void();
    } catch( H5::DataTypeIException& error ) { 
        checkClose_file(dsA, unlimDataset, dsCur);
        Rcpp::Rcerr<<"\nc++ exception Next_level_SvdBlock_decomposition_hdf5 (DataType IException)\n";
        return void();
    } catch( H5::DataSpaceIException& error ) { 
        checkClose_file(dsA, unlimDataset, dsCur);
        Rcpp::Rcerr<<"\nc++ exception Next_level_SvdBlock_decomposition_hdf5 (DataSpace IException)\n";
        return void();
    } catch(std::exception &ex) {
        checkClose_file(dsA, unlimDataset, dsCur);
        Rcpp::Rcerr<<"c++ exception Next_level_SvdBlock_decomposition_hdf5 \n"<< ex.what();
        return void();
    } catch (...) {
        checkClose_file(dsA, unlimDataset, dsCur);
        Rcpp::Rcerr<<"\nC++ exception Next_level_SvdBlock_decomposition_hdf5 (unknown reason)";
        return void();
    }
    
    return void();

}

6 Usage Example

#include "BigDataStatMeth.hpp"

// Example usage
auto result = Next_level_SvdBlock_decomposition_hdf5(...);