RcppReduce_dataset_hdf5

C++ Function Reference

1 Signature

void BigDataStatMeth::RcppReduce_dataset_hdf5(std::string filename, std::string stringroup, std::string stroutgroup, std::string stroutdataset, std::string strreducefunction, bool boverwrite, bool bremove, bool binternal)

2 Description

Reduces multiple HDF5 datasets into a single dataset using the specified operation.

3 Parameters

  • filename (std::string): HDF5 file path
  • stringroup (std::string): Input group containing datasets to reduce
  • stroutgroup (std::string): Output group for reduced dataset
  • stroutdataset (std::string): Name of output dataset
  • strreducefunction (std::string): Reduction operation (“+” or “-”)
  • boverwrite (bool): Whether to overwrite existing output dataset
  • bremove (bool): Whether to remove input datasets after reduction
  • binternal (bool): Whether this is an internal call (affects data layout)

4 Details

Parameters:

  • filename: HDF5 file path
  • stringroup: Input group containing datasets to reduce
  • stroutgroup: Output group for reduced dataset
  • stroutdataset: Name of output dataset
  • strreducefunction: Reduction operation (“+” or “-”)
  • boverwrite: Whether to overwrite existing output dataset
  • bremove: Whether to remove input datasets after reduction
  • binternal: Whether this is an internal call (affects data layout)

Implementation approach:

  1. Opens the input HDF5 file and gets the dataset list.
  2. Processes the datasets sequentially:
       • Reads each dataset into memory
       • Adjusts dimensions if necessary
       • Applies the reduction operation
  3. Creates the output dataset with the reduced result.

5 Call Graph

Function dependencies

6 Source Code

File: inst/include/hdf5Utilities/hdf5ReduceDataset.hpp, lines 101-254

inline void RcppReduce_dataset_hdf5 ( std::string filename, 
                                   std::string stringroup, 
                                   std::string stroutgroup, 
                                   std::string stroutdataset, 
                                   std::string strreducefunction, 
                                   bool boverwrite,
                                   bool bremove,
                                   bool binternal)
    {
        
        BigDataStatMeth::hdf5File* objFile = nullptr;
        BigDataStatMeth::hdf5Dataset* dsIn = nullptr;
        BigDataStatMeth::hdf5Dataset* dsOut = nullptr;
        try {
            
            hsize_t* dims_out;
            std::vector<hsize_t> stride = {1, 1},
                block = {1, 1},
                offset = {0, 0};
            
            Eigen::MatrixXd fullReduced;
            Eigen::MatrixXd newRead;
            int ndatasets;
            
            objFile = new BigDataStatMeth::hdf5File(filename, false);
            objFile->openFile("r");
            
            // Get dataset names without prefix, all datasets inside the group
            Rcpp::StringVector joindata =  objFile->getDatasetNames(stringroup, "", "");
            
            delete objFile; // Close file 
            
            ndatasets = joindata.size();
            
            for ( int i=0; i< ndatasets; i++)
            {
                dsIn = new BigDataStatMeth::hdf5Dataset(filename, stringroup + "/" + joindata[i], false);
                dsIn->openDataset();

                if( dsIn->getDatasetptr() == nullptr) {
                    checkClose_file(dsIn);
                    Rcpp::Rcerr<< "c++ exception RcppReduce_dataset_hdf5 (Dataset IException )" << std::endl;
                    return void();
                }
                
                dims_out =   dsIn->dim();
                
                std::vector<double> vdIn( dims_out[0] * dims_out[1] ); 
                dsIn->readDatasetBlock( {offset[0], offset[1]}, {dims_out[0], dims_out[1]}, stride, block, vdIn.data() );
                
                if( i == 0 ) {
                    
                    if(binternal == true)
                        fullReduced = Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> (vdIn.data(), dims_out[0], dims_out[1] );
                    else
                        fullReduced = Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>> (vdIn.data(), dims_out[0], dims_out[1] );
                    
                } else {
                    
                    if(binternal == true)
                        newRead = Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>  (vdIn.data(), dims_out[0], dims_out[1] );
                    else
                        newRead = Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>>  (vdIn.data(), dims_out[0], dims_out[1] );
                    
                    if( newRead.rows() != fullReduced.rows()){
                        
                        int difference = std::abs(fullReduced.rows() - newRead.rows());
                        if( newRead.rows() > fullReduced.rows()) {
                            newRead.resize( newRead.rows() + difference, Eigen::NoChange);    
                        } else {
                            fullReduced.resize( fullReduced.rows() + difference, Eigen::NoChange);    
                        }
                    }
                    
                    if( newRead.cols() != fullReduced.cols()){
                        
                        int difference = std::abs(fullReduced.cols() - newRead.cols());
                        if( newRead.cols() > fullReduced.cols()){
                            newRead.resize( Eigen::NoChange, newRead.cols() + difference );
                        } else {
                            fullReduced.resize( Eigen::NoChange, fullReduced.cols() + difference );
                        }
                    }
                    
                    // Reduce matrix
                    if( strreducefunction.compare("+")==0) {
                        fullReduced = fullReduced + newRead;
                    } else if (strreducefunction.compare("-")==0) {
                        fullReduced = fullReduced - newRead;
                    } 
                    
                }
                
                if( bremove == true){
                    dsIn->remove();
                }
                
                delete dsIn; dsIn = nullptr;
            }
            
            dsOut = new BigDataStatMeth::hdf5Dataset(filename, stroutgroup, stroutdataset, boverwrite);
            
            if(binternal == true) {
                dsOut->createDataset( fullReduced.rows() , fullReduced.cols(), "real");
                // if( dsOut->getDatasetptr() != nullptr) {
                //     dsOut->writeDataset(Rcpp::wrap(fullReduced));
                // } else {
                //     checkClose_file(dsOut);
                //     Rcpp::Rcerr<< "c++ exception RcppReduce_dataset_hdf5 (Dataset IException )" << std::endl;
                //     return void();
                // }
            } else {
                dsOut->createDataset( fullReduced.cols() , fullReduced.rows(), "real");
                fullReduced.transposeInPlace();
                
                // if( dsOut->getDatasetptr() != nullptr) {
                //     fullReduced.transposeInPlace();
                //     dsOut->writeDataset(Rcpp::wrap(fullReduced));
                // } else {
                //     checkClose_file(dsOut);
                //     Rcpp::Rcerr<< "c++ exception RcppReduce_dataset_hdf5 (Dataset IException )" << std::endl;
                //     return void();
                // }
            }
            
            
            if( dsOut->getDatasetptr() != nullptr) {
                dsOut->writeDataset(Rcpp::wrap(fullReduced));
            } else {
                checkClose_file(dsOut);
                Rcpp::Rcerr<< "c++ exception RcppReduce_dataset_hdf5 (Dataset IException )" << std::endl;
                return void();
            }
            
            delete dsOut; dsOut = nullptr;
            
        }catch( H5::FileIException& error ) {
            checkClose_file(dsIn, dsOut);
            // ::Rf_error( "c++ exception RcppReduce_dataset_hdf5 (File IException )" );
            Rcpp::Rcerr<< "c++ exception RcppReduce_dataset_hdf5 (File IException )" << std::endl;
            return void();
        } catch( H5::DataSetIException& error ) { // catch failure caused by the dstosplit operations
            checkClose_file(dsIn, dsOut);
            // ::Rf_error( "c++ exception RcppReduce_dataset_hdf5 (dstosplit IException )" );
            Rcpp::Rcerr<< "c++ exception RcppReduce_dataset_hdf5 (dstosplit IException )" << std::endl;
            return void();
        } catch( H5::DataSpaceIException& error ) { // catch failure caused by the DataSpace operations
            checkClose_file(dsIn, dsOut);
            // ::Rf_error( "c++ exception RcppReduce_dataset_hdf5 (DataSpace IException )" );
            Rcpp::Rcerr<< "c++ exception RcppReduce_dataset_hdf5 (DataSpace IException )" << std::endl;
            return void();
        } 
        return void();
    }

7 Usage Example

#include "BigDataStatMeth.hpp"

// Example usage
auto result = RcppReduce_dataset_hdf5(...);