RcppSort_dataset_hdf5

C++ Function Reference

1 Signature

void BigDataStatMeth::RcppSort_dataset_hdf5(BigDataStatMeth::hdf5Dataset *dsIn, BigDataStatMeth::hdf5Dataset *dsOut, Rcpp::List blockedSortlist, std::string func)

2 Description

Sorts an HDF5 dataset by rows or columns.

3 Parameters

  • dsIn (BigDataStatMeth::hdf5Dataset *): Pointer to input HDF5 dataset
  • dsOut (BigDataStatMeth::hdf5Dataset *): Pointer to output HDF5 dataset where sorted data will be stored
  • blockedSortlist (Rcpp::List): List containing sorting specifications. Column 1: original order; Column 2: new order; Column 3: diagonal indicators; Column 4: additional order information
  • func (std::string): Sorting function type. "sortRows": sort by rows; "sortCols": sort by columns

4 Details

This function performs sorting operations on an HDF5 dataset based on a provided sorting specification. It supports both row-wise and column-wise sorting, with special handling for diagonal elements.

5 Call Graph

Function dependencies

6 Source Code

File: inst/include/hdf5Utilities/hdf5SortDataset.hpp, lines 102-185

/**
 * @brief Copies blocks of an HDF5 dataset to new positions in an output dataset.
 *
 * Each element of @p blockedSortlist is read as a data frame describing one
 * block: column 1 holds the original (1-based) positions, column 2 the new
 * (1-based) positions and column 3 the diagonal indicators. For every block the
 * corresponding hyperslab is read from @p dsIn and written to @p dsOut at the
 * offset given by the first entry of the new-order column.
 *
 * Blocks for which find_all() reports at least one match of 0 in the diagonal
 * column are currently skipped (no data is copied for them).
 *
 * @param dsIn            Input dataset; its dim() is used for the block extent.
 * @param dsOut           Output dataset that receives the reordered blocks.
 * @param blockedSortlist List of per-block data frames (see above).
 * @param func            "sortRows" to move row blocks, "sortCols" to move
 *                        column blocks.
 *
 * Exceptions are not propagated: on any exception checkClose_file(dsIn, dsOut)
 * is called, a message is written to Rcpp::Rcerr and the function returns.
 */
inline void RcppSort_dataset_hdf5(BigDataStatMeth::hdf5Dataset* dsIn,
                                           BigDataStatMeth::hdf5Dataset* dsOut,
                                           Rcpp::List blockedSortlist,
                                           std::string func)
    {
        
        try {
            
            // Name -> operation code lookup table (0 = rows, 1 = columns)
            Rcpp::NumericVector oper = {0, 1};
            oper.names() = Rcpp::CharacterVector({ "sortRows", "sortCols"});
            
            // offset/count are deliberately declared outside the loop: the
            // dimension that is not being sorted keeps its previous value.
            std::vector<hsize_t> stride = {1, 1},
                                 block = {1, 1},
                                 offset = {0, 0},
                                 count = {0, 0};
            
            hsize_t* dims_out = dsIn->dim();
            
            for( int i = 0; i < blockedSortlist.length(); i++) {
                
                Rcpp::DataFrame df(blockedSortlist[i]);
                std::vector<double> order = df[0];     // column 1: original order
                std::vector<double> neworder = df[1];  // column 2: new order
                std::vector<double> diagonal = df[2];  // column 3: diagonal indicators
                
                auto indices_0 = find_all(diagonal.begin(), diagonal.end(), 0);
                
                // Blocks with matches for 0 in the diagonal column are not processed
                if( indices_0.size() > 0) {
                    continue;
                }
                
                // An empty block has no first position to read from / write to
                if( order.empty() || neworder.empty() ) {
                    continue;
                }
                
                // Resolve the operation once per block; an unrecognised name is
                // reported by findName() through an exception handled below.
                const auto op = oper.findName( func );
                
                if( op == 0 ) {
                    offset[0] = order[0] - 1;   // 1-based -> 0-based
                    count[0] = order.size();
                    count[1] = dims_out[1]; 
                    
                } else if( op == 1 ) {
                    offset[1] = order[0] - 1;   // 1-based -> 0-based
                    // NOTE(review): this extent is not symmetric with the row case
                    // (one would expect count[0] = dims_out[0], count[1] = order.size());
                    // kept as-is because the intended semantics cannot be confirmed here.
                    count[1] = dims_out[1]; 
                    count[0] = order[order.size() - order[0]];
                } 
                
                std::vector<double> vdIn( count[0] * count[1] ); 
                dsIn->readDatasetBlock( {offset[0], offset[1]}, {count[0], count[1]}, stride, block, vdIn.data() );
                
                // Same block shape, but placed at the destination position
                if( op == 0 ) {
                    offset[0] = neworder[0]-1;
                } else if( op == 1 ) {
                    offset[1] = neworder[0]-1;
                }
                
                dsOut->writeDatasetBlock(vdIn, offset, count, stride, block);
            }
            
        } catch( const H5::FileIException& ) {
            checkClose_file(dsIn, dsOut);
            Rcpp::Rcerr<<"c++ exception RcppSort_dataset_hdf5 (File IException )" << std::endl;
            return;
        } catch( const H5::DataSetIException& ) { // catch failure caused by the DataSet operations
            checkClose_file(dsIn, dsOut);
            Rcpp::Rcerr<<"c++ exception RcppSort_dataset_hdf5 (dstosplit IException )" << std::endl;
            return;
        } catch( const H5::DataSpaceIException& ) { // catch failure caused by the DataSpace operations
            checkClose_file(dsIn, dsOut);
            Rcpp::Rcerr<<"c++ exception RcppSort_dataset_hdf5 (DataSpace IException )" << std::endl;
            return;
        } catch( const std::exception& ex ) {
            checkClose_file(dsIn, dsOut);
            Rcpp::Rcerr << "c++ exception RcppSort_dataset_hdf5: " << ex.what();
            return;
        } catch (...) {
            checkClose_file(dsIn, dsOut);
            Rcpp::Rcerr<<"C++ exception RcppSort_dataset_hdf5 (unknown reason)";
            return;
        } 
        
        return;
        
    }

7 Usage Example

#include "BigDataStatMeth.hpp"

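// Example usage (the function returns void, so there is no result to capture).
// dsIn, dsOut and blockedSortlist are assumed to have been prepared by the caller.
BigDataStatMeth::RcppSort_dataset_hdf5(dsIn, dsOut, blockedSortlist, "sortRows");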