get_HDF5_mean_sd_by_row

C++ Function Reference

1 Signature

void BigDataStatMeth::get_HDF5_mean_sd_by_row(BigDataStatMeth::hdf5Dataset *dsA, Eigen::MatrixXd &normalize, bool bsd, bool bmean, Rcpp::Nullable< int > wsize)

2 Description

Calculate row-wise mean and standard deviation.

3 Parameters

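  • dsA (BigDataStatMeth::hdf5Dataset *): Input matrix dataset
  • normalize (Eigen::MatrixXd &): Output matrix for mean and std values; means are written to row 0 and standard deviations to row 1
  • bsd (bool): If true, the standard deviation is computed and stored in addition to the mean
  • bmean (bool): Flag for requesting the mean; the implementation shown in the Source Code section does not read it and always writes the mean
  • wsize (Rcpp::Nullable< int >): Block size for processing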

4 Details

Computes mean and standard deviation for each row of the matrix using block-based processing for memory efficiency.

5 Call Graph

Function dependencies

6 Source Code

File: inst/include/hdf5Algebra/matrixSdMean.hpp, lines 88-161

/**
 * @brief Block-wise mean and (optionally) sample standard deviation of an HDF5 dataset.
 *
 * The dataset is read in slabs that span the whole first dimension
 * (dims[0]) and at most `block_size` entries of the second dimension
 * (dims[1]). For every index j along the second dimension the mean over the
 * first dimension is written to `normalize(0, j)`; when `bsd` is true the
 * sample standard deviation (divisor dims[0] - 1) is written to
 * `normalize(1, j)`.
 *
 * NOTE(review): the function name says "by row" while the reduction runs
 * over the first HDF5 dimension — presumably the dataset is stored
 * transposed with respect to the caller's view; confirm against callers.
 *
 * @param dsA        Dataset to read; its extent is taken from `dsA->dim()`.
 * @param normalize  Pre-sized output matrix. It must have at least dims[1]
 *                   columns and at least one row (two rows when `bsd` is
 *                   true); this function does not resize it.
 * @param bsd        When true, also compute and store the standard deviation.
 * @param bmean      Currently not read by this function: the mean is always
 *                   computed and stored.
 * @param wsize      Optional block-size hint forwarded to `get_block_size`.
 *
 * On an HDF5 or standard exception the function calls `checkClose_file(dsA)`
 * and reports the failure through `Rf_error`.
 */
inline void get_HDF5_mean_sd_by_row( BigDataStatMeth::hdf5Dataset* dsA, 
                                     Eigen::MatrixXd& normalize, 
                                     bool bsd, bool bmean, 
                                     Rcpp::Nullable<int> wsize )
{
    
    try
    {
        
        hsize_t* dims_out = dsA->dim();

        std::vector<hsize_t> stride = {1, 1},
                             block = {1, 1},
                             offset = {0, 0},
                             count = {0, 0};
        
        hsize_t block_size = get_block_size(wsize, dims_out[0], dims_out[1]);

        // A zero block size would never advance the read offset; fall back to
        // reading the whole second dimension in a single slab.
        if( block_size == 0 ) {
            block_size = dims_out[1];
        }

        // Every slab spans the complete first dimension.
        count[0] = dims_out[0];

        // Walk the second dimension in slabs of at most block_size entries.
        // Looping on the offset (instead of a precomputed iteration count)
        // avoids a trailing zero-width read when dims_out[1] is an exact
        // multiple of block_size.
        while( offset[1] < dims_out[1] )
        {
            
            const hsize_t remaining = dims_out[1] - offset[1];
            count[1] = ( block_size < remaining ) ? block_size : remaining;

            std::vector<double> vdA( count[0] * count[1] ); 
            dsA->readDatasetBlock( {offset[0], offset[1]}, {count[0], count[1]}, stride, block, vdA.data() );

            // View the buffer as a count[0] x count[1] row-major matrix without copying.
            Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> X (vdA.data(), count[0], count[1] );

            // One mean per entry of the second dimension covered by this slab.
            Eigen::RowVectorXd mean = X.colwise().mean();
            normalize.block( 0, offset[1], 1, mean.size()) = mean;
            
            if(bsd){
                // Sample standard deviation: squared deviations summed over
                // the first dimension, divided by (n - 1).
                Eigen::RowVectorXd sd = ((X.rowwise() - mean).array().square().colwise().sum() / (X.rows() - 1)).sqrt();
                normalize.block( 1, offset[1], 1, sd.size()) = sd;
            }
            
            offset[1] += count[1];

        }
        
    } catch( const H5::FileIException& ) { // catch failure caused by the H5File operations
        checkClose_file(dsA);
        Rf_error("c++ exception get_HDF5_mean_sd_by_row (File IException)");
    } catch( const H5::DataSetIException& ) { // catch failure caused by the DataSet operations
        checkClose_file(dsA);
        Rf_error("c++ exception get_HDF5_mean_sd_by_row (DataSet IException)");
    } catch( const std::exception& error ) {
        checkClose_file(dsA);
        Rf_error("c++ exception get_HDF5_mean_sd_by_row function: %s",error.what());
    }
    
}

7 Usage Example

#include "BigDataStatMeth.hpp"

// Example usage: the function returns void and writes its results into `normalize`
get_HDF5_mean_sd_by_row(...);