get_HDF5_mean_sd_by_row
C++ Function Reference
1 Signature
void BigDataStatMeth::get_HDF5_mean_sd_by_row(BigDataStatMeth::hdf5Dataset *dsA, Eigen::MatrixXd &normalize, bool bsd, bool bmean, Rcpp::Nullable< int > wsize)
2 Description
Calculate row-wise mean and standard deviation.
3 Parameters
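dsA (BigDataStatMeth::hdf5Dataset *): Input matrix dataset
normalize (Eigen::MatrixXd &): Output matrix for mean and std values (means are written to row 0, standard deviations to row 1)
bsd (bool): If true, the standard deviation is computed and stored in addition to the mean
bmean (bool): Mean flag; the implementation shown below does not read it and always computes the mean
wsize (Rcpp::Nullable< int >): Block size for processing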
4 Details
Computes mean and standard deviation for each row of the matrix using block-based processing for memory efficiency.
5 Call Graph
6 Source Code
Note: Implementation
File: inst/include/hdf5Algebra/matrixSdMean.hpp • Lines 88-161
/**
 * @brief Block-wise mean and (optionally) standard deviation of an HDF5 dataset.
 *
 * The dataset is read in slabs that span all dims[0] entries and up to
 * `block_size` consecutive entries along dims[1]. For every index along
 * dims[1] one mean (and, if requested, one sample standard deviation with an
 * n-1 denominator) is computed over the dims[0] direction.
 * NOTE(review): the function name says "by row" while the reduction is over
 * the first stored dimension - presumably the on-disk layout is transposed
 * with respect to the R matrix; confirm against the callers.
 *
 * @param dsA       Dataset to read; `dim()` and `readDatasetBlock()` are used.
 * @param normalize Output matrix. Row 0 receives the means and row 1 the
 *                  standard deviations; it must already have at least
 *                  dims[1] columns (and 2 rows when @p bsd is true).
 * @param bsd       When true the standard deviations are computed and stored.
 * @param bmean     Not read by this implementation; the mean is always computed.
 * @param wsize     Optional block-size hint forwarded to `get_block_size`.
 *
 * On an HDF5 or standard exception the file is closed through
 * `checkClose_file` and the error is reported with `Rf_error`.
 */
inline void get_HDF5_mean_sd_by_row( BigDataStatMeth::hdf5Dataset* dsA,
                                     Eigen::MatrixXd& normalize,
                                     bool bsd, bool bmean,
                                     Rcpp::Nullable<int> wsize )
{
    // bmean is kept only for interface compatibility.
    static_cast<void>(bmean);

    try
    {
        using RowMajorMatrix = Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;

        hsize_t* dims_out = dsA->dim();
        const hsize_t n_rows = dims_out[0];
        const hsize_t n_cols = dims_out[1];

        std::vector<hsize_t> stride = {1, 1},
                             block  = {1, 1};

        hsize_t block_size = get_block_size(wsize, n_rows, n_cols);
        // A zero block size would never advance; an oversized one is clamped
        // so that the read buffer is not larger than the dataset itself.
        if( block_size == 0 || block_size > n_cols ) {
            block_size = n_cols;
        }

        // One buffer, sized for a full block, is reused for every slab.
        std::vector<double> vdA( n_rows * block_size );

        // Walk along dims[1] in steps of block_size. Iterating on the offset
        // (rather than on floor(n_cols / block_size)) avoids a trailing
        // zero-width read when n_cols is an exact multiple of block_size.
        for( hsize_t col_offset = 0; col_offset < n_cols; col_offset += block_size )
        {
            const hsize_t remaining  = n_cols - col_offset;
            const hsize_t n_blk_cols = ( remaining < block_size ) ? remaining : block_size;

            dsA->readDatasetBlock( {0, col_offset}, {n_rows, n_blk_cols}, stride, block, vdA.data() );

            Eigen::Map<RowMajorMatrix> X( vdA.data(), n_rows, n_blk_cols );

            const Eigen::RowVectorXd mean = X.colwise().mean();
            normalize.block( 0, col_offset, 1, mean.size() ) = mean;

            if( bsd ) {
                // Sample standard deviation (n - 1 denominator).
                const Eigen::RowVectorXd sd =
                    ( (X.rowwise() - mean).array().square().colwise().sum()
                      / static_cast<double>(X.rows() - 1) ).sqrt();
                normalize.block( 1, col_offset, 1, sd.size() ) = sd;
            }
        }

    } catch( const H5::FileIException& ) { // failure caused by the H5File operations
        checkClose_file(dsA);
        Rf_error("c++ exception get_HDF5_mean_sd_by_row (File IException)");
    } catch( const H5::DataSetIException& ) { // failure caused by the DataSet operations
        checkClose_file(dsA);
        Rf_error("c++ exception get_HDF5_mean_sd_by_row (DataSet IException)");
    } catch( const std::exception& error ) {
        checkClose_file(dsA);
        Rf_error("c++ exception get_HDF5_mean_sd_by_row function: %s",error.what());
    }
}
7 Usage Example
#include "BigDataStatMeth.hpp"
// Example usage: the function returns void and writes its results into the
// `normalize` matrix passed by reference, so there is no return value to capture.
get_HDF5_mean_sd_by_row(...);