get_HDF5_mean_sd_by_column
C++ Function Reference
1 Signature
void BigDataStatMeth::get_HDF5_mean_sd_by_column(BigDataStatMeth::hdf5Dataset *dsA, Eigen::MatrixXd &normalize, bool bsd, bool bmean, Rcpp::Nullable< int > wsize)
2 Description
Calculate column-wise mean and standard deviation.
3 Parameters
dsA (BigDataStatMeth::hdf5Dataset *): Input matrix dataset
normalize (Eigen::MatrixXd &): Output matrix for mean and std values
bsd (bool): compute sd
bmean (bool): compute mean
wsize (Rcpp::Nullable< int >): Block size for processing
4 Details
Computes mean and standard deviation for each column of the matrix using block-based processing for memory efficiency. Optimized for cases where n << m (rows much fewer than columns).
5 Call Graph
6 Source Code
Note: Implementation
File: inst/include/hdf5Algebra/matrixSdMean.hpp • Lines 175-245
/**
 * @brief Block-wise mean and (optionally) standard deviation of an HDF5 dataset.
 *
 * The dataset is read in chunks of consecutive rows along its first dimension
 * (dsA->dim()[0]); every chunk spans the full second dimension. Each chunk is
 * viewed as a row-major matrix and one mean / standard deviation is computed
 * per stored row. The statistic for stored row k is written to column k of
 * `normalize`: row 0 holds the means, row 1 the standard deviations.
 * NOTE(review): the "by column" name presumably refers to the R-side
 * orientation of the matrix (stored transposed in the file) — confirm.
 *
 * @param dsA        Dataset to read; must provide dim() and readDatasetBlock().
 * @param normalize  Output matrix, written in place. It is not resized here, so
 *                   it must already have at least 1 row (2 when bsd is true) and
 *                   at least dsA->dim()[0] columns.
 * @param bsd        When true, the standard deviation (n - 1 denominator) is
 *                   also computed and stored in row 1 of `normalize`.
 * @param bmean      Currently not consulted: the mean is always computed and
 *                   stored in row 0, because the standard deviation needs it.
 * @param wsize      Optional block-size hint forwarded to get_block_size().
 *
 * On an HDF5 or standard exception the file is closed through
 * checkClose_file(dsA) and the error is reported with Rf_error().
 */
inline void get_HDF5_mean_sd_by_column( BigDataStatMeth::hdf5Dataset* dsA,
                                        Eigen::MatrixXd& normalize,
                                        bool bsd, bool bmean,
                                        Rcpp::Nullable<int> wsize )
{
    try
    {
        hsize_t* dims_out = dsA->dim();
        const hsize_t total_rows = dims_out[0];   // dimension that is split into blocks
        const hsize_t row_length = dims_out[1];   // dimension that is always read in full
        const hsize_t col_offset = 0;

        std::vector<hsize_t> stride = {1, 1},
                             block  = {1, 1};

        hsize_t block_size = get_block_size(wsize, dims_out[1], dims_out[0]);
        if( block_size == 0 ) {
            // A zero block size would never advance the offset; fall back to
            // processing the whole dataset in a single block.
            block_size = total_rows;
        }

        // Iterate on the offset itself so that no empty trailing block is read
        // when total_rows is an exact multiple of block_size.
        for( hsize_t row_offset = 0; row_offset < total_rows; row_offset += block_size )
        {
            const hsize_t remaining = total_rows - row_offset;
            const hsize_t nrows = ( block_size < remaining ) ? block_size : remaining;

            std::vector<double> vdA( nrows * row_length );
            dsA->readDatasetBlock( {row_offset, col_offset}, {nrows, row_length}, stride, block, vdA.data() );

            Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> X (vdA.data(), nrows, row_length );

            // One mean per stored row of the current block.
            Eigen::VectorXd mean = X.rowwise().mean();
            normalize.block( 0, row_offset, 1, mean.size()) = mean.transpose();

            if(bsd) {
                // Sample standard deviation: centre each row on its mean, then
                // divide the sum of squares by (n - 1).
                Eigen::VectorXd sd = ((X.colwise() - mean).array().square().rowwise().sum() / (X.cols() - 1)).sqrt();
                normalize.block( 1, row_offset, 1, sd.size()) = sd.transpose();
            }
        }

    } catch( const H5::FileIException& ) { // failure caused by the H5File operations
        checkClose_file(dsA);
        Rf_error("c++ exception get_HDF5_mean_sd_by_column (File IException)");
    } catch( const H5::DataSetIException& ) { // failure caused by the DataSet operations
        checkClose_file(dsA);
        Rf_error("c++ exception get_HDF5_mean_sd_by_column (DataSet IException)");
    } catch( const std::exception& error ) {
        checkClose_file(dsA);
        Rf_error("c++ exception get_HDF5_mean_sd_by_column function: %s",error.what());
    }
}
7 Usage Example
#include "BigDataStatMeth.hpp"
// Example usage: the function returns void, so there is no result to capture.
// Means are written to row 0 of `normalize` and, when bsd is true, standard
// deviations are written to row 1.
BigDataStatMeth::get_HDF5_mean_sd_by_column(dsA, normalize, /*bsd=*/true, /*bmean=*/true, wsize);