get_HDF5_rowVars
C++ Function Reference
1 Signature
Eigen::VectorXd BigDataStatMeth::get_HDF5_rowVars(BigDataStatMeth::hdf5Dataset *dsA, bool bparal, Rcpp::Nullable< int > wsize, Rcpp::Nullable< int > threads)
2 Description
Row variances of an HDF5 matrix (block-wise, parallel).
3 Parameters
- dsA (BigDataStatMeth::hdf5Dataset *): Open HDF5 dataset.
- bparal (bool): Enable OpenMP parallelism.
- wsize (Rcpp::Nullable< int >): Block size (NULL = auto).
- threads (Rcpp::Nullable< int >): Thread count (NULL = auto).
4 Returns
Vector of length nrows_R.
5 Details
Equivalent to apply(X, 1, var) — uses Bessel’s correction (n-1), where n = ncol_R. If ncol_R < 2 the variance is undefined and the result is a vector of NaNs, matching base R behaviour.
6 Call Graph
7 Source Code
Note: Implementation
File: inst/include/hdf5Algebra/matrixAggregations.hpp • Lines 788-852
/**
 * @brief Row variances (R orientation) of a matrix stored in an HDF5 dataset.
 *
 * The HDF5 layout is the transpose of the R matrix: HDF5 rows are R columns
 * and HDF5 columns are R rows. The dataset is processed in blocks of HDF5
 * columns; each block is read with all HDF5 rows, so every R row in the block
 * has all of its n observations in memory.
 *
 * Variance is computed with a two-pass algorithm (mean first, then the sum of
 * squared deviations) and Bessel's correction (n - 1). The two-pass form
 * avoids the catastrophic cancellation of the one-pass
 * (sum_sq - sum^2 / n) formula when the mean is large relative to the spread,
 * and it can never yield a negative variance.
 *
 * @param dsA     Dataset to read; must be non-null (it is dereferenced
 *                unconditionally).
 * @param bparal  Forwarded to BigDataStatMeth::get_threads to decide the
 *                number of OpenMP threads.
 * @param wsize   Requested block size, forwarded to agg_block_size.
 * @param threads Requested thread count, forwarded to
 *                BigDataStatMeth::get_threads.
 * @return Vector with one variance per HDF5 column (= per R row). All entries
 *         are NaN when there are fewer than two observations per R row.
 * @throws std::runtime_error wrapping H5::FileIException,
 *         H5::DataSetIException or any std::exception caught here.
 */
inline Eigen::VectorXd get_HDF5_rowVars(BigDataStatMeth::hdf5Dataset* dsA,
                                        bool bparal,
                                        Rcpp::Nullable<int> wsize,
                                        Rcpp::Nullable<int> threads)
{
    try {
        const hsize_t nHDF5rows = dsA->nrows();   // = R ncols (fixed, fully loaded per block)
        const hsize_t nHDF5cols = dsA->ncols();   // = R nrows (iterated block-wise)
        const double n = static_cast<double>(nHDF5rows);

        // Sample variance is undefined for fewer than two observations.
        if (nHDF5rows < 2) {
            return Eigen::VectorXd::Constant(nHDF5cols,
                                             std::numeric_limits<double>::quiet_NaN());
        }

        // Partition the HDF5 columns into contiguous blocks.
        const hsize_t bs = agg_block_size(wsize, nHDF5cols, nHDF5rows);
        std::vector<hsize_t> starts, sizes;
        agg_make_blocks(nHDF5cols, bs, starts, sizes);

        const std::vector<hsize_t> stride = {1, 1}, blk = {1, 1};
        const int nthreads = static_cast<int>(
            BigDataStatMeth::get_threads(bparal, threads));

        Eigen::VectorXd result(nHDF5cols);

        // Each iteration writes a disjoint segment of `result`, so the output
        // itself needs no synchronisation.
        // NOTE(review): readDatasetBlock is called concurrently on the same
        // dataset without a critical section (removed 20260325) -- confirm
        // that the dataset wrapper / HDF5 build makes this safe.
        // NOTE(review): an exception thrown inside the parallel region cannot
        // propagate to the enclosing try block from a worker thread -- confirm
        // that readDatasetBlock does not throw here.
        #pragma omp parallel for schedule(dynamic) num_threads(nthreads) \
            shared(dsA, starts, sizes, result)
        for (hsize_t bi = 0; bi < starts.size(); bi++) {
            std::vector<double> vd(nHDF5rows * sizes[bi]);

            dsA->readDatasetBlock({0, starts[bi]}, {nHDF5rows, sizes[bi]},
                                  stride, blk, vd.data());

            // View the buffer as (ncols_R x block_rrows): one column per R row.
            Eigen::Map<const RMMatd> X(vd.data(),
                                       static_cast<Eigen::Index>(nHDF5rows),
                                       static_cast<Eigen::Index>(sizes[bi]));

            // Two-pass variance per R row (per column of X):
            //   mean = sum(x) / n
            //   var  = sum((x - mean)^2) / (n - 1)
            const Eigen::RowVectorXd rowmean = X.colwise().mean();
            const Eigen::RowVectorXd sqdev =
                (X.rowwise() - rowmean).colwise().squaredNorm();

            result.segment(starts[bi], sizes[bi]) =
                (sqdev / (n - 1.0)).transpose();
        }

        return result;

    } catch (const H5::FileIException& e) {
        throw std::runtime_error("c++ exception get_HDF5_rowVars (File IException): "
                                 + std::string(e.getDetailMsg()));
    } catch (const H5::DataSetIException& e) {
        throw std::runtime_error("c++ exception get_HDF5_rowVars (DataSet IException): "
                                 + std::string(e.getDetailMsg()));
    } catch (const std::exception& e) {
        throw std::runtime_error(std::string("c++ exception get_HDF5_rowVars: ")
                                 + e.what());
    }
}
8 Usage Example
#include "BigDataStatMeth.hpp"
// Example usage
// The `...` below is a placeholder, not valid C++. Per the signature, the
// arguments are, in order: the hdf5Dataset pointer (dsA), the bool parallel
// flag (bparal), and the Rcpp::Nullable<int> block size (wsize) and thread
// count (threads). The function is declared in the BigDataStatMeth namespace.
auto result = get_HDF5_rowVars(...);