get_HDF5_scalar_var
C++ Function Reference
1 Signature
double BigDataStatMeth::get_HDF5_scalar_var(BigDataStatMeth::hdf5Dataset *dsA, bool bparal, Rcpp::Nullable< int > wsize, Rcpp::Nullable< int > threads)
2 Description
Variance of all elements of an HDF5 matrix (treated as a flat vector).
3 Parameters
- dsA (BigDataStatMeth::hdf5Dataset *): Open HDF5 dataset.
- bparal (bool): Enable OpenMP parallelism.
- wsize (Rcpp::Nullable< int >): Block size (NULL = auto).
- threads (Rcpp::Nullable< int >): Thread count (NULL = auto).
4 Returns
Scalar variance.
5 Details
Equivalent to var(as.vector(X)) — uses Bessel’s correction (N-1).
6 Call Graph
7 Source Code
Note: Implementation
File: inst/include/hdf5Algebra/matrixAggregations.hpp • Lines 1108-1165
/**
 * @brief Sample variance of all elements of an HDF5 matrix, treated as one
 *        flat vector (Bessel's correction, divisor N-1).
 *
 * The dataset is processed in row blocks. For every block the element count,
 * mean and centred sum of squares (M2) are computed independently; the
 * per-block results are then merged serially, in block order, with the
 * pairwise update of Chan, Golub & LeVeque. Compared with the textbook
 * one-pass formula (sum_sq - sum^2/N) this avoids catastrophic cancellation
 * when the mean is large relative to the spread, cannot produce a negative
 * variance, and gives the same result regardless of thread scheduling.
 *
 * @param dsA     Dataset to read; must be non-null (it is dereferenced
 *                unconditionally).
 * @param bparal  Forwarded to BigDataStatMeth::get_threads() together with
 *                @p threads to pick the OpenMP thread count.
 * @param wsize   Forwarded to agg_block_size() to pick the row-block size.
 * @param threads Requested thread count, forwarded to get_threads().
 *
 * @return The variance, or quiet NaN when the matrix holds fewer than two
 *         elements.
 *
 * @throws std::runtime_error wrapping any H5::FileIException,
 *         H5::DataSetIException or std::exception raised while reading or
 *         computing, including those raised inside the parallel loop.
 */
inline double get_HDF5_scalar_var(BigDataStatMeth::hdf5Dataset* dsA,
                                  bool bparal,
                                  Rcpp::Nullable<int> wsize,
                                  Rcpp::Nullable<int> threads)
{
    try {
        const hsize_t nHDF5rows = dsA->nrows();
        const hsize_t nHDF5cols = dsA->ncols();

        // Element count kept as double: it is only used in floating-point
        // arithmetic and cannot overflow the way an integer product could.
        const double N = static_cast<double>(nHDF5rows) *
                         static_cast<double>(nHDF5cols);

        // Variance with N-1 in the denominator is undefined below 2 elements.
        if (N < 2.0)
            return std::numeric_limits<double>::quiet_NaN();

        // starts[i] / sizes[i] are used below as the first row and the row
        // count of block i.
        const hsize_t bs = agg_block_size(wsize, nHDF5rows, nHDF5cols);
        std::vector<hsize_t> starts, sizes;
        agg_make_blocks(nHDF5rows, bs, starts, sizes);

        const std::vector<hsize_t> stride = {1, 1}, blk = {1, 1};
        const int nthreads = static_cast<int>(
            BigDataStatMeth::get_threads(bparal, threads));

        // One slot per block. Each loop iteration writes only its own index,
        // so no synchronisation is needed for these vectors.
        const hsize_t nblocks = static_cast<hsize_t>(starts.size());
        std::vector<double> block_count(starts.size(), 0.0);
        std::vector<double> block_mean(starts.size(), 0.0);
        std::vector<double> block_m2(starts.size(), 0.0);

        // An exception must not leave an OpenMP structured block, so the
        // first one raised in the loop is parked here and rethrown afterwards.
        std::exception_ptr first_error;

        // File access is intentionally not serialised (the former
        // `omp critical(accessFile)` was removed on 2026-03-25).
        // NOTE(review): this relies on readDatasetBlock / the linked HDF5
        // build tolerating concurrent reads -- confirm.
        #pragma omp parallel for schedule(dynamic) num_threads(nthreads) \
            shared(dsA, starts, sizes)
        for (hsize_t bi = 0; bi < nblocks; bi++) {
            try {
                if (sizes[bi] != 0) {
                    std::vector<double> vd(sizes[bi] * nHDF5cols);
                    dsA->readDatasetBlock({starts[bi], 0},
                                          {sizes[bi], nHDF5cols},
                                          stride, blk, vd.data());

                    Eigen::Map<const RMMatd> X(
                        vd.data(),
                        static_cast<Eigen::Index>(sizes[bi]),
                        static_cast<Eigen::Index>(nHDF5cols));

                    // Two passes over the in-memory block: mean first, then
                    // squared deviations from that mean.
                    const double mu = X.mean();
                    block_count[bi] = static_cast<double>(sizes[bi]) *
                                      static_cast<double>(nHDF5cols);
                    block_mean[bi]  = mu;
                    block_m2[bi]    = (X.array() - mu).square().sum();
                }
            } catch (...) {
                #pragma omp critical(get_HDF5_scalar_var_error)
                {
                    if (!first_error)
                        first_error = std::current_exception();
                }
            }
        }

        // Rethrown inside the outer try so the handlers below format it
        // exactly as they would a serial failure.
        if (first_error)
            std::rethrow_exception(first_error);

        // Merge per-block statistics in block order (Chan et al.):
        //   delta = mean_b - mean
        //   mean += delta * n_b / (n + n_b)
        //   M2   += M2_b + delta^2 * n * n_b / (n + n_b)
        double n_acc    = 0.0;
        double mean_acc = 0.0;
        double m2_acc   = 0.0;
        for (std::size_t i = 0; i < block_count.size(); ++i) {
            const double n_b = block_count[i];
            if (n_b == 0.0)
                continue;
            const double n_new = n_acc + n_b;
            const double delta = block_mean[i] - mean_acc;
            mean_acc += delta * (n_b / n_new);
            m2_acc   += block_m2[i] + delta * delta * (n_acc * n_b / n_new);
            n_acc     = n_new;
        }

        return m2_acc / (N - 1.0);
    } catch (const H5::FileIException& e) {
        throw std::runtime_error("c++ exception get_HDF5_scalar_var (File IException): "
                                 + std::string(e.getDetailMsg()));
    } catch (const H5::DataSetIException& e) {
        throw std::runtime_error("c++ exception get_HDF5_scalar_var (DataSet IException): "
                                 + std::string(e.getDetailMsg()));
    } catch (const std::exception& e) {
        throw std::runtime_error(std::string("c++ exception get_HDF5_scalar_var: ")
                                 + e.what());
    }
}
8 Usage Example
#include "BigDataStatMeth.hpp"
// Example usage (placeholder call -- replace `...` with real arguments):
//   1. an open BigDataStatMeth::hdf5Dataset* for the matrix,
//   2. a bool that enables OpenMP parallelism,
//   3. an Rcpp::Nullable<int> block size (NULL = auto),
//   4. an Rcpp::Nullable<int> thread count (NULL = auto).
// The call returns the scalar variance as a double.
auto result = get_HDF5_scalar_var(...);