get_HDF5_colVars

C++ Function Reference

1 Signature

Eigen::VectorXd BigDataStatMeth::get_HDF5_colVars(BigDataStatMeth::hdf5Dataset *dsA, bool bparal, Rcpp::Nullable< int > wsize, Rcpp::Nullable< int > threads)

2 Description

Column variances of an HDF5 matrix (block-wise, parallel).

3 Parameters

  • dsA (BigDataStatMeth::hdf5Dataset *): Open HDF5 dataset.
  • bparal (bool): Enable OpenMP parallelism.
  • wsize (Rcpp::Nullable< int >): Block size (NULL = auto).
  • threads (Rcpp::Nullable< int >): Thread count (NULL = auto).

4 Returns

Vector of length ncols_R.

5 Details

Equivalent to apply(X, 2, var) — uses Bessel’s correction (n-1). If nrow_R < 2 the variance is undefined and every element of the result is NaN (base R reports NA for a single-row input).

6 Call Graph

Function dependencies

7 Source Code

File: inst/include/hdf5Algebra/matrixAggregations.hpp — Lines 406-467

/**
 * @brief Sample variance of every R-column of an HDF5-backed matrix.
 *
 * The dataset is stored transposed with respect to R: each HDF5 row holds one
 * complete R column. The HDF5 rows are split into blocks (agg_block_size /
 * agg_make_blocks); every block is read in full and the variance of each of
 * its rows is computed with Bessel's correction (divisor n - 1).
 *
 * The variance is computed in two passes over the in-memory block (mean first,
 * then the sum of squared deviations). Unlike the one-pass
 * (sum_sq - sum^2 / n) formula, this does not cancel catastrophically when the
 * mean is large compared with the spread and can never yield a negative value.
 *
 * @param dsA     Dataset to read from; must not be null.
 * @param bparal  Forwarded to BigDataStatMeth::get_threads to choose the
 *                number of OpenMP threads.
 * @param wsize   Optional block size, forwarded to agg_block_size.
 * @param threads Optional thread count, forwarded to get_threads.
 * @return Vector with one variance per HDF5 row (i.e. per R column). When the
 *         dataset has fewer than two HDF5 columns (R rows) every element is
 *         NaN.
 * @throws std::runtime_error on a null dataset, on H5::FileIException /
 *         H5::DataSetIException, or on any std::exception raised while
 *         processing (including inside the parallel loop). Exceptions of
 *         other types propagate unchanged.
 *
 * @note Uses std::exception_ptr, declared in <exception>.
 */
inline Eigen::VectorXd get_HDF5_colVars(BigDataStatMeth::hdf5Dataset* dsA,
                                         bool bparal,
                                         Rcpp::Nullable<int> wsize,
                                         Rcpp::Nullable<int> threads)
{
    try {
        if (dsA == nullptr) {
            throw std::runtime_error("dataset pointer is null");
        }

        const hsize_t nHDF5rows = dsA->nrows();  // R ncols (iterated)
        const hsize_t nHDF5cols = dsA->ncols();  // R nrows (fixed)
        const double n = static_cast<double>(nHDF5cols);

        // Variance is undefined for fewer than two observations.
        if (nHDF5cols < 2) {
            return Eigen::VectorXd::Constant(
                static_cast<Eigen::Index>(nHDF5rows),
                std::numeric_limits<double>::quiet_NaN());
        }

        const hsize_t bs = agg_block_size(wsize, nHDF5rows, nHDF5cols);

        std::vector<hsize_t> starts, sizes;
        agg_make_blocks(nHDF5rows, bs, starts, sizes);

        const std::vector<hsize_t> stride = {1, 1}, blk = {1, 1};
        const int nthreads = static_cast<int>(
            BigDataStatMeth::get_threads(bparal, threads));

        Eigen::VectorXd result(static_cast<Eigen::Index>(nHDF5rows));

        // An exception must not leave an OpenMP parallel region (the runtime
        // would terminate the process). The first failure is parked here and
        // rethrown once the region has finished, so the handlers at the bottom
        // of this function see it exactly as they would in serial code.
        std::exception_ptr first_error;

        #pragma omp parallel for schedule(dynamic) num_threads(nthreads) \
                shared(dsA, starts, sizes, result, first_error)
        for (hsize_t bi = 0; bi < starts.size(); bi++) {
            try {
                std::vector<double> vd(sizes[bi] * nHDF5cols);

                // The omp critical(accessFile) guard around this read was
                // removed on 20260325.
                // NOTE(review): this assumes readDatasetBlock may be called
                // concurrently from several threads — confirm against the
                // HDF5 build / hdf5Dataset implementation.
                dsA->readDatasetBlock({starts[bi], 0}, {sizes[bi], nHDF5cols},
                                      stride, blk, vd.data());

                // One row of X per R-column, all n observations present.
                // RMMatd is presumably a row-major dynamic double matrix.
                Eigen::Map<const RMMatd> X(vd.data(),
                    static_cast<Eigen::Index>(sizes[bi]),
                    static_cast<Eigen::Index>(nHDF5cols));

                // Two-pass variance: var = sum((x - mean)^2) / (n - 1).
                const Eigen::VectorXd mean = X.rowwise().mean();
                const Eigen::VectorXd sqdev =
                    (X.colwise() - mean).rowwise().squaredNorm();

                // Blocks cover disjoint row ranges, so threads never write to
                // the same elements of result.
                result.segment(static_cast<Eigen::Index>(starts[bi]),
                               static_cast<Eigen::Index>(sizes[bi])) =
                    sqdev / (n - 1.0);
            } catch (...) {
                #pragma omp critical(get_HDF5_colVars_error)
                {
                    if (!first_error) {
                        first_error = std::current_exception();
                    }
                }
            }
        }

        if (first_error) {
            std::rethrow_exception(first_error);
        }

        return result;

    } catch (const H5::FileIException& e) {
        throw std::runtime_error("c++ exception get_HDF5_colVars (File IException): "
                                 + std::string(e.getDetailMsg()));
    } catch (const H5::DataSetIException& e) {
        throw std::runtime_error("c++ exception get_HDF5_colVars (DataSet IException): "
                                 + std::string(e.getDetailMsg()));
    } catch (const std::exception& e) {
        throw std::runtime_error(std::string("c++ exception get_HDF5_colVars: ")
                                 + e.what());
    }
}

8 Usage Example

#include "BigDataStatMeth.hpp"

// Example usage (placeholder): replace "..." with the four arguments from the
// signature in section 1 — the hdf5Dataset pointer, the parallel flag, and the
// optional block-size and thread-count values.
auto result = get_HDF5_colVars(...);