RcppTSQRHdf5

C++ Function Reference

1 Signature

void BigDataStatMeth::RcppTSQRHdf5(BigDataStatMeth::hdf5Dataset *dsA, BigDataStatMeth::hdf5Dataset *dsQ, BigDataStatMeth::hdf5Dataset *dsR, bool bthin, Rcpp::Nullable< int > block_size, Rcpp::Nullable< int > threads)

2 Description

TSQR (Tall-Skinny QR) decomposition for HDF5 matrices.

3 Parameters

  • dsA (BigDataStatMeth::hdf5Dataset *): Input matrix dataset (m × n, m >> n recommended)
  • dsQ (BigDataStatMeth::hdf5Dataset *): Output Q matrix dataset
  • dsR (BigDataStatMeth::hdf5Dataset *): Output R matrix dataset
  • bthin (bool): If true, Q is m×k (thin); if false, Q is m×m (full)
  • block_size (Rcpp::Nullable< int >): Minimum rows per block (NULL = auto based on threads)
  • threads (Rcpp::Nullable< int >): OpenMP threads for the local QR phase (NULL = auto)

4 Details

Parallel QR for tall-skinny matrices (m >> n). Divides A into row blocks, factorises each block in parallel via OpenMP, then reduces the local R factors with a sequential QR on their stack. Assembles the global thin Q from local Q factors and the reduction Q.

5 Call Graph

Function dependencies

6 Source Code

File: inst/include/hdf5Algebra/matrixQR.hppLines 424-644

inline void RcppTSQRHdf5( BigDataStatMeth::hdf5Dataset* dsA,
                           BigDataStatMeth::hdf5Dataset* dsQ,
                           BigDataStatMeth::hdf5Dataset* dsR,
                           bool bthin,
                           Rcpp::Nullable<int> block_size = R_NilValue,
                           Rcpp::Nullable<int> threads    = R_NilValue )
{
    try {

        // ── HDF5 dimension convention (package-wide) ──────────────────────────
        // nrows()   = HDF5 first dim  = R ncols  (n)
        // ncols()   = HDF5 second dim = R nrows  (m)
        const int n = static_cast<int>(dsA->nrows());   // R columns
        const int m = static_cast<int>(dsA->ncols());   // R rows

        if (n <= 0 || m <= 0)
            throw std::runtime_error("RcppTSQRHdf5: empty matrix");
        if (m < n)
            throw std::runtime_error(
                "RcppTSQRHdf5: matrix must have m >= n (use method='lapack' for wide/square matrices)");
        
        if (m < n)
            throw std::runtime_error("RcppTSQRHdf5: matrix must have m >= n");
        
        // Warn if not genuinely tall-skinny — LAPACK will be more efficient
        if (m < 2 * n)
            Rf_warning("method='tsqr' is inefficient for nearly-square matrices "
                           "(m < 2*n); consider method='lapack' or method='auto'");
        

        // ── Thread count (CRAN-compliant via get_number_threads) ──────────────
        const int nthreads = static_cast<int>( get_number_threads(threads, R_NilValue));
        
        // Guard: nthreads must be >= 1
        // get_number_threads returns unsigned; if cast gives <= 0, clamp to 1
        const int safe_nthreads = (nthreads > 0) ? nthreads : 1;
        
        // ── Block size: each block needs at least n rows for a rank-n local QR ─
        int bsize;
        if (block_size.isNotNull()) {
            bsize = std::max(n, Rcpp::as<int>(block_size));
        } else {
            // Auto: spread m rows evenly across threads, min n rows per block
            //.. 20260428 ..// bsize = std::max(n, static_cast<int>((m + nthreads - 1) / nthreads));
            bsize = std::max(n, static_cast<int>((m + safe_nthreads - 1) / safe_nthreads));
        }

        const int nblocks = (m + bsize - 1) / bsize;

        // ── Storage for local Q and R factors ─────────────────────────────────
        std::vector<Eigen::MatrixXd> local_R(nblocks);
        std::vector<Eigen::MatrixXd> local_Q(nblocks);

        const std::vector<hsize_t> stride = {1, 1}, blk = {1, 1};

        // ── Step 1: parallel local QR per row block ───────────────────────────
        //.. 20260428 ..// #pragma omp parallel for num_threads(nthreads) schedule(dynamic)
        #pragma omp parallel for num_threads(safe_nthreads) schedule(dynamic)
        for (int b = 0; b < nblocks; ++b) {
            const int row_start = b * bsize;
            const int brows     = std::min(bsize, m - row_start);

            // Read block A[row_start : row_start+brows, :]  from HDF5.
            // In HDF5 storage (transposed): columns are R rows, rows are R cols.
            //   offset = {0, row_start}   (HDF5 dim0=R-cols, HDF5 dim1=R-rows)
            //   count  = {n, brows}
            std::vector<double> vd(static_cast<std::size_t>(n) * brows);
            #pragma omp critical(hdf5_tsqr_read)
            {
                dsA->readDatasetBlock(
                    {0, static_cast<hsize_t>(row_start)},
                    {static_cast<hsize_t>(n), static_cast<hsize_t>(brows)},
                    stride, blk, vd.data());
            }

            // Map to Eigen row-major (n × brows) then transpose → brows × n
            Eigen::MatrixXd Ablock =
                Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic,
                                         Eigen::Dynamic, Eigen::RowMajor>>(
                    vd.data(), n, brows);
            Ablock.transposeInPlace();   // now brows × n  (R dimensions)

            // Local thin QR:  Ablock (brows×n) = Q_b (brows×n) · R_b (n×n)
            Eigen::HouseholderQR<Eigen::MatrixXd> qr_local(Ablock);
            local_R[b] = qr_local.matrixQR().topRows(n)
                                  .template triangularView<Eigen::Upper>();
            // Thin Q: brows × n
            local_Q[b] = qr_local.householderQ() *
                         Eigen::MatrixXd::Identity(brows, n);
        }

        // ── Step 2: stack local R factors and compute top-level QR ───────────
        // R_stack is (nblocks·n) × n
        Eigen::MatrixXd R_stack(nblocks * n, n);
        for (int b = 0; b < nblocks; ++b)
            R_stack.block(b * n, 0, n, n) = local_R[b];

        Eigen::HouseholderQR<Eigen::MatrixXd> qr_top(R_stack);

        // Final R (n×n), upper triangular
        const Eigen::MatrixXd R_final =
            qr_top.matrixQR().topRows(n)
                              .template triangularView<Eigen::Upper>();

        // Q_top: (nblocks·n) × n  (thin factor of R_stack)
        const Eigen::MatrixXd Q_top =
            qr_top.householderQ() *
            Eigen::MatrixXd::Identity(nblocks * n, n);

        // ── Step 3: assemble global thin Q (m×n) ─────────────────────────────
        // Q[block_b rows] = local_Q[b]  ·  Q_top[b·n : (b+1)·n, :]
        Eigen::MatrixXd Q_thin(m, n);
        for (int b = 0; b < nblocks; ++b) {
            const int row_start = b * bsize;
            const int brows     = std::min(bsize, m - row_start);
            Q_thin.block(row_start, 0, brows, n) =
                local_Q[b] * Q_top.block(b * n, 0, n, n);
        }
        
        // ── Step 4 + 5: write Q and R ─────────────────────────────────────────────
        // R is written ONCE with the correct final dimensions — never twice.
        if (bthin) {
            // Thin R: n × n
            dsR->inheritCompressionLevel(dsA->getCompressionLevel());
            dsR->createDataset(n, n, "real");
            dsR->writeDataset(Rcpp::wrap(R_final));
            
            // Thin Q: m × n
            dsQ->inheritCompressionLevel(dsA->getCompressionLevel());
            dsQ->createDataset(m, n, "real");
            dsQ->writeDataset(Rcpp::wrap(Q_thin));
            
        } else {
            // Full Q: m × m via basis completion
            Eigen::HouseholderQR<Eigen::MatrixXd> qr_full(Q_thin);
            Eigen::MatrixXd Q_full = qr_full.householderQ()
                * Eigen::MatrixXd::Identity(m, m);
            const Eigen::VectorXd diag_signs =
            qr_full.matrixQR().diagonal().head(n).array().sign().matrix();
            for (int i = 0; i < n; ++i)
                Q_full.col(i) *= diag_signs(i);
            
            dsQ->inheritCompressionLevel(dsA->getCompressionLevel());
            dsQ->createDataset(m, m, "real");
            dsQ->writeDataset(Rcpp::wrap(Q_full));
            
            // Full R: m × n (padded with zeros below row n) — created ONCE
            Eigen::MatrixXd R_padded = Eigen::MatrixXd::Zero(m, n);
            R_padded.topRows(n) = R_final;
            dsR->inheritCompressionLevel(dsA->getCompressionLevel());
            dsR->createDataset(m, n, "real");
            dsR->writeDataset(Rcpp::wrap(R_padded));
        }

        // // ── Step 4: write R ───────────────────────────────────────────────────
        // dsR->inheritCompressionLevel(dsA->getCompressionLevel());
        // dsR->createDataset(n, n, "real");
        // dsR->writeDataset(Rcpp::wrap(R_final));
        // 
        // // ── Step 5: write Q ───────────────────────────────────────────────────
        // if (bthin) {
        //     // Thin Q: m × n
        //     dsQ->inheritCompressionLevel(dsA->getCompressionLevel());
        //     dsQ->createDataset(m, n, "real");
        //     dsQ->writeDataset(Rcpp::wrap(Q_thin));
        // } else {
        //     // Full Q: m × m
        //     // Basis completion: compute QR of Q_thin directly.
        //     // Since Q_thin already has orthonormal columns, its Householder QR
        //     // has R = I_n, so householderQ() returns a full m×m orthogonal matrix
        //     // whose first n columns ARE Q_thin.  This guarantees:
        //     //   Q_full[:, 1:n]  = Q_thin
        //     //   Q_full %*% R_padded = Q_thin %*% R_final = A  (exact reconstruction)
        //     //
        //     // Previous approach (QR of [Q_thin | random]) was INCORRECT because
        //     // Householder reorders column pivots and Q_thin is not preserved
        //     // as the first n columns of the result. (20260304)
        //     //
        //     // Cost: O(m² n) — expensive for large m; use thin=TRUE when possible.
        //     
        //     //.. 20260304 ..// Eigen::HouseholderQR<Eigen::MatrixXd> qr_full(Q_thin);
        //     //.. 20260304 ..// const Eigen::MatrixXd Q_full = qr_full.householderQ();
        // 
        //     // Gram-Schmidt: Q_full[:,1:n] = Q_thin por construcción → garantiza
        //     // Q_full * R_padded = Q_thin * R_final = A  (20260304)
        //     
        //     Eigen::HouseholderQR<Eigen::MatrixXd> qr_full(Q_thin);
        //     Eigen::MatrixXd Q_full = qr_full.householderQ()
        //         * Eigen::MatrixXd::Identity(m, m);
        //     // Correct sign flips: Q_full[:,i] *= sign(R_ii) for i=0..n-1
        //     // This makes Q_full[:,1:n] == Q_thin exactly (R diagonal is ±1 since Q_thin is orthonormal)
        //     const Eigen::VectorXd diag_signs =
        //     qr_full.matrixQR().diagonal().head(n).array().sign().matrix();
        //     for (int i = 0; i < n; ++i)
        //         Q_full.col(i) *= diag_signs(i);
        //     
        //     dsQ->inheritCompressionLevel(dsA->getCompressionLevel());
        //     dsQ->createDataset(m, m, "real");
        //     dsQ->writeDataset(Rcpp::wrap(Q_full));
        // 
        //     // Pad R to m × n (zeros below row n) to match full-QR convention
        //     Eigen::MatrixXd R_padded = Eigen::MatrixXd::Zero(m, n);
        //     R_padded.topRows(n) = R_final;
        //     // Re-create R dataset with updated dimensions
        //     dsR->inheritCompressionLevel(dsA->getCompressionLevel());
        //     dsR->createDataset(m, n, "real");
        //     dsR->writeDataset(Rcpp::wrap(R_padded));
        // }

    } catch( H5::FileIException& error ) {
        throw std::runtime_error("c++ exception RcppTSQRHdf5 (File IException)");
    } catch( H5::GroupIException& error ) {
        throw std::runtime_error("c++ exception RcppTSQRHdf5 (Group IException)");
    } catch( H5::DataSetIException& error ) {
        throw std::runtime_error("c++ exception RcppTSQRHdf5 (DataSet IException)");
    } catch(std::exception& ex) {
        throw std::runtime_error(std::string("c++ exception RcppTSQRHdf5: ") + ex.what());
    }

    return void();
}

7 Usage Example

#include "BigDataStatMeth.hpp"

// Example usage
auto result = RcppTSQRHdf5(...);