RcppTSQRHdf5

C++ Function Reference

1 Signature

void BigDataStatMeth::RcppTSQRHdf5(BigDataStatMeth::hdf5Dataset *dsA, BigDataStatMeth::hdf5Dataset *dsQ, BigDataStatMeth::hdf5Dataset *dsR, bool bthin, Rcpp::Nullable< int > block_size, Rcpp::Nullable< int > threads)

2 Description

TSQR (Tall-Skinny QR) decomposition for HDF5 matrices.

3 Parameters

dsA (BigDataStatMeth::hdf5Dataset *): Input matrix dataset (m × n, m >> n recommended)
dsQ (BigDataStatMeth::hdf5Dataset *): Output Q matrix dataset
dsR (BigDataStatMeth::hdf5Dataset *): Output R matrix dataset
bthin (bool): If true, Q is m×k (thin); if false, Q is m×m (full)
block_size (Rcpp::Nullable< int >): Minimum rows per block (NULL = auto based on threads)
threads (Rcpp::Nullable< int >): OpenMP threads for the local QR phase (NULL = auto)

4 Details

Parallel QR for tall-skinny matrices (m >> n). Divides A into row blocks, factorises each block in parallel via OpenMP, then reduces the local R factors with a sequential QR on their stack. Assembles the global thin Q from local Q factors and the reduction Q.

5 Call Graph

6 Source Code

Implementation

File: inst/include/hdf5Algebra/matrixQR.hpp • Lines 424-644

inline void RcppTSQRHdf5( BigDataStatMeth::hdf5Dataset* dsA,
                           BigDataStatMeth::hdf5Dataset* dsQ,
                           BigDataStatMeth::hdf5Dataset* dsR,
                           bool bthin,
                           Rcpp::Nullable<int> block_size = R_NilValue,
                           Rcpp::Nullable<int> threads    = R_NilValue )
{
    try {

        // ── HDF5 dimension convention (package-wide) ──────────────────────────
        // nrows()   = HDF5 first dim  = R ncols  (n)
        // ncols()   = HDF5 second dim = R nrows  (m)
        const int n = static_cast<int>(dsA->nrows());   // R columns
        const int m = static_cast<int>(dsA->ncols());   // R rows

        if (n <= 0 || m <= 0)
            throw std::runtime_error("RcppTSQRHdf5: empty matrix");
        if (m < n)
            throw std::runtime_error(
                "RcppTSQRHdf5: matrix must have m >= n (use method='lapack' for wide/square matrices)");
        
        if (m < n)
            throw std::runtime_error("RcppTSQRHdf5: matrix must have m >= n");
        
        // Warn if not genuinely tall-skinny — LAPACK will be more efficient
        if (m < 2 * n)
            Rf_warning("method='tsqr' is inefficient for nearly-square matrices "
                           "(m < 2*n); consider method='lapack' or method='auto'");
        

        // ── Thread count (CRAN-compliant via get_number_threads) ──────────────
        const int nthreads = static_cast<int>( get_number_threads(threads, R_NilValue));
        
        // Guard: nthreads must be >= 1
        // get_number_threads returns unsigned; if cast gives <= 0, clamp to 1
        const int safe_nthreads = (nthreads > 0) ? nthreads : 1;
        
        // ── Block size: each block needs at least n rows for a rank-n local QR ─
        int bsize;
        if (block_size.isNotNull()) {
            bsize = std::max(n, Rcpp::as<int>(block_size));
        } else {
            // Auto: spread m rows evenly across threads, min n rows per block
            //.. 20260428 ..// bsize = std::max(n, static_cast<int>((m + nthreads - 1) / nthreads));
            bsize = std::max(n, static_cast<int>((m + safe_nthreads - 1) / safe_nthreads));
        }

        const int nblocks = (m + bsize - 1) / bsize;

        // ── Storage for local Q and R factors ─────────────────────────────────
        std::vector<Eigen::MatrixXd> local_R(nblocks);
        std::vector<Eigen::MatrixXd> local_Q(nblocks);

        const std::vector<hsize_t> stride = {1, 1}, blk = {1, 1};

        // ── Step 1: parallel local QR per row block ───────────────────────────
        //.. 20260428 ..// #pragma omp parallel for num_threads(nthreads) schedule(dynamic)
        #pragma omp parallel for num_threads(safe_nthreads) schedule(dynamic)
        for (int b = 0; b < nblocks; ++b) {
            const int row_start = b * bsize;
            const int brows     = std::min(bsize, m - row_start);

            // Read block A[row_start : row_start+brows, :]  from HDF5.
            // In HDF5 storage (transposed): columns are R rows, rows are R cols.
            //   offset = {0, row_start}   (HDF5 dim0=R-cols, HDF5 dim1=R-rows)
            //   count  = {n, brows}
            std::vector<double> vd(static_cast<std::size_t>(n) * brows);
            #pragma omp critical(hdf5_tsqr_read)
            {
                dsA->readDatasetBlock(
                    {0, static_cast<hsize_t>(row_start)},
                    {static_cast<hsize_t>(n), static_cast<hsize_t>(brows)},
                    stride, blk, vd.data());
            }

            // Map to Eigen row-major (n × brows) then transpose → brows × n
            Eigen::MatrixXd Ablock =
                Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic,
                                         Eigen::Dynamic, Eigen::RowMajor>>(
                    vd.data(), n, brows);
            Ablock.transposeInPlace();   // now brows × n  (R dimensions)

            // Local thin QR:  Ablock (brows×n) = Q_b (brows×n) · R_b (n×n)
            Eigen::HouseholderQR<Eigen::MatrixXd> qr_local(Ablock);
            local_R[b] = qr_local.matrixQR().topRows(n)
                                  .template triangularView<Eigen::Upper>();
            // Thin Q: brows × n
            local_Q[b] = qr_local.householderQ() *
                         Eigen::MatrixXd::Identity(brows, n);
        }

        // ── Step 2: stack local R factors and compute top-level QR ───────────
        // R_stack is (nblocks·n) × n
        Eigen::MatrixXd R_stack(nblocks * n, n);
        for (int b = 0; b < nblocks; ++b)
            R_stack.block(b * n, 0, n, n) = local_R[b];

        Eigen::HouseholderQR<Eigen::MatrixXd> qr_top(R_stack);

        // Final R (n×n), upper triangular
        const Eigen::MatrixXd R_final =
            qr_top.matrixQR().topRows(n)
                              .template triangularView<Eigen::Upper>();

        // Q_top: (nblocks·n) × n  (thin factor of R_stack)
        const Eigen::MatrixXd Q_top =
            qr_top.householderQ() *
            Eigen::MatrixXd::Identity(nblocks * n, n);

        // ── Step 3: assemble global thin Q (m×n) ─────────────────────────────
        // Q[block_b rows] = local_Q[b]  ·  Q_top[b·n : (b+1)·n, :]
        Eigen::MatrixXd Q_thin(m, n);
        for (int b = 0; b < nblocks; ++b) {
            const int row_start = b * bsize;
            const int brows     = std::min(bsize, m - row_start);
            Q_thin.block(row_start, 0, brows, n) =
                local_Q[b] * Q_top.block(b * n, 0, n, n);
        }
        
        // ── Step 4 + 5: write Q and R ─────────────────────────────────────────────
        // R is written ONCE with the correct final dimensions — never twice.
        if (bthin) {
            // Thin R: n × n
            dsR->inheritCompressionLevel(dsA->getCompressionLevel());
            dsR->createDataset(n, n, "real");
            dsR->writeDataset(Rcpp::wrap(R_final));
            
            // Thin Q: m × n
            dsQ->inheritCompressionLevel(dsA->getCompressionLevel());
            dsQ->createDataset(m, n, "real");
            dsQ->writeDataset(Rcpp::wrap(Q_thin));
            
        } else {
            // Full Q: m × m via basis completion
            Eigen::HouseholderQR<Eigen::MatrixXd> qr_full(Q_thin);
            Eigen::MatrixXd Q_full = qr_full.householderQ()
                * Eigen::MatrixXd::Identity(m, m);
            const Eigen::VectorXd diag_signs =
            qr_full.matrixQR().diagonal().head(n).array().sign().matrix();
            for (int i = 0; i < n; ++i)
                Q_full.col(i) *= diag_signs(i);
            
            dsQ->inheritCompressionLevel(dsA->getCompressionLevel());
            dsQ->createDataset(m, m, "real");
            dsQ->writeDataset(Rcpp::wrap(Q_full));
            
            // Full R: m × n (padded with zeros below row n) — created ONCE
            Eigen::MatrixXd R_padded = Eigen::MatrixXd::Zero(m, n);
            R_padded.topRows(n) = R_final;
            dsR->inheritCompressionLevel(dsA->getCompressionLevel());
            dsR->createDataset(m, n, "real");
            dsR->writeDataset(Rcpp::wrap(R_padded));
        }

        // // ── Step 4: write R ───────────────────────────────────────────────────
        // dsR->inheritCompressionLevel(dsA->getCompressionLevel());
        // dsR->createDataset(n, n, "real");
        // dsR->writeDataset(Rcpp::wrap(R_final));
        // 
        // // ── Step 5: write Q ───────────────────────────────────────────────────
        // if (bthin) {
        //     // Thin Q: m × n
        //     dsQ->inheritCompressionLevel(dsA->getCompressionLevel());
        //     dsQ->createDataset(m, n, "real");
        //     dsQ->writeDataset(Rcpp::wrap(Q_thin));
        // } else {
        //     // Full Q: m × m
        //     // Basis completion: compute QR of Q_thin directly.
        //     // Since Q_thin already has orthonormal columns, its Householder QR
        //     // has R = I_n, so householderQ() returns a full m×m orthogonal matrix
        //     // whose first n columns ARE Q_thin.  This guarantees:
        //     //   Q_full[:, 1:n]  = Q_thin
        //     //   Q_full %*% R_padded = Q_thin %*% R_final = A  (exact reconstruction)
        //     //
        //     // Previous approach (QR of [Q_thin | random]) was INCORRECT because
        //     // Householder reorders column pivots and Q_thin is not preserved
        //     // as the first n columns of the result. (20260304)
        //     //
        //     // Cost: O(m² n) — expensive for large m; use thin=TRUE when possible.
        //     
        //     //.. 20260304 ..// Eigen::HouseholderQR<Eigen::MatrixXd> qr_full(Q_thin);
        //     //.. 20260304 ..// const Eigen::MatrixXd Q_full = qr_full.householderQ();
        // 
        //     // Gram-Schmidt: Q_full[:,1:n] = Q_thin por construcción → garantiza
        //     // Q_full * R_padded = Q_thin * R_final = A  (20260304)
        //     
        //     Eigen::HouseholderQR<Eigen::MatrixXd> qr_full(Q_thin);
        //     Eigen::MatrixXd Q_full = qr_full.householderQ()
        //         * Eigen::MatrixXd::Identity(m, m);
        //     // Correct sign flips: Q_full[:,i] *= sign(R_ii) for i=0..n-1
        //     // This makes Q_full[:,1:n] == Q_thin exactly (R diagonal is ±1 since Q_thin is orthonormal)
        //     const Eigen::VectorXd diag_signs =
        //     qr_full.matrixQR().diagonal().head(n).array().sign().matrix();
        //     for (int i = 0; i < n; ++i)
        //         Q_full.col(i) *= diag_signs(i);
        //     
        //     dsQ->inheritCompressionLevel(dsA->getCompressionLevel());
        //     dsQ->createDataset(m, m, "real");
        //     dsQ->writeDataset(Rcpp::wrap(Q_full));
        // 
        //     // Pad R to m × n (zeros below row n) to match full-QR convention
        //     Eigen::MatrixXd R_padded = Eigen::MatrixXd::Zero(m, n);
        //     R_padded.topRows(n) = R_final;
        //     // Re-create R dataset with updated dimensions
        //     dsR->inheritCompressionLevel(dsA->getCompressionLevel());
        //     dsR->createDataset(m, n, "real");
        //     dsR->writeDataset(Rcpp::wrap(R_padded));
        // }

    } catch( H5::FileIException& error ) {
        throw std::runtime_error("c++ exception RcppTSQRHdf5 (File IException)");
    } catch( H5::GroupIException& error ) {
        throw std::runtime_error("c++ exception RcppTSQRHdf5 (Group IException)");
    } catch( H5::DataSetIException& error ) {
        throw std::runtime_error("c++ exception RcppTSQRHdf5 (DataSet IException)");
    } catch(std::exception& ex) {
        throw std::runtime_error(std::string("c++ exception RcppTSQRHdf5: ") + ex.what());
    }

    return void();
}

7 Usage Example

#include "BigDataStatMeth.hpp"

// Example usage
auto result = RcppTSQRHdf5(...);

--- title: "RcppTSQRHdf5" subtitle: "C++ Function Reference" --- ## Signature ```cpp void BigDataStatMeth::RcppTSQRHdf5(BigDataStatMeth::hdf5Dataset *dsA, BigDataStatMeth::hdf5Dataset *dsQ, BigDataStatMeth::hdf5Dataset *dsR, bool bthin, Rcpp::Nullable< int > block_size, Rcpp::Nullable< int > threads) ``` ## Description TSQR (Tall-Skinny QR) decomposition for HDF5 matrices. ## Parameters - `dsA` (`BigDataStatMeth::hdf5Dataset *`): Input matrix dataset (m × n, m >> n recommended) - `dsQ` (`BigDataStatMeth::hdf5Dataset *`): Output Q matrix dataset - `dsR` (`BigDataStatMeth::hdf5Dataset *`): Output R matrix dataset - `bthin` (`bool`): If true, Q is m×k (thin); if false, Q is m×m (full) - `block_size` (`Rcpp::Nullable< int >`): Minimum rows per block (NULL = auto based on threads) - `threads` (`Rcpp::Nullable< int >`): OpenMP threads for the local QR phase (NULL = auto) ## Details Parallel QR for tall-skinny matrices (m >> n). Divides A into row blocks, factorises each block in parallel via OpenMP, then reduces the local R factors with a sequential QR on their stack. Assembles the global thin Q from local Q factors and the reduction Q. ## Call Graph ::: {.diagram-section} <object type="image/svg+xml" data="images/namespace_big_data_stat_meth_a9782a58cbfdaeb0648cd0f4aafbb648c_cgraph.svg" class="class-diagram"> <img src="images/namespace_big_data_stat_meth_a9782a58cbfdaeb0648cd0f4aafbb648c_cgraph.svg" alt="Function dependencies"/> </object> ::: ## Source Code ::: {.callout-note collapse="true"} ### Implementation **File:** `inst/include/hdf5Algebra/matrixQR.hpp` • **Lines 424-644** ```cpp inline void RcppTSQRHdf5( BigDataStatMeth::hdf5Dataset* dsA, BigDataStatMeth::hdf5Dataset* dsQ, BigDataStatMeth::hdf5Dataset* dsR, bool bthin, Rcpp::Nullable<int> block_size = R_NilValue, Rcpp::Nullable<int> threads = R_NilValue ) { try { // ── HDF5 dimension convention (package-wide) ────────────────────────── // nrows() = HDF5 first dim = R ncols (n) // ncols() = HDF5 second dim = R nrows (m) const int n = static_cast<int>(dsA->nrows()); // R columns const int m = static_cast<int>(dsA->ncols()); // R rows if (n <= 0 || m <= 0) throw std::runtime_error("RcppTSQRHdf5: empty matrix"); if (m < n) throw std::runtime_error( "RcppTSQRHdf5: matrix must have m >= n (use method='lapack' for wide/square matrices)"); if (m < n) throw std::runtime_error("RcppTSQRHdf5: matrix must have m >= n"); // Warn if not genuinely tall-skinny — LAPACK will be more efficient if (m < 2 * n) Rf_warning("method='tsqr' is inefficient for nearly-square matrices " "(m < 2*n); consider method='lapack' or method='auto'"); // ── Thread count (CRAN-compliant via get_number_threads) ────────────── const int nthreads = static_cast<int>( get_number_threads(threads, R_NilValue)); // Guard: nthreads must be >= 1 // get_number_threads returns unsigned; if cast gives <= 0, clamp to 1 const int safe_nthreads = (nthreads > 0) ? nthreads : 1; // ── Block size: each block needs at least n rows for a rank-n local QR ─ int bsize; if (block_size.isNotNull()) { bsize = std::max(n, Rcpp::as<int>(block_size)); } else { // Auto: spread m rows evenly across threads, min n rows per block //.. 20260428 ..// bsize = std::max(n, static_cast<int>((m + nthreads - 1) / nthreads)); bsize = std::max(n, static_cast<int>((m + safe_nthreads - 1) / safe_nthreads)); } const int nblocks = (m + bsize - 1) / bsize; // ── Storage for local Q and R factors ───────────────────────────────── std::vector<Eigen::MatrixXd> local_R(nblocks); std::vector<Eigen::MatrixXd> local_Q(nblocks); const std::vector<hsize_t> stride = {1, 1}, blk = {1, 1}; // ── Step 1: parallel local QR per row block ─────────────────────────── //.. 20260428 ..// #pragma omp parallel for num_threads(nthreads) schedule(dynamic) #pragma omp parallel for num_threads(safe_nthreads) schedule(dynamic) for (int b = 0; b < nblocks; ++b) { const int row_start = b * bsize; const int brows = std::min(bsize, m - row_start); // Read block A[row_start : row_start+brows, :] from HDF5. // In HDF5 storage (transposed): columns are R rows, rows are R cols. // offset = {0, row_start} (HDF5 dim0=R-cols, HDF5 dim1=R-rows) // count = {n, brows} std::vector<double> vd(static_cast<std::size_t>(n) * brows); #pragma omp critical(hdf5_tsqr_read) { dsA->readDatasetBlock( {0, static_cast<hsize_t>(row_start)}, {static_cast<hsize_t>(n), static_cast<hsize_t>(brows)}, stride, blk, vd.data()); } // Map to Eigen row-major (n × brows) then transpose → brows × n Eigen::MatrixXd Ablock = Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>( vd.data(), n, brows); Ablock.transposeInPlace(); // now brows × n (R dimensions) // Local thin QR: Ablock (brows×n) = Q_b (brows×n) · R_b (n×n) Eigen::HouseholderQR<Eigen::MatrixXd> qr_local(Ablock); local_R[b] = qr_local.matrixQR().topRows(n) .template triangularView<Eigen::Upper>(); // Thin Q: brows × n local_Q[b] = qr_local.householderQ() * Eigen::MatrixXd::Identity(brows, n); } // ── Step 2: stack local R factors and compute top-level QR ─────────── // R_stack is (nblocks·n) × n Eigen::MatrixXd R_stack(nblocks * n, n); for (int b = 0; b < nblocks; ++b) R_stack.block(b * n, 0, n, n) = local_R[b]; Eigen::HouseholderQR<Eigen::MatrixXd> qr_top(R_stack); // Final R (n×n), upper triangular const Eigen::MatrixXd R_final = qr_top.matrixQR().topRows(n) .template triangularView<Eigen::Upper>(); // Q_top: (nblocks·n) × n (thin factor of R_stack) const Eigen::MatrixXd Q_top = qr_top.householderQ() * Eigen::MatrixXd::Identity(nblocks * n, n); // ── Step 3: assemble global thin Q (m×n) ───────────────────────────── // Q[block_b rows] = local_Q[b] · Q_top[b·n : (b+1)·n, :] Eigen::MatrixXd Q_thin(m, n); for (int b = 0; b < nblocks; ++b) { const int row_start = b * bsize; const int brows = std::min(bsize, m - row_start); Q_thin.block(row_start, 0, brows, n) = local_Q[b] * Q_top.block(b * n, 0, n, n); } // ── Step 4 + 5: write Q and R ───────────────────────────────────────────── // R is written ONCE with the correct final dimensions — never twice. if (bthin) { // Thin R: n × n dsR->inheritCompressionLevel(dsA->getCompressionLevel()); dsR->createDataset(n, n, "real"); dsR->writeDataset(Rcpp::wrap(R_final)); // Thin Q: m × n dsQ->inheritCompressionLevel(dsA->getCompressionLevel()); dsQ->createDataset(m, n, "real"); dsQ->writeDataset(Rcpp::wrap(Q_thin)); } else { // Full Q: m × m via basis completion Eigen::HouseholderQR<Eigen::MatrixXd> qr_full(Q_thin); Eigen::MatrixXd Q_full = qr_full.householderQ() * Eigen::MatrixXd::Identity(m, m); const Eigen::VectorXd diag_signs = qr_full.matrixQR().diagonal().head(n).array().sign().matrix(); for (int i = 0; i < n; ++i) Q_full.col(i) *= diag_signs(i); dsQ->inheritCompressionLevel(dsA->getCompressionLevel()); dsQ->createDataset(m, m, "real"); dsQ->writeDataset(Rcpp::wrap(Q_full)); // Full R: m × n (padded with zeros below row n) — created ONCE Eigen::MatrixXd R_padded = Eigen::MatrixXd::Zero(m, n); R_padded.topRows(n) = R_final; dsR->inheritCompressionLevel(dsA->getCompressionLevel()); dsR->createDataset(m, n, "real"); dsR->writeDataset(Rcpp::wrap(R_padded)); } // // ── Step 4: write R ─────────────────────────────────────────────────── // dsR->inheritCompressionLevel(dsA->getCompressionLevel()); // dsR->createDataset(n, n, "real"); // dsR->writeDataset(Rcpp::wrap(R_final)); // // // ── Step 5: write Q ─────────────────────────────────────────────────── // if (bthin) { // // Thin Q: m × n // dsQ->inheritCompressionLevel(dsA->getCompressionLevel()); // dsQ->createDataset(m, n, "real"); // dsQ->writeDataset(Rcpp::wrap(Q_thin)); // } else { // // Full Q: m × m // // Basis completion: compute QR of Q_thin directly. // // Since Q_thin already has orthonormal columns, its Householder QR // // has R = I_n, so householderQ() returns a full m×m orthogonal matrix // // whose first n columns ARE Q_thin. This guarantees: // // Q_full[:, 1:n] = Q_thin // // Q_full %*% R_padded = Q_thin %*% R_final = A (exact reconstruction) // // // // Previous approach (QR of [Q_thin | random]) was INCORRECT because // // Householder reorders column pivots and Q_thin is not preserved // // as the first n columns of the result. (20260304) // // // // Cost: O(m² n) — expensive for large m; use thin=TRUE when possible. // // //.. 20260304 ..// Eigen::HouseholderQR<Eigen::MatrixXd> qr_full(Q_thin); // //.. 20260304 ..// const Eigen::MatrixXd Q_full = qr_full.householderQ(); // // // Gram-Schmidt: Q_full[:,1:n] = Q_thin por construcción → garantiza // // Q_full * R_padded = Q_thin * R_final = A (20260304) // // Eigen::HouseholderQR<Eigen::MatrixXd> qr_full(Q_thin); // Eigen::MatrixXd Q_full = qr_full.householderQ() // * Eigen::MatrixXd::Identity(m, m); // // Correct sign flips: Q_full[:,i] *= sign(R_ii) for i=0..n-1 // // This makes Q_full[:,1:n] == Q_thin exactly (R diagonal is ±1 since Q_thin is orthonormal) // const Eigen::VectorXd diag_signs = // qr_full.matrixQR().diagonal().head(n).array().sign().matrix(); // for (int i = 0; i < n; ++i) // Q_full.col(i) *= diag_signs(i); // // dsQ->inheritCompressionLevel(dsA->getCompressionLevel()); // dsQ->createDataset(m, m, "real"); // dsQ->writeDataset(Rcpp::wrap(Q_full)); // // // Pad R to m × n (zeros below row n) to match full-QR convention // Eigen::MatrixXd R_padded = Eigen::MatrixXd::Zero(m, n); // R_padded.topRows(n) = R_final; // // Re-create R dataset with updated dimensions // dsR->inheritCompressionLevel(dsA->getCompressionLevel()); // dsR->createDataset(m, n, "real"); // dsR->writeDataset(Rcpp::wrap(R_padded)); // } } catch( H5::FileIException& error ) { throw std::runtime_error("c++ exception RcppTSQRHdf5 (File IException)"); } catch( H5::GroupIException& error ) { throw std::runtime_error("c++ exception RcppTSQRHdf5 (Group IException)"); } catch( H5::DataSetIException& error ) { throw std::runtime_error("c++ exception RcppTSQRHdf5 (DataSet IException)"); } catch(std::exception& ex) { throw std::runtime_error(std::string("c++ exception RcppTSQRHdf5: ") + ex.what()); } return void(); } ``` ::: ## Usage Example ```cpp #include "BigDataStatMeth.hpp" // Example usage auto result = RcppTSQRHdf5(...); ```