RcppTSQRHdf5
C++ Function Reference
1 Signature
void BigDataStatMeth::RcppTSQRHdf5(BigDataStatMeth::hdf5Dataset *dsA, BigDataStatMeth::hdf5Dataset *dsQ, BigDataStatMeth::hdf5Dataset *dsR, bool bthin, Rcpp::Nullable< int > block_size, Rcpp::Nullable< int > threads)2 Description
TSQR (Tall-Skinny QR) decomposition for HDF5 matrices.
3 Parameters
dsA(BigDataStatMeth::hdf5Dataset *): Input matrix dataset (m × n, m >> n recommended)dsQ(BigDataStatMeth::hdf5Dataset *): Output Q matrix datasetdsR(BigDataStatMeth::hdf5Dataset *): Output R matrix datasetbthin(bool): If true, Q is m×k (thin); if false, Q is m×m (full)block_size(Rcpp::Nullable< int >): Minimum rows per block (NULL = auto based on threads)threads(Rcpp::Nullable< int >): OpenMP threads for the local QR phase (NULL = auto)
4 Details
Parallel QR for tall-skinny matrices (m >> n). Divides A into row blocks, factorises each block in parallel via OpenMP, then reduces the local R factors with a sequential QR on their stack. Assembles the global thin Q from local Q factors and the reduction Q.
5 Call Graph
6 Source Code
NoteImplementation
File: inst/include/hdf5Algebra/matrixQR.hpp • Lines 424-644
inline void RcppTSQRHdf5( BigDataStatMeth::hdf5Dataset* dsA,
BigDataStatMeth::hdf5Dataset* dsQ,
BigDataStatMeth::hdf5Dataset* dsR,
bool bthin,
Rcpp::Nullable<int> block_size = R_NilValue,
Rcpp::Nullable<int> threads = R_NilValue )
{
try {
// ── HDF5 dimension convention (package-wide) ──────────────────────────
// nrows() = HDF5 first dim = R ncols (n)
// ncols() = HDF5 second dim = R nrows (m)
const int n = static_cast<int>(dsA->nrows()); // R columns
const int m = static_cast<int>(dsA->ncols()); // R rows
if (n <= 0 || m <= 0)
throw std::runtime_error("RcppTSQRHdf5: empty matrix");
if (m < n)
throw std::runtime_error(
"RcppTSQRHdf5: matrix must have m >= n (use method='lapack' for wide/square matrices)");
if (m < n)
throw std::runtime_error("RcppTSQRHdf5: matrix must have m >= n");
// Warn if not genuinely tall-skinny — LAPACK will be more efficient
if (m < 2 * n)
Rf_warning("method='tsqr' is inefficient for nearly-square matrices "
"(m < 2*n); consider method='lapack' or method='auto'");
// ── Thread count (CRAN-compliant via get_number_threads) ──────────────
const int nthreads = static_cast<int>( get_number_threads(threads, R_NilValue));
// Guard: nthreads must be >= 1
// get_number_threads returns unsigned; if cast gives <= 0, clamp to 1
const int safe_nthreads = (nthreads > 0) ? nthreads : 1;
// ── Block size: each block needs at least n rows for a rank-n local QR ─
int bsize;
if (block_size.isNotNull()) {
bsize = std::max(n, Rcpp::as<int>(block_size));
} else {
// Auto: spread m rows evenly across threads, min n rows per block
//.. 20260428 ..// bsize = std::max(n, static_cast<int>((m + nthreads - 1) / nthreads));
bsize = std::max(n, static_cast<int>((m + safe_nthreads - 1) / safe_nthreads));
}
const int nblocks = (m + bsize - 1) / bsize;
// ── Storage for local Q and R factors ─────────────────────────────────
std::vector<Eigen::MatrixXd> local_R(nblocks);
std::vector<Eigen::MatrixXd> local_Q(nblocks);
const std::vector<hsize_t> stride = {1, 1}, blk = {1, 1};
// ── Step 1: parallel local QR per row block ───────────────────────────
//.. 20260428 ..// #pragma omp parallel for num_threads(nthreads) schedule(dynamic)
#pragma omp parallel for num_threads(safe_nthreads) schedule(dynamic)
for (int b = 0; b < nblocks; ++b) {
const int row_start = b * bsize;
const int brows = std::min(bsize, m - row_start);
// Read block A[row_start : row_start+brows, :] from HDF5.
// In HDF5 storage (transposed): columns are R rows, rows are R cols.
// offset = {0, row_start} (HDF5 dim0=R-cols, HDF5 dim1=R-rows)
// count = {n, brows}
std::vector<double> vd(static_cast<std::size_t>(n) * brows);
#pragma omp critical(hdf5_tsqr_read)
{
dsA->readDatasetBlock(
{0, static_cast<hsize_t>(row_start)},
{static_cast<hsize_t>(n), static_cast<hsize_t>(brows)},
stride, blk, vd.data());
}
// Map to Eigen row-major (n × brows) then transpose → brows × n
Eigen::MatrixXd Ablock =
Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic,
Eigen::Dynamic, Eigen::RowMajor>>(
vd.data(), n, brows);
Ablock.transposeInPlace(); // now brows × n (R dimensions)
// Local thin QR: Ablock (brows×n) = Q_b (brows×n) · R_b (n×n)
Eigen::HouseholderQR<Eigen::MatrixXd> qr_local(Ablock);
local_R[b] = qr_local.matrixQR().topRows(n)
.template triangularView<Eigen::Upper>();
// Thin Q: brows × n
local_Q[b] = qr_local.householderQ() *
Eigen::MatrixXd::Identity(brows, n);
}
// ── Step 2: stack local R factors and compute top-level QR ───────────
// R_stack is (nblocks·n) × n
Eigen::MatrixXd R_stack(nblocks * n, n);
for (int b = 0; b < nblocks; ++b)
R_stack.block(b * n, 0, n, n) = local_R[b];
Eigen::HouseholderQR<Eigen::MatrixXd> qr_top(R_stack);
// Final R (n×n), upper triangular
const Eigen::MatrixXd R_final =
qr_top.matrixQR().topRows(n)
.template triangularView<Eigen::Upper>();
// Q_top: (nblocks·n) × n (thin factor of R_stack)
const Eigen::MatrixXd Q_top =
qr_top.householderQ() *
Eigen::MatrixXd::Identity(nblocks * n, n);
// ── Step 3: assemble global thin Q (m×n) ─────────────────────────────
// Q[block_b rows] = local_Q[b] · Q_top[b·n : (b+1)·n, :]
Eigen::MatrixXd Q_thin(m, n);
for (int b = 0; b < nblocks; ++b) {
const int row_start = b * bsize;
const int brows = std::min(bsize, m - row_start);
Q_thin.block(row_start, 0, brows, n) =
local_Q[b] * Q_top.block(b * n, 0, n, n);
}
// ── Step 4 + 5: write Q and R ─────────────────────────────────────────────
// R is written ONCE with the correct final dimensions — never twice.
if (bthin) {
// Thin R: n × n
dsR->inheritCompressionLevel(dsA->getCompressionLevel());
dsR->createDataset(n, n, "real");
dsR->writeDataset(Rcpp::wrap(R_final));
// Thin Q: m × n
dsQ->inheritCompressionLevel(dsA->getCompressionLevel());
dsQ->createDataset(m, n, "real");
dsQ->writeDataset(Rcpp::wrap(Q_thin));
} else {
// Full Q: m × m via basis completion
Eigen::HouseholderQR<Eigen::MatrixXd> qr_full(Q_thin);
Eigen::MatrixXd Q_full = qr_full.householderQ()
* Eigen::MatrixXd::Identity(m, m);
const Eigen::VectorXd diag_signs =
qr_full.matrixQR().diagonal().head(n).array().sign().matrix();
for (int i = 0; i < n; ++i)
Q_full.col(i) *= diag_signs(i);
dsQ->inheritCompressionLevel(dsA->getCompressionLevel());
dsQ->createDataset(m, m, "real");
dsQ->writeDataset(Rcpp::wrap(Q_full));
// Full R: m × n (padded with zeros below row n) — created ONCE
Eigen::MatrixXd R_padded = Eigen::MatrixXd::Zero(m, n);
R_padded.topRows(n) = R_final;
dsR->inheritCompressionLevel(dsA->getCompressionLevel());
dsR->createDataset(m, n, "real");
dsR->writeDataset(Rcpp::wrap(R_padded));
}
// // ── Step 4: write R ───────────────────────────────────────────────────
// dsR->inheritCompressionLevel(dsA->getCompressionLevel());
// dsR->createDataset(n, n, "real");
// dsR->writeDataset(Rcpp::wrap(R_final));
//
// // ── Step 5: write Q ───────────────────────────────────────────────────
// if (bthin) {
// // Thin Q: m × n
// dsQ->inheritCompressionLevel(dsA->getCompressionLevel());
// dsQ->createDataset(m, n, "real");
// dsQ->writeDataset(Rcpp::wrap(Q_thin));
// } else {
// // Full Q: m × m
// // Basis completion: compute QR of Q_thin directly.
// // Since Q_thin already has orthonormal columns, its Householder QR
// // has R = I_n, so householderQ() returns a full m×m orthogonal matrix
// // whose first n columns ARE Q_thin. This guarantees:
// // Q_full[:, 1:n] = Q_thin
// // Q_full %*% R_padded = Q_thin %*% R_final = A (exact reconstruction)
// //
// // Previous approach (QR of [Q_thin | random]) was INCORRECT because
// // Householder reorders column pivots and Q_thin is not preserved
// // as the first n columns of the result. (20260304)
// //
// // Cost: O(m² n) — expensive for large m; use thin=TRUE when possible.
//
// //.. 20260304 ..// Eigen::HouseholderQR<Eigen::MatrixXd> qr_full(Q_thin);
// //.. 20260304 ..// const Eigen::MatrixXd Q_full = qr_full.householderQ();
//
// // Gram-Schmidt: Q_full[:,1:n] = Q_thin por construcción → garantiza
// // Q_full * R_padded = Q_thin * R_final = A (20260304)
//
// Eigen::HouseholderQR<Eigen::MatrixXd> qr_full(Q_thin);
// Eigen::MatrixXd Q_full = qr_full.householderQ()
// * Eigen::MatrixXd::Identity(m, m);
// // Correct sign flips: Q_full[:,i] *= sign(R_ii) for i=0..n-1
// // This makes Q_full[:,1:n] == Q_thin exactly (R diagonal is ±1 since Q_thin is orthonormal)
// const Eigen::VectorXd diag_signs =
// qr_full.matrixQR().diagonal().head(n).array().sign().matrix();
// for (int i = 0; i < n; ++i)
// Q_full.col(i) *= diag_signs(i);
//
// dsQ->inheritCompressionLevel(dsA->getCompressionLevel());
// dsQ->createDataset(m, m, "real");
// dsQ->writeDataset(Rcpp::wrap(Q_full));
//
// // Pad R to m × n (zeros below row n) to match full-QR convention
// Eigen::MatrixXd R_padded = Eigen::MatrixXd::Zero(m, n);
// R_padded.topRows(n) = R_final;
// // Re-create R dataset with updated dimensions
// dsR->inheritCompressionLevel(dsA->getCompressionLevel());
// dsR->createDataset(m, n, "real");
// dsR->writeDataset(Rcpp::wrap(R_padded));
// }
} catch( H5::FileIException& error ) {
throw std::runtime_error("c++ exception RcppTSQRHdf5 (File IException)");
} catch( H5::GroupIException& error ) {
throw std::runtime_error("c++ exception RcppTSQRHdf5 (Group IException)");
} catch( H5::DataSetIException& error ) {
throw std::runtime_error("c++ exception RcppTSQRHdf5 (DataSet IException)");
} catch(std::exception& ex) {
throw std::runtime_error(std::string("c++ exception RcppTSQRHdf5: ") + ex.what());
}
return void();
}7 Usage Example
#include "BigDataStatMeth.hpp"
// Example usage
auto result = RcppTSQRHdf5(...);