Cholesky_decomposition_outofcore_hdf5
C++ Function Reference
1 Signature
int BigDataStatMeth::Cholesky_decomposition_outofcore_hdf5(BigDataStatMeth::hdf5Dataset *inDataset, BigDataStatMeth::hdf5Dataset *outDataset, int idim0, int idim1, long dElementsBlock, Rcpp::Nullable<int> threads)
2 Description
Out-of-core Cholesky decomposition for large matrices using a fixed-tile (blocked) algorithm.
3 Parameters
inDataset (BigDataStatMeth::hdf5Dataset *): Input matrix dataset
outDataset (BigDataStatMeth::hdf5Dataset *): Output dataset for the Cholesky factor L
idim0 (int): Number of rows
idim1 (int): Number of columns
dElementsBlock (long): Block size (not used; tiles are fixed)
threads (Rcpp::Nullable<int>): Number of threads for parallel processing (optional)
4 Returns
int: 0 on success, 1 if the matrix is not positive definite, 2 on error
5 Details
Fixed-tile algorithm for constant memory usage with complete matrix handling:
- Processes the matrix in 10k × 10k tiles
- Memory usage: ~800 MB, constant regardless of matrix size (a single 10,000 × 10,000 tile of doubles is 10,000^2 × 8 bytes = 800 MB)
- Suitable for matrices with more than 250k rows/columns
- Three-step process per tile column: diagonal factorization, column solve, submatrix update (see the in-memory sketch below)
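The routine follows the standard blocked (right-looking) Cholesky scheme. The sketch below shows the same three steps on an in-memory Eigen matrix so the tile dependencies are easier to follow; the function name blockedCholeskySketch, the small default tile size, and the in-memory storage are illustrative assumptions and not part of the BigDataStatMeth API.

#include <Eigen/Dense>
#include <algorithm>

// Blocked right-looking Cholesky on an in-memory matrix (illustrative sketch).
// A is taken by value and updated tile by tile; L accumulates the lower-triangular factor.
// Returns 0 on success, 1 if a diagonal tile is not positive definite.
inline int blockedCholeskySketch(Eigen::MatrixXd A, Eigen::MatrixXd& L, int tileSize = 2)
{
    const int n = static_cast<int>(A.rows());
    const int numTiles = (n + tileSize - 1) / tileSize;
    L = Eigen::MatrixXd::Zero(n, n);

    for (int k = 0; k < numTiles; ++k) {
        int kStart = k * tileSize;
        int kSize  = std::min(tileSize, n - kStart);

        // Step 1: diagonal factorization, L(k,k) = chol(A(k,k))
        Eigen::LLT<Eigen::MatrixXd> llt(A.block(kStart, kStart, kSize, kSize));
        if (llt.info() != Eigen::Success) return 1;
        Eigen::MatrixXd Lkk = llt.matrixL();
        L.block(kStart, kStart, kSize, kSize) = Lkk;

        for (int i = k + 1; i < numTiles; ++i) {
            int iStart = i * tileSize;
            int iSize  = std::min(tileSize, n - iStart);

            // Step 2: column solve, L(i,k) = A(i,k) * inv(L(k,k)^T)
            Eigen::MatrixXd Lik = Lkk.triangularView<Eigen::Lower>()
                                     .solve(A.block(iStart, kStart, iSize, kSize).transpose())
                                     .transpose();
            L.block(iStart, kStart, iSize, kSize) = Lik;

            // Step 3: submatrix update, A(i,j) -= L(i,k) * L(j,k)^T (lower triangle only)
            for (int j = k + 1; j <= i; ++j) {
                int jStart = j * tileSize;
                int jSize  = std::min(tileSize, n - jStart);
                A.block(iStart, jStart, iSize, jSize) -=
                    Lik * L.block(jStart, kStart, jSize, kSize).transpose();
            }
        }
    }
    return 0;
}

With tileSize = 10000 this is the same loop structure as the out-of-core routine below; the HDF5 version streams each tile through readDatasetBlock/writeDatasetBlock instead of Eigen block views, so only a handful of tiles is held in memory at any time.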
6 Call Graph
7 Source Code
File: inst/include/hdf5Algebra/matrixInvCholesky.hpp • Lines 545-692
inline int Cholesky_decomposition_outofcore_hdf5(BigDataStatMeth::hdf5Dataset* inDataset,
                                                 BigDataStatMeth::hdf5Dataset* outDataset,
                                                 int idim0, int idim1, long dElementsBlock,
                                                 Rcpp::Nullable<int> threads = R_NilValue)
{
    // BigDataStatMeth::hdf5DatasetInternal* tmpA = nullptr;
    try {
        int dimensionSize = idim0;
        int tileSize = 10000; // Fixed tile size for predictable memory usage
        int numTiles = (dimensionSize + tileSize - 1) / tileSize;
        std::vector<hsize_t> stride = {1,1}, block = {1,1};

        // Create temporary working copy of input matrix
        // Block-wise algorithm requires modification of A during computation
        std::string tempAPath = inDataset->getGroup() + "/.tmp_chol_A_" + inDataset->getDatasetName();
        BigDataStatMeth::hdf5DatasetInternal tempA(inDataset->getFileName(), tempAPath, true);
        tempA.createDataset(idim0, idim1, "real");

        // Copy complete symmetric matrix to temporary working dataset
        // Need full matrix for proper block-wise updates
        for (int i = 0; i < numTiles; i++) {
            hsize_t iStart = i * tileSize;
            hsize_t iSize = std::min(static_cast<hsize_t>(tileSize), static_cast<hsize_t>(dimensionSize - iStart));

            for (int j = 0; j < numTiles; j++) { // Complete matrix, not just lower triangle
                hsize_t jStart = j * tileSize;
                hsize_t jSize = std::min(static_cast<hsize_t>(tileSize), static_cast<hsize_t>(dimensionSize - jStart));

                Eigen::MatrixXd Aij(iSize, jSize);
                std::vector<double> vAij(iSize * jSize);

                if (j <= i) {
                    // Read from lower triangle of input matrix
                    inDataset->readDatasetBlock({iStart, jStart}, {iSize, jSize}, stride, block, vAij.data());
                    Aij = Eigen::Map<Eigen::MatrixXd>(vAij.data(), iSize, jSize);
                } else {
                    // Use symmetry: A(i,j) = A(j,i)^T for upper triangle tiles
                    std::vector<double> vAji(jSize * iSize);
                    inDataset->readDatasetBlock({jStart, iStart}, {jSize, iSize}, stride, block, vAji.data());
                    Eigen::MatrixXd Aji = Eigen::Map<Eigen::MatrixXd>(vAji.data(), jSize, iSize);
                    Aij = Aji.transpose();
                }

                tempA.writeDatasetBlock(Rcpp::wrap(Aij), {iStart, jStart}, {iSize, jSize}, stride, block, false);
            }
        }

        // Block-wise Cholesky algorithm: A = L * L^T where L is lower triangular
        // Process tiles in order: diagonal factorization -> column solve -> submatrix update
        for (int k = 0; k < numTiles; k++) {
            hsize_t kStart = k * tileSize;
            hsize_t kSize = std::min(static_cast<hsize_t>(tileSize), static_cast<hsize_t>(dimensionSize - kStart));

            // Step 1: Diagonal factorization - L(k,k) = cholesky(A(k,k))
            Eigen::MatrixXd Akk(kSize, kSize);
            std::vector<double> vAkk(kSize * kSize);
            tempA.readDatasetBlock({kStart, kStart}, {kSize, kSize}, stride, block, vAkk.data());
            Akk = Eigen::Map<Eigen::MatrixXd>(vAkk.data(), kSize, kSize);

            // In-place Cholesky factorization of diagonal tile
            Eigen::LLT<Eigen::MatrixXd> llt(Akk);
            if (llt.info() != Eigen::Success) {
                tempA.remove();
                Rcpp::Rcout << "\nMatrix not positive definite at tile " << k << "\n";
                return 1;
            }
            Eigen::MatrixXd Lkk = llt.matrixL();

            // Write diagonal Cholesky factor to output
            outDataset->writeDatasetBlock(Rcpp::wrap(Lkk), {kStart, kStart}, {kSize, kSize}, stride, block, false);

            // Step 2: Column solve - L(i,k) = A(i,k) * inv(L(k,k)^T)
            // Columns are independent, can be parallelized
            #pragma omp parallel for num_threads(get_number_threads(threads, R_NilValue)) schedule(dynamic)
            for (int i = k + 1; i < numTiles; i++) {
                hsize_t iStart = i * tileSize;
                hsize_t iSize = std::min(static_cast<hsize_t>(tileSize), static_cast<hsize_t>(dimensionSize - iStart));

                Eigen::MatrixXd Aik(iSize, kSize);
                std::vector<double> vAik(iSize * kSize);
                tempA.readDatasetBlock({iStart, kStart}, {iSize, kSize}, stride, block, vAik.data());
                Aik = Eigen::Map<Eigen::MatrixXd>(vAik.data(), iSize, kSize);

                // Solve triangular system: Lkk^T * Lik^T = Aik^T
                Eigen::MatrixXd Lik = Lkk.triangularView<Eigen::Lower>().solve(Aik.transpose()).transpose();

                // Write column factor to output
                outDataset->writeDatasetBlock(Rcpp::wrap(Lik), {iStart, kStart}, {iSize, kSize}, stride, block, false);
            }

            // Step 3: Submatrix update - A(i,j) -= L(i,k) * L(j,k)^T
            // Outer loop over tiles can be parallelized (tiles are independent)
            #pragma omp parallel for num_threads(get_number_threads(threads, R_NilValue)) schedule(dynamic)
            for (int i = k + 1; i < numTiles; i++) {
                hsize_t iStart = i * tileSize;
                hsize_t iSize = std::min(static_cast<hsize_t>(tileSize), static_cast<hsize_t>(dimensionSize - iStart));

                // Load computed L(i,k) factor
                Eigen::MatrixXd Lik(iSize, kSize);
                std::vector<double> vLik(iSize * kSize);
                outDataset->readDatasetBlock({iStart, kStart}, {iSize, kSize}, stride, block, vLik.data());
                Lik = Eigen::Map<Eigen::MatrixXd>(vLik.data(), iSize, kSize);

                // Update tiles in current row (inner loop cannot be parallelized due to dependencies)
                for (int j = k + 1; j <= i; j++) {
                    hsize_t jStart = j * tileSize;
                    hsize_t jSize = std::min(static_cast<hsize_t>(tileSize), static_cast<hsize_t>(dimensionSize - jStart));

                    // Load computed L(j,k) factor
                    Eigen::MatrixXd Ljk(jSize, kSize);
                    std::vector<double> vLjk(jSize * kSize);
                    outDataset->readDatasetBlock({jStart, kStart}, {jSize, kSize}, stride, block, vLjk.data());
                    Ljk = Eigen::Map<Eigen::MatrixXd>(vLjk.data(), jSize, kSize);

                    // Load current A(i,j) tile for update
                    Eigen::MatrixXd Aij(iSize, jSize);
                    std::vector<double> vAij(iSize * jSize);
                    tempA.readDatasetBlock({iStart, jStart}, {iSize, jSize}, stride, block, vAij.data());
                    Aij = Eigen::Map<Eigen::MatrixXd>(vAij.data(), iSize, jSize);

                    // Apply rank-k update: A(i,j) -= L(i,k) * L(j,k)^T
                    Aij -= Lik * Ljk.transpose();

                    // Write updated tile back to working matrix
                    tempA.writeDatasetBlock(Rcpp::wrap(Aij), {iStart, jStart}, {iSize, jSize}, stride, block, false);
                }
            }
        }

        // Clean up temporary working dataset
        tempA.remove();

    } catch(std::exception& ex) {
        Rcpp::Rcerr << "c++ exception Cholesky_decomposition_outofcore_hdf5: " << ex.what();
        return 2;
    }

    return 0;
}
8 Usage Example
#include "BigDataStatMeth.hpp"
// Example usage
auto result = Cholesky_decomposition_outofcore_hdf5(...);
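A slightly fuller, hedged sketch of a call site follows. The wrapper function run_cholesky, the already-opened dataset pointers, and the value 0 passed for the unused dElementsBlock parameter are illustrative assumptions; only the call signature and the return codes come from this reference. See the hdf5Dataset class documentation for how to open or create the input and output datasets.

#include "BigDataStatMeth.hpp"

// Hypothetical caller: inDataset and outDataset are assumed to be valid
// BigDataStatMeth::hdf5Dataset pointers obtained elsewhere, and n is the
// dimension of the (square, symmetric) input matrix.
int run_cholesky(BigDataStatMeth::hdf5Dataset* inDataset,
                 BigDataStatMeth::hdf5Dataset* outDataset,
                 int n)
{
    // dElementsBlock is ignored by this routine (tiles are fixed at 10k x 10k);
    // passing R_NilValue for threads lets the library choose the thread count.
    int rc = BigDataStatMeth::Cholesky_decomposition_outofcore_hdf5(
        inDataset, outDataset, n, n, /*dElementsBlock=*/0, R_NilValue);

    if (rc == 1) {
        Rcpp::Rcout << "Input matrix is not positive definite\n";
    } else if (rc == 2) {
        Rcpp::Rcout << "An error occurred during the decomposition\n";
    }
    return rc;
}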