Cholesky_decomposition_outofcore_hdf5

C++ Function Reference

1 Signature

int BigDataStatMeth::Cholesky_decomposition_outofcore_hdf5(BigDataStatMeth::hdf5Dataset *inDataset, BigDataStatMeth::hdf5Dataset *outDataset, int idim0, int idim1, long dElementsBlock, Rcpp::Nullable< int > threads)

2 Description

Out-of-core Cholesky decomposition for large matrices using tiled algorithm.

3 Parameters

  • inDataset (BigDataStatMeth::hdf5Dataset *): Input matrix dataset
  • outDataset (BigDataStatMeth::hdf5Dataset *): Output dataset for Cholesky factor L
  • idim0 (int): Number of rows
  • idim1 (int): Number of columns
  • dElementsBlock (long): Block size (not used, tiles are fixed)
  • threads (Rcpp::Nullable< int >): Number of threads for parallel processing (optional)

4 Returns

int — 0 on success, 1 if the matrix is not positive definite, 2 on error.

5 Details

Fixed-tile algorithm for constant memory usage with complete matrix handling. The matrix is processed in 10k × 10k tiles, giving approximately 800 MB of constant memory usage regardless of matrix size, which makes the routine suitable for matrices with more than 250k dimensions. Each iteration performs a three-step process: diagonal tile factorization, column solve, and trailing submatrix update.

6 Call Graph

Function dependencies

7 Source Code

File: inst/include/hdf5Algebra/matrixInvCholesky.hpp, lines 545–692

inline int Cholesky_decomposition_outofcore_hdf5(BigDataStatMeth::hdf5Dataset* inDataset,  
                                                 BigDataStatMeth::hdf5Dataset* outDataset, 
                                                 int idim0, int idim1, long dElementsBlock, 
                                                 Rcpp::Nullable<int> threads = R_NilValue)
{
    // Out-of-core blocked Cholesky decomposition (A = L * L^T) for HDF5-backed
    // matrices, processed in fixed 10k x 10k tiles so memory use stays
    // constant regardless of matrix size.
    //
    // Parameters:
    //   inDataset      - input symmetric matrix dataset (lower triangle read;
    //                    upper-triangle tiles are reconstructed by symmetry)
    //   outDataset     - output dataset receiving the lower-triangular factor L
    //   idim0, idim1   - matrix dimensions; must be equal (square matrix)
    //   dElementsBlock - unused; tile size is fixed by design
    //   threads        - optional OpenMP thread count
    //
    // Returns: 0 on success, 1 if the matrix is not positive definite,
    //          2 on any other error.
    
    // Owned outside the try block so the temporary working dataset can be
    // removed from the HDF5 file on *every* error path. Previously an
    // exception thrown mid-computation leaked ".tmp_chol_A_*" in the file.
    std::unique_ptr<BigDataStatMeth::hdf5DatasetInternal> tempA;
    
    try {
        // Cholesky factorization is only defined for square matrices;
        // fail fast instead of reading out of bounds later.
        if (idim0 != idim1) {
            Rcpp::Rcerr << "\nCholesky_decomposition_outofcore_hdf5: matrix must be square, got "
                        << idim0 << "x" << idim1 << "\n";
            return 2;
        }
        
        int dimensionSize = idim0;
        int tileSize = 10000; // Fixed tile size for predictable memory usage
        int numTiles = (dimensionSize + tileSize - 1) / tileSize;
        
        // Loop-invariant: resolve the requested thread count once instead of
        // re-evaluating it at every parallel region entry.
        const int nThreads = get_number_threads(threads, R_NilValue);
        
        std::vector<hsize_t> stride = {1,1}, block = {1,1};
        
        // Create temporary working copy of input matrix.
        // The block-wise algorithm modifies A in place during computation.
        std::string tempAPath = inDataset->getGroup() + "/.tmp_chol_A_" + inDataset->getDatasetName();
        tempA = std::make_unique<BigDataStatMeth::hdf5DatasetInternal>(inDataset->getFileName(), tempAPath, true);
        
        tempA->createDataset(idim0, idim1, "real");

        // Copy the complete symmetric matrix to the temporary working dataset.
        // The full matrix (not just the lower triangle) is needed for proper
        // block-wise updates.
        for (int i = 0; i < numTiles; i++) {
            
            hsize_t iStart = i * tileSize;
            hsize_t iSize = std::min(static_cast<hsize_t>(tileSize), static_cast<hsize_t>(dimensionSize - iStart));
            
            for (int j = 0; j < numTiles; j++) {  // Complete matrix, not just lower triangle
                hsize_t jStart = j * tileSize;
                hsize_t jSize = std::min(static_cast<hsize_t>(tileSize), static_cast<hsize_t>(dimensionSize - jStart));
                
                Eigen::MatrixXd Aij(iSize, jSize);
                std::vector<double> vAij(iSize * jSize);
                
                if (j <= i) {
                    // Read from lower triangle of input matrix
                    inDataset->readDatasetBlock({iStart, jStart}, {iSize, jSize}, stride, block, vAij.data());
                    Aij = Eigen::Map<Eigen::MatrixXd>(vAij.data(), iSize, jSize);
                } else {
                    // Use symmetry: A(i,j) = A(j,i)^T for upper triangle tiles
                    std::vector<double> vAji(jSize * iSize);
                    inDataset->readDatasetBlock({jStart, iStart}, {jSize, iSize}, stride, block, vAji.data());
                    Eigen::MatrixXd Aji = Eigen::Map<Eigen::MatrixXd>(vAji.data(), jSize, iSize);
                    Aij = Aji.transpose();
                }
                
                tempA->writeDatasetBlock(Rcpp::wrap(Aij), {iStart, jStart}, {iSize, jSize}, stride, block, false);
            }
        }
        
        // Block-wise Cholesky algorithm: A = L * L^T where L is lower triangular.
        // Process tiles in order: diagonal factorization -> column solve -> submatrix update
        for (int k = 0; k < numTiles; k++) {
            
            hsize_t kStart = k * tileSize;
            hsize_t kSize = std::min(static_cast<hsize_t>(tileSize), static_cast<hsize_t>(dimensionSize - kStart));
            
            // Step 1: Diagonal factorization - L(k,k) = cholesky(A(k,k))
            Eigen::MatrixXd Akk(kSize, kSize);
            std::vector<double> vAkk(kSize * kSize);
            tempA->readDatasetBlock({kStart, kStart}, {kSize, kSize}, stride, block, vAkk.data());
            Akk = Eigen::Map<Eigen::MatrixXd>(vAkk.data(), kSize, kSize);
            
            // In-place Cholesky factorization of diagonal tile
            Eigen::LLT<Eigen::MatrixXd> llt(Akk);
            if (llt.info() != Eigen::Success) {
                // Not positive definite: clean up the temp dataset and report.
                tempA->remove();
                Rcpp::Rcout << "\nMatrix not positive definite at tile " << k << "\n";
                return 1;
            }
            Eigen::MatrixXd Lkk = llt.matrixL();
            
            // Write diagonal Cholesky factor to output
            outDataset->writeDatasetBlock(Rcpp::wrap(Lkk), {kStart, kStart}, {kSize, kSize}, stride, block, false);
            
            // Step 2: Column solve - L(i,k) = A(i,k) * inv(L(k,k)^T).
            // Column tiles are independent, so the loop is parallelized.
            // NOTE(review): Rcpp::wrap and HDF5 reads/writes execute on OpenMP
            // worker threads here; this is only safe if the HDF5 library is
            // built thread-safe and the dataset wrappers serialize access to
            // the R API (which is itself not thread-safe) - confirm.
            #pragma omp parallel for num_threads(nThreads) schedule(dynamic)
            for (int i = k + 1; i < numTiles; i++) {
                
                hsize_t iStart = i * tileSize;
                hsize_t iSize = std::min(static_cast<hsize_t>(tileSize), static_cast<hsize_t>(dimensionSize - iStart));
                
                Eigen::MatrixXd Aik(iSize, kSize);
                std::vector<double> vAik(iSize * kSize);
                tempA->readDatasetBlock({iStart, kStart}, {iSize, kSize}, stride, block, vAik.data());
                Aik = Eigen::Map<Eigen::MatrixXd>(vAik.data(), iSize, kSize);
                
                // Solve triangular system: Lkk^T * Lik^T = Aik^T
                Eigen::MatrixXd Lik = Lkk.triangularView<Eigen::Lower>().solve(Aik.transpose()).transpose();
                
                // Write column factor to output
                outDataset->writeDatasetBlock(Rcpp::wrap(Lik), {iStart, kStart}, {iSize, kSize}, stride, block, false);
            }
            
            // Step 3: Submatrix update - A(i,j) -= L(i,k) * L(j,k)^T.
            // Outer loop over row tiles is parallelized (row tiles are
            // independent); same thread-safety caveat as Step 2 applies.
            #pragma omp parallel for num_threads(nThreads) schedule(dynamic)
            for (int i = k + 1; i < numTiles; i++) {
                
                hsize_t iStart = i * tileSize;
                hsize_t iSize = std::min(static_cast<hsize_t>(tileSize), static_cast<hsize_t>(dimensionSize - iStart));
                
                // Load computed L(i,k) factor
                Eigen::MatrixXd Lik(iSize, kSize);
                std::vector<double> vLik(iSize * kSize);
                outDataset->readDatasetBlock({iStart, kStart}, {iSize, kSize}, stride, block, vLik.data());
                Lik = Eigen::Map<Eigen::MatrixXd>(vLik.data(), iSize, kSize);
                
                // Update tiles in current row (inner loop stays serial: each
                // iteration reads/writes the same working dataset row)
                for (int j = k + 1; j <= i; j++) {
                    
                    hsize_t jStart = j * tileSize;
                    hsize_t jSize = std::min(static_cast<hsize_t>(tileSize), static_cast<hsize_t>(dimensionSize - jStart));
                    
                    // Load computed L(j,k) factor
                    Eigen::MatrixXd Ljk(jSize, kSize);
                    std::vector<double> vLjk(jSize * kSize);
                    outDataset->readDatasetBlock({jStart, kStart}, {jSize, kSize}, stride, block, vLjk.data());
                    Ljk = Eigen::Map<Eigen::MatrixXd>(vLjk.data(), jSize, kSize);
                    
                    // Load current A(i,j) tile for update
                    Eigen::MatrixXd Aij(iSize, jSize);
                    std::vector<double> vAij(iSize * jSize);
                    tempA->readDatasetBlock({iStart, jStart}, {iSize, jSize}, stride, block, vAij.data());
                    Aij = Eigen::Map<Eigen::MatrixXd>(vAij.data(), iSize, jSize);
                    
                    // Apply rank-k update: A(i,j) -= L(i,k) * L(j,k)^T
                    Aij -= Lik * Ljk.transpose();
                    
                    // Write updated tile back to working matrix
                    tempA->writeDatasetBlock(Rcpp::wrap(Aij), {iStart, jStart}, {iSize, jSize}, stride, block, false);
                }
            }
        }
        
        // Clean up temporary working dataset
        tempA->remove();
        tempA.reset();
        
    } catch(std::exception& ex) {
        // Best-effort removal of the temp dataset; ignore secondary failures.
        if (tempA) { try { tempA->remove(); } catch(...) {} }
        Rcpp::Rcerr << "c++ exception Cholesky_decomposition_outofcore_hdf5: " << ex.what();
        return 2;
    } catch(...) {
        // HDF5 C++ exceptions (H5::Exception) do not derive from
        // std::exception; catch them here instead of letting them propagate.
        if (tempA) { try { tempA->remove(); } catch(...) {} }
        Rcpp::Rcerr << "c++ exception Cholesky_decomposition_outofcore_hdf5: unknown exception";
        return 2;
    }
    
    return 0;
}

8 Usage Example

#include "BigDataStatMeth.hpp"

// Example usage
auto result = Cholesky_decomposition_outofcore_hdf5(...);