Cholesky_decomposition_intermediate_hdf5

C++ Function Reference

1 Signature

int BigDataStatMeth::Cholesky_decomposition_intermediate_hdf5(BigDataStatMeth::hdf5Dataset *inDataset, BigDataStatMeth::hdf5Dataset *outDataset, int idim0, int idim1, long dElementsBlock, Rcpp::Nullable< int > threads)

2 Description

Performs Cholesky decomposition on a matrix stored in HDF5 format.

3 Parameters

inDataset (BigDataStatMeth::hdf5Dataset *): Input matrix dataset
outDataset (BigDataStatMeth::hdf5Dataset *): Output dataset for Cholesky factor L
idim0 (int): Number of rows
idim1 (int): Number of columns
dElementsBlock (long): Block size for processing
threads (Rcpp::Nullable< int >): Number of threads for parallel processing (optional)

4 Returns

int 0 on success, 1 if matrix is not positive definite, 2 on HDF5 errors

5 Details

inDatasetInput matrix dataset outDatasetOutput dataset for Cholesky factor L idim0Number of rows idim1Number of columns dElementsBlockBlock size for processing threadsNumber of threads for parallel processing (optional) int 0 on success, 1 if matrix is not positive definite, 2 on HDF5 errors Implements block-wise Cholesky decomposition algorithm:Processes matrix in blocks to manage memory usageComputes lower triangular factor L where A = LL^TUses parallel processing for row computations

6 Call Graph

7 Source Code

Implementation

File: inst/include/hdf5Algebra/matrixInvCholesky.hpp • Lines 335-538

inline int Cholesky_decomposition_intermediate_hdf5( BigDataStatMeth::hdf5Dataset* inDataset,  
                           BigDataStatMeth::hdf5Dataset* outDataset, 
                           int idim0,  int idim1,  long dElementsBlock, 
                           Rcpp::Nullable<int> threads  = R_NilValue)
{
    
    try {
        
        int dimensionSize = idim0,
            readedRows = 0,
            rowstoRead,
            minimumBlockSize;
            // chunk = 1,
            
        bool bcancel = false;
        double sum = 0;
        
        std::vector<hsize_t> offset = {0,0},
                             count = {1,1},
                             stride = {1,1},
                             block = {1,1};
        
        // Set minimum elements in block (mandatory : minimum = 2 * longest line)
        if( dElementsBlock < dimensionSize * 2 ) {
            minimumBlockSize = dimensionSize * 2;
        } else {
            minimumBlockSize = dElementsBlock;
        }
        
        // Rcpp::Rcout<<"\nDebug 2: minimumBlockSize=" << minimumBlockSize;
        // Poso el codi per llegir els blocks aquí i desprès a dins hauria d'anar-hi la j
        if( idim0 == idim1)
        {
            
            // Rcpp::Rcout<<"\nDins Cholesky_decomposition -> idim0 == idim1";
            while ( readedRows < dimensionSize ) {
                
                rowstoRead = ( -2 * readedRows - 1 + std::sqrt( pow(2*readedRows, 2) - 4 * readedRows + 8 * minimumBlockSize + 1) ) / 2;
                
                if (rowstoRead <= 0) {
                    rowstoRead = minimumBlockSize; // Minimum progress to avoid infinite loop
                }

                if( readedRows + rowstoRead > idim0) { // Max size bigger than data to read ?
                    rowstoRead = idim0 - readedRows;
                }
                
                if (readedRows == 0) {
                    // Primer bloque: count[0] = dimensionSize, count[1] = rowstoRead
                    size_t potential_elements = static_cast<size_t>(dimensionSize) * static_cast<size_t>(rowstoRead);
                    size_t optimalSize = getOptimalBlockElements();
                    if (potential_elements > optimalSize) {
                        rowstoRead = static_cast<int>(MAXCHOLBLOCKSIZE / dimensionSize);
                        if (rowstoRead < 1) rowstoRead = 1;
                    }
                } else {
                    // Bloques siguientes: count[0] = dimensionSize - readedRows + 1, count[1] = readedRows + rowstoRead
                    size_t potential_elements = static_cast<size_t>(dimensionSize - readedRows + 1) * static_cast<size_t>(readedRows + rowstoRead);
                    size_t optimalSize = getOptimalBlockElements();
                    if (potential_elements > optimalSize) {
                        int max_cols = static_cast<int>(MAXCHOLBLOCKSIZE / (dimensionSize - readedRows + 1));
                        rowstoRead = max_cols - readedRows;
                        if (rowstoRead < 1) rowstoRead = 1;
                    }
                }
                
                offset[0] = readedRows;
                
                if( readedRows == 0) {
                    count[0] = dimensionSize - readedRows;
                    count[1] = rowstoRead;
                } else {
                    offset[0] = offset[0] - 1; // We need results from previous line to compute next line results
                    count[1] = readedRows + rowstoRead;
                    count[0] = dimensionSize - readedRows + 1;
                }
                
                readedRows = readedRows + rowstoRead; // Ho preparem perquè desprès necessitarem llegir a partir de la línea anterior
                
                Eigen::MatrixXd A, L;
                
                // Rcpp::Rcout<<"\nDebug 7: A L";
                
                // CAMBIO QUIRÚRGICO: Verificar tamaño de bloque para big-matrix
                size_t block_elements = static_cast<size_t>(count[0]) * static_cast<size_t>(count[1]);
                // size_t max_elements = MAXCHOLBLOCKSIZE; // ~400 MB por vector, 800 MB total
                
                if (block_elements > MAXCHOLBLOCKSIZE) {
                    // Recalcular rowstoRead para bloque más pequeño
                    rowstoRead = static_cast<int>(MAXCHOLBLOCKSIZE / count[0]);
                    if (rowstoRead < 1) rowstoRead = 1;
                    
                    // Recalcular count[1]
                    if( readedRows == 0) {
                        count[1] = rowstoRead;
                    } else {
                        count[1] = readedRows + rowstoRead;
                    }
                    
                    // Actualizar readedRows
                    readedRows = (readedRows > rowstoRead) ? (readedRows - rowstoRead + rowstoRead) : readedRows + rowstoRead;
                }
                
                std::vector<double> vdA( count[0] * count[1] ); 
                inDataset->readDatasetBlock( {offset[0], offset[1]}, {count[0], count[1]}, stride, block, vdA.data() );
                A = Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> (vdA.data(), count[0], count[1] );
                
                std::vector<double> vdL( count[0] * count[1] ); 
                outDataset->readDatasetBlock( {offset[0], offset[1]}, {count[0], count[1]}, stride, block, vdL.data() );
                L = Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> (vdL.data(), count[0], count[1] );
                
                if( offset[0] != 0 )
                    rowstoRead = rowstoRead + 1;
                
                if( rowstoRead > static_cast<int>(count[1]) ) {
                    rowstoRead = count[1];
                }
                
                //. 2025/11/21 .// for (int j = 0; j < rowstoRead; j++)  {
                for (int j = 0; j < rowstoRead; j++)  {
                    if( j + offset[0] == 0) {
                        L(j, j) = std::sqrt(A(j,j));
                    } else {
                        L(j, j + offset[0]) = std::sqrt(A(j,j + offset[0]) - (L.row(j).head(j + offset[0]).array().pow(2).sum() ));    
                    }
                    
                    
                    #pragma omp parallel for num_threads( get_number_threads(threads, R_NilValue) ) private(sum) shared (A,L,j) schedule(dynamic) if (j < readedRows - 1)
                    //. 2025/11/21 .// for ( int i = j + 1; i < dimensionSize - offset[0]  ; i++ )
                    for ( int i = j + 1; i < dimensionSize - static_cast<int>(offset[0])  ; i++ )
                    {
                        if(bcancel == false) {
                            if( j + static_cast<int>(offset[0]) > 0) {
                                sum = (L.block(i, 0, 1, j + static_cast<int>(offset[0])).array() * L.block(j, 0, 1, j + static_cast<int>(offset[0])).array()).array().sum();
                                if( sum != sum ) {
                                    Rcpp::Rcout<<"\n Can't get inverse matrix using Cholesky decomposition matrix is not positive definite\n";
                                    bcancel = true;
                                }
                            } else {
                                sum = 0;
                            }
                            if(!bcancel){
                                L(i,j + static_cast<int>(offset[0])) =  (1/L(j,j + static_cast<int>(offset[0]))*(A(i,j + static_cast<int>(offset[0])) - sum));    
                            }    
                        }
                    }
                }
                if(bcancel == true) {
                    return(1);
                }
                
                if( offset[0] != 0) {
                    offset[0] = offset[0] + 1;
                    count[0] = count[0] - 1;
                    
                    outDataset->writeDatasetBlock( Rcpp::wrap(L.block(1, 0, L.rows()-1, L.cols())), offset, count, stride, block, false);
                    
                } else {
                    outDataset->writeDatasetBlock( Rcpp::wrap(L), offset, count, stride, block, false);
                }
                
                // Rcpp::Rcout<<"\nDebug 13";
                
                offset[0] = offset[0] + count[0] - 1;
                
                // Rcpp::Rcout<<"\nDins Cholesky_decomposition -> Despres escriptura ";
            }
            
        } else {
            throw std::range_error("non-conformable arguments");
        }
        
        // Rcpp::Rcout<<"\nDins Cholesky_decomposition -> Bye Bye...";
        
        
    } catch( H5::FileIException& error ) { 
        checkClose_file(inDataset, outDataset);
        inDataset = outDataset = nullptr;
        Rcpp::Rcerr<<"c++ exception Cholesky_decomposition_intermediate_hdf5 (File IException)";
        return(2);
    } catch( H5::GroupIException & error ) { 
        checkClose_file(inDataset, outDataset);
        inDataset = outDataset = nullptr;
        Rcpp::Rcerr << "c++ exception Cholesky_decomposition_intermediate_hdf5 (Group IException)";
        return(2);
    } catch( H5::DataSetIException& error ) { 
        checkClose_file(inDataset, outDataset);
        inDataset = outDataset = nullptr;
        Rcpp::Rcerr << "c++ exception Cholesky_decomposition_intermediate_hdf5 (DataSet IException)";
        return(2);
    } catch(std::exception& ex) {
        checkClose_file(inDataset, outDataset);
        inDataset = outDataset = nullptr;
        Rcpp::Rcerr << "c++ exception Cholesky_decomposition_intermediate_hdf5" << ex.what();
        return(2);
    } catch (...) {
        checkClose_file(inDataset, outDataset);
        inDataset = outDataset = nullptr;
        Rcpp::Rcerr<<"\nC++ exception Cholesky_decomposition_intermediate_hdf5 (unknown reason)";
        return(2);
    }
    return(0);
    
}

8 Usage Example

#include "BigDataStatMeth.hpp"

// Example usage
auto result = Cholesky_decomposition_intermediate_hdf5(...);

--- title: "Cholesky_decomposition_intermediate_hdf5" subtitle: "C++ Function Reference" --- ## Signature ```cpp int BigDataStatMeth::Cholesky_decomposition_intermediate_hdf5(BigDataStatMeth::hdf5Dataset *inDataset, BigDataStatMeth::hdf5Dataset *outDataset, int idim0, int idim1, long dElementsBlock, Rcpp::Nullable< int > threads) ``` ## Description Performs Cholesky decomposition on a matrix stored in HDF5 format. ## Parameters - `inDataset` (`BigDataStatMeth::hdf5Dataset *`): Input matrix dataset - `outDataset` (`BigDataStatMeth::hdf5Dataset *`): Output dataset for Cholesky factor L - `idim0` (`int`): Number of rows - `idim1` (`int`): Number of columns - `dElementsBlock` (`long`): Block size for processing - `threads` (`Rcpp::Nullable< int >`): Number of threads for parallel processing (optional) ## Returns int 0 on success, 1 if matrix is not positive definite, 2 on HDF5 errors ## Details inDatasetInput matrix dataset outDatasetOutput dataset for Cholesky factor L idim0Number of rows idim1Number of columns dElementsBlockBlock size for processing threadsNumber of threads for parallel processing (optional) int 0 on success, 1 if matrix is not positive definite, 2 on HDF5 errors Implements block-wise Cholesky decomposition algorithm:Processes matrix in blocks to manage memory usageComputes lower triangular factor L where A = LL^TUses parallel processing for row computations ## Call Graph ::: {.diagram-section} <object type="image/svg+xml" data="images/namespace_big_data_stat_meth_a6474d6e774e790655800beb3a03a4f42_cgraph.svg" class="class-diagram"> <img src="images/namespace_big_data_stat_meth_a6474d6e774e790655800beb3a03a4f42_cgraph.svg" alt="Function dependencies"/> </object> ::: ## Source Code ::: {.callout-note collapse="true"} ### Implementation **File:** `inst/include/hdf5Algebra/matrixInvCholesky.hpp` • **Lines 335-538** ```cpp inline int Cholesky_decomposition_intermediate_hdf5( BigDataStatMeth::hdf5Dataset* inDataset, BigDataStatMeth::hdf5Dataset* outDataset, int idim0, int idim1, long dElementsBlock, Rcpp::Nullable<int> threads = R_NilValue) { try { int dimensionSize = idim0, readedRows = 0, rowstoRead, minimumBlockSize; // chunk = 1, bool bcancel = false; double sum = 0; std::vector<hsize_t> offset = {0,0}, count = {1,1}, stride = {1,1}, block = {1,1}; // Set minimum elements in block (mandatory : minimum = 2 * longest line) if( dElementsBlock < dimensionSize * 2 ) { minimumBlockSize = dimensionSize * 2; } else { minimumBlockSize = dElementsBlock; } // Rcpp::Rcout<<"\nDebug 2: minimumBlockSize=" << minimumBlockSize; // Poso el codi per llegir els blocks aquí i desprès a dins hauria d'anar-hi la j if( idim0 == idim1) { // Rcpp::Rcout<<"\nDins Cholesky_decomposition -> idim0 == idim1"; while ( readedRows < dimensionSize ) { rowstoRead = ( -2 * readedRows - 1 + std::sqrt( pow(2*readedRows, 2) - 4 * readedRows + 8 * minimumBlockSize + 1) ) / 2; if (rowstoRead <= 0) { rowstoRead = minimumBlockSize; // Minimum progress to avoid infinite loop } if( readedRows + rowstoRead > idim0) { // Max size bigger than data to read ? rowstoRead = idim0 - readedRows; } if (readedRows == 0) { // Primer bloque: count[0] = dimensionSize, count[1] = rowstoRead size_t potential_elements = static_cast<size_t>(dimensionSize) * static_cast<size_t>(rowstoRead); size_t optimalSize = getOptimalBlockElements(); if (potential_elements > optimalSize) { rowstoRead = static_cast<int>(MAXCHOLBLOCKSIZE / dimensionSize); if (rowstoRead < 1) rowstoRead = 1; } } else { // Bloques siguientes: count[0] = dimensionSize - readedRows + 1, count[1] = readedRows + rowstoRead size_t potential_elements = static_cast<size_t>(dimensionSize - readedRows + 1) * static_cast<size_t>(readedRows + rowstoRead); size_t optimalSize = getOptimalBlockElements(); if (potential_elements > optimalSize) { int max_cols = static_cast<int>(MAXCHOLBLOCKSIZE / (dimensionSize - readedRows + 1)); rowstoRead = max_cols - readedRows; if (rowstoRead < 1) rowstoRead = 1; } } offset[0] = readedRows; if( readedRows == 0) { count[0] = dimensionSize - readedRows; count[1] = rowstoRead; } else { offset[0] = offset[0] - 1; // We need results from previous line to compute next line results count[1] = readedRows + rowstoRead; count[0] = dimensionSize - readedRows + 1; } readedRows = readedRows + rowstoRead; // Ho preparem perquè desprès necessitarem llegir a partir de la línea anterior Eigen::MatrixXd A, L; // Rcpp::Rcout<<"\nDebug 7: A L"; // CAMBIO QUIRÚRGICO: Verificar tamaño de bloque para big-matrix size_t block_elements = static_cast<size_t>(count[0]) * static_cast<size_t>(count[1]); // size_t max_elements = MAXCHOLBLOCKSIZE; // ~400 MB por vector, 800 MB total if (block_elements > MAXCHOLBLOCKSIZE) { // Recalcular rowstoRead para bloque más pequeño rowstoRead = static_cast<int>(MAXCHOLBLOCKSIZE / count[0]); if (rowstoRead < 1) rowstoRead = 1; // Recalcular count[1] if( readedRows == 0) { count[1] = rowstoRead; } else { count[1] = readedRows + rowstoRead; } // Actualizar readedRows readedRows = (readedRows > rowstoRead) ? (readedRows - rowstoRead + rowstoRead) : readedRows + rowstoRead; } std::vector<double> vdA( count[0] * count[1] ); inDataset->readDatasetBlock( {offset[0], offset[1]}, {count[0], count[1]}, stride, block, vdA.data() ); A = Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> (vdA.data(), count[0], count[1] ); std::vector<double> vdL( count[0] * count[1] ); outDataset->readDatasetBlock( {offset[0], offset[1]}, {count[0], count[1]}, stride, block, vdL.data() ); L = Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> (vdL.data(), count[0], count[1] ); if( offset[0] != 0 ) rowstoRead = rowstoRead + 1; if( rowstoRead > static_cast<int>(count[1]) ) { rowstoRead = count[1]; } //. 2025/11/21 .// for (int j = 0; j < rowstoRead; j++) { for (int j = 0; j < rowstoRead; j++) { if( j + offset[0] == 0) { L(j, j) = std::sqrt(A(j,j)); } else { L(j, j + offset[0]) = std::sqrt(A(j,j + offset[0]) - (L.row(j).head(j + offset[0]).array().pow(2).sum() )); } #pragma omp parallel for num_threads( get_number_threads(threads, R_NilValue) ) private(sum) shared (A,L,j) schedule(dynamic) if (j < readedRows - 1) //. 2025/11/21 .// for ( int i = j + 1; i < dimensionSize - offset[0] ; i++ ) for ( int i = j + 1; i < dimensionSize - static_cast<int>(offset[0]) ; i++ ) { if(bcancel == false) { if( j + static_cast<int>(offset[0]) > 0) { sum = (L.block(i, 0, 1, j + static_cast<int>(offset[0])).array() * L.block(j, 0, 1, j + static_cast<int>(offset[0])).array()).array().sum(); if( sum != sum ) { Rcpp::Rcout<<"\n Can't get inverse matrix using Cholesky decomposition matrix is not positive definite\n"; bcancel = true; } } else { sum = 0; } if(!bcancel){ L(i,j + static_cast<int>(offset[0])) = (1/L(j,j + static_cast<int>(offset[0]))*(A(i,j + static_cast<int>(offset[0])) - sum)); } } } } if(bcancel == true) { return(1); } if( offset[0] != 0) { offset[0] = offset[0] + 1; count[0] = count[0] - 1; outDataset->writeDatasetBlock( Rcpp::wrap(L.block(1, 0, L.rows()-1, L.cols())), offset, count, stride, block, false); } else { outDataset->writeDatasetBlock( Rcpp::wrap(L), offset, count, stride, block, false); } // Rcpp::Rcout<<"\nDebug 13"; offset[0] = offset[0] + count[0] - 1; // Rcpp::Rcout<<"\nDins Cholesky_decomposition -> Despres escriptura "; } } else { throw std::range_error("non-conformable arguments"); } // Rcpp::Rcout<<"\nDins Cholesky_decomposition -> Bye Bye..."; } catch( H5::FileIException& error ) { checkClose_file(inDataset, outDataset); inDataset = outDataset = nullptr; Rcpp::Rcerr<<"c++ exception Cholesky_decomposition_intermediate_hdf5 (File IException)"; return(2); } catch( H5::GroupIException & error ) { checkClose_file(inDataset, outDataset); inDataset = outDataset = nullptr; Rcpp::Rcerr << "c++ exception Cholesky_decomposition_intermediate_hdf5 (Group IException)"; return(2); } catch( H5::DataSetIException& error ) { checkClose_file(inDataset, outDataset); inDataset = outDataset = nullptr; Rcpp::Rcerr << "c++ exception Cholesky_decomposition_intermediate_hdf5 (DataSet IException)"; return(2); } catch(std::exception& ex) { checkClose_file(inDataset, outDataset); inDataset = outDataset = nullptr; Rcpp::Rcerr << "c++ exception Cholesky_decomposition_intermediate_hdf5" << ex.what(); return(2); } catch (...) { checkClose_file(inDataset, outDataset); inDataset = outDataset = nullptr; Rcpp::Rcerr<<"\nC++ exception Cholesky_decomposition_intermediate_hdf5 (unknown reason)"; return(2); } return(0); } ``` ::: ## Usage Example ```cpp #include "BigDataStatMeth.hpp" // Example usage auto result = Cholesky_decomposition_intermediate_hdf5(...); ```