Cholesky_decomposition_intermediate_hdf5

C++ Function Reference

1 Signature

int BigDataStatMeth::Cholesky_decomposition_intermediate_hdf5(BigDataStatMeth::hdf5Dataset *inDataset, BigDataStatMeth::hdf5Dataset *outDataset, int idim0, int idim1, long dElementsBlock, Rcpp::Nullable< int > threads)

2 Description

Performs Cholesky decomposition on a matrix stored in HDF5 format.

3 Parameters

  • inDataset (BigDataStatMeth::hdf5Dataset *): Input matrix dataset
  • outDataset (BigDataStatMeth::hdf5Dataset *): Output dataset for Cholesky factor L
  • idim0 (int): Number of rows
  • idim1 (int): Number of columns
  • dElementsBlock (long): Block size for processing
  • threads (Rcpp::Nullable< int >): Number of threads for parallel processing (optional)

4 Returns

int 0 on success, 1 if matrix is not positive definite, 2 on HDF5 errors

5 Details

inDatasetInput matrix dataset outDatasetOutput dataset for Cholesky factor L idim0Number of rows idim1Number of columns dElementsBlockBlock size for processing threadsNumber of threads for parallel processing (optional) int 0 on success, 1 if matrix is not positive definite, 2 on HDF5 errors Implements block-wise Cholesky decomposition algorithm:Processes matrix in blocks to manage memory usageComputes lower triangular factor L where A = LL^TUses parallel processing for row computations

6 Call Graph

Function dependencies

7 Source Code

File: inst/include/hdf5Algebra/matrixInvCholesky.hppLines 335-538

inline int Cholesky_decomposition_intermediate_hdf5( BigDataStatMeth::hdf5Dataset* inDataset,  
                           BigDataStatMeth::hdf5Dataset* outDataset, 
                           int idim0,  int idim1,  long dElementsBlock, 
                           Rcpp::Nullable<int> threads  = R_NilValue)
{
    
    try {
        
        int dimensionSize = idim0,
            readedRows = 0,
            rowstoRead,
            minimumBlockSize;
            // chunk = 1,
            
        bool bcancel = false;
        double sum = 0;
        
        std::vector<hsize_t> offset = {0,0},
                             count = {1,1},
                             stride = {1,1},
                             block = {1,1};
        
        // Set minimum elements in block (mandatory : minimum = 2 * longest line)
        if( dElementsBlock < dimensionSize * 2 ) {
            minimumBlockSize = dimensionSize * 2;
        } else {
            minimumBlockSize = dElementsBlock;
        }
        
        // Rcpp::Rcout<<"\nDebug 2: minimumBlockSize=" << minimumBlockSize;
        // Poso el codi per llegir els blocks aquí i desprès a dins hauria d'anar-hi la j
        if( idim0 == idim1)
        {
            
            // Rcpp::Rcout<<"\nDins Cholesky_decomposition -> idim0 == idim1";
            while ( readedRows < dimensionSize ) {
                
                rowstoRead = ( -2 * readedRows - 1 + std::sqrt( pow(2*readedRows, 2) - 4 * readedRows + 8 * minimumBlockSize + 1) ) / 2;
                
                if (rowstoRead <= 0) {
                    rowstoRead = minimumBlockSize; // Minimum progress to avoid infinite loop
                }

                if( readedRows + rowstoRead > idim0) { // Max size bigger than data to read ?
                    rowstoRead = idim0 - readedRows;
                }
                
                if (readedRows == 0) {
                    // Primer bloque: count[0] = dimensionSize, count[1] = rowstoRead
                    size_t potential_elements = static_cast<size_t>(dimensionSize) * static_cast<size_t>(rowstoRead);
                    size_t optimalSize = getOptimalBlockElements();
                    if (potential_elements > optimalSize) {
                        rowstoRead = static_cast<int>(MAXCHOLBLOCKSIZE / dimensionSize);
                        if (rowstoRead < 1) rowstoRead = 1;
                    }
                } else {
                    // Bloques siguientes: count[0] = dimensionSize - readedRows + 1, count[1] = readedRows + rowstoRead
                    size_t potential_elements = static_cast<size_t>(dimensionSize - readedRows + 1) * static_cast<size_t>(readedRows + rowstoRead);
                    size_t optimalSize = getOptimalBlockElements();
                    if (potential_elements > optimalSize) {
                        int max_cols = static_cast<int>(MAXCHOLBLOCKSIZE / (dimensionSize - readedRows + 1));
                        rowstoRead = max_cols - readedRows;
                        if (rowstoRead < 1) rowstoRead = 1;
                    }
                }
                
                offset[0] = readedRows;
                
                if( readedRows == 0) {
                    count[0] = dimensionSize - readedRows;
                    count[1] = rowstoRead;
                } else {
                    offset[0] = offset[0] - 1; // We need results from previous line to compute next line results
                    count[1] = readedRows + rowstoRead;
                    count[0] = dimensionSize - readedRows + 1;
                }
                
                readedRows = readedRows + rowstoRead; // Ho preparem perquè desprès necessitarem llegir a partir de la línea anterior
                
                Eigen::MatrixXd A, L;
                
                // Rcpp::Rcout<<"\nDebug 7: A L";
                
                // CAMBIO QUIRÚRGICO: Verificar tamaño de bloque para big-matrix
                size_t block_elements = static_cast<size_t>(count[0]) * static_cast<size_t>(count[1]);
                // size_t max_elements = MAXCHOLBLOCKSIZE; // ~400 MB por vector, 800 MB total
                
                if (block_elements > MAXCHOLBLOCKSIZE) {
                    // Recalcular rowstoRead para bloque más pequeño
                    rowstoRead = static_cast<int>(MAXCHOLBLOCKSIZE / count[0]);
                    if (rowstoRead < 1) rowstoRead = 1;
                    
                    // Recalcular count[1]
                    if( readedRows == 0) {
                        count[1] = rowstoRead;
                    } else {
                        count[1] = readedRows + rowstoRead;
                    }
                    
                    // Actualizar readedRows
                    readedRows = (readedRows > rowstoRead) ? (readedRows - rowstoRead + rowstoRead) : readedRows + rowstoRead;
                }
                
                std::vector<double> vdA( count[0] * count[1] ); 
                inDataset->readDatasetBlock( {offset[0], offset[1]}, {count[0], count[1]}, stride, block, vdA.data() );
                A = Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> (vdA.data(), count[0], count[1] );
                
                std::vector<double> vdL( count[0] * count[1] ); 
                outDataset->readDatasetBlock( {offset[0], offset[1]}, {count[0], count[1]}, stride, block, vdL.data() );
                L = Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> (vdL.data(), count[0], count[1] );
                
                if( offset[0] != 0 )
                    rowstoRead = rowstoRead + 1;
                
                if( rowstoRead > static_cast<int>(count[1]) ) {
                    rowstoRead = count[1];
                }
                
                //. 2025/11/21 .// for (int j = 0; j < rowstoRead; j++)  {
                for (int j = 0; j < rowstoRead; j++)  {
                    if( j + offset[0] == 0) {
                        L(j, j) = std::sqrt(A(j,j));
                    } else {
                        L(j, j + offset[0]) = std::sqrt(A(j,j + offset[0]) - (L.row(j).head(j + offset[0]).array().pow(2).sum() ));    
                    }
                    
                    
                    #pragma omp parallel for num_threads( get_number_threads(threads, R_NilValue) ) private(sum) shared (A,L,j) schedule(dynamic) if (j < readedRows - 1)
                    //. 2025/11/21 .// for ( int i = j + 1; i < dimensionSize - offset[0]  ; i++ )
                    for ( int i = j + 1; i < dimensionSize - static_cast<int>(offset[0])  ; i++ )
                    {
                        if(bcancel == false) {
                            if( j + static_cast<int>(offset[0]) > 0) {
                                sum = (L.block(i, 0, 1, j + static_cast<int>(offset[0])).array() * L.block(j, 0, 1, j + static_cast<int>(offset[0])).array()).array().sum();
                                if( sum != sum ) {
                                    Rcpp::Rcout<<"\n Can't get inverse matrix using Cholesky decomposition matrix is not positive definite\n";
                                    bcancel = true;
                                }
                            } else {
                                sum = 0;
                            }
                            if(!bcancel){
                                L(i,j + static_cast<int>(offset[0])) =  (1/L(j,j + static_cast<int>(offset[0]))*(A(i,j + static_cast<int>(offset[0])) - sum));    
                            }    
                        }
                    }
                }
                if(bcancel == true) {
                    return(1);
                }
                
                if( offset[0] != 0) {
                    offset[0] = offset[0] + 1;
                    count[0] = count[0] - 1;
                    
                    outDataset->writeDatasetBlock( Rcpp::wrap(L.block(1, 0, L.rows()-1, L.cols())), offset, count, stride, block, false);
                    
                } else {
                    outDataset->writeDatasetBlock( Rcpp::wrap(L), offset, count, stride, block, false);
                }
                
                // Rcpp::Rcout<<"\nDebug 13";
                
                offset[0] = offset[0] + count[0] - 1;
                
                // Rcpp::Rcout<<"\nDins Cholesky_decomposition -> Despres escriptura ";
            }
            
        } else {
            throw std::range_error("non-conformable arguments");
        }
        
        // Rcpp::Rcout<<"\nDins Cholesky_decomposition -> Bye Bye...";
        
        
    } catch( H5::FileIException& error ) { 
        checkClose_file(inDataset, outDataset);
        inDataset = outDataset = nullptr;
        Rcpp::Rcerr<<"c++ exception Cholesky_decomposition_intermediate_hdf5 (File IException)";
        return(2);
    } catch( H5::GroupIException & error ) { 
        checkClose_file(inDataset, outDataset);
        inDataset = outDataset = nullptr;
        Rcpp::Rcerr << "c++ exception Cholesky_decomposition_intermediate_hdf5 (Group IException)";
        return(2);
    } catch( H5::DataSetIException& error ) { 
        checkClose_file(inDataset, outDataset);
        inDataset = outDataset = nullptr;
        Rcpp::Rcerr << "c++ exception Cholesky_decomposition_intermediate_hdf5 (DataSet IException)";
        return(2);
    } catch(std::exception& ex) {
        checkClose_file(inDataset, outDataset);
        inDataset = outDataset = nullptr;
        Rcpp::Rcerr << "c++ exception Cholesky_decomposition_intermediate_hdf5" << ex.what();
        return(2);
    } catch (...) {
        checkClose_file(inDataset, outDataset);
        inDataset = outDataset = nullptr;
        Rcpp::Rcerr<<"\nC++ exception Cholesky_decomposition_intermediate_hdf5 (unknown reason)";
        return(2);
    }
    return(0);
    
}

8 Usage Example

#include "BigDataStatMeth.hpp"

// Example usage
auto result = Cholesky_decomposition_intermediate_hdf5(...);