Cholesky_decomposition_intermediate_hdf5
C++ Function Reference
1 Signature
int BigDataStatMeth::Cholesky_decomposition_intermediate_hdf5(BigDataStatMeth::hdf5Dataset *inDataset, BigDataStatMeth::hdf5Dataset *outDataset, int idim0, int idim1, long dElementsBlock, Rcpp::Nullable< int > threads)2 Description
Performs Cholesky decomposition on a matrix stored in HDF5 format.
3 Parameters
inDataset(BigDataStatMeth::hdf5Dataset *): Input matrix datasetoutDataset(BigDataStatMeth::hdf5Dataset *): Output dataset for Cholesky factor Lidim0(int): Number of rowsidim1(int): Number of columnsdElementsBlock(long): Block size for processingthreads(Rcpp::Nullable< int >): Number of threads for parallel processing (optional)
4 Returns
int 0 on success, 1 if matrix is not positive definite, 2 on HDF5 errors
5 Details
inDatasetInput matrix dataset outDatasetOutput dataset for Cholesky factor L idim0Number of rows idim1Number of columns dElementsBlockBlock size for processing threadsNumber of threads for parallel processing (optional) int 0 on success, 1 if matrix is not positive definite, 2 on HDF5 errors Implements block-wise Cholesky decomposition algorithm:Processes matrix in blocks to manage memory usageComputes lower triangular factor L where A = LL^TUses parallel processing for row computations
6 Call Graph
7 Source Code
NoteImplementation
File: inst/include/hdf5Algebra/matrixInvCholesky.hpp • Lines 335-538
inline int Cholesky_decomposition_intermediate_hdf5( BigDataStatMeth::hdf5Dataset* inDataset,
BigDataStatMeth::hdf5Dataset* outDataset,
int idim0, int idim1, long dElementsBlock,
Rcpp::Nullable<int> threads = R_NilValue)
{
try {
int dimensionSize = idim0,
readedRows = 0,
rowstoRead,
minimumBlockSize;
// chunk = 1,
bool bcancel = false;
double sum = 0;
std::vector<hsize_t> offset = {0,0},
count = {1,1},
stride = {1,1},
block = {1,1};
// Set minimum elements in block (mandatory : minimum = 2 * longest line)
if( dElementsBlock < dimensionSize * 2 ) {
minimumBlockSize = dimensionSize * 2;
} else {
minimumBlockSize = dElementsBlock;
}
// Rcpp::Rcout<<"\nDebug 2: minimumBlockSize=" << minimumBlockSize;
// Poso el codi per llegir els blocks aquí i desprès a dins hauria d'anar-hi la j
if( idim0 == idim1)
{
// Rcpp::Rcout<<"\nDins Cholesky_decomposition -> idim0 == idim1";
while ( readedRows < dimensionSize ) {
rowstoRead = ( -2 * readedRows - 1 + std::sqrt( pow(2*readedRows, 2) - 4 * readedRows + 8 * minimumBlockSize + 1) ) / 2;
if (rowstoRead <= 0) {
rowstoRead = minimumBlockSize; // Minimum progress to avoid infinite loop
}
if( readedRows + rowstoRead > idim0) { // Max size bigger than data to read ?
rowstoRead = idim0 - readedRows;
}
if (readedRows == 0) {
// Primer bloque: count[0] = dimensionSize, count[1] = rowstoRead
size_t potential_elements = static_cast<size_t>(dimensionSize) * static_cast<size_t>(rowstoRead);
size_t optimalSize = getOptimalBlockElements();
if (potential_elements > optimalSize) {
rowstoRead = static_cast<int>(MAXCHOLBLOCKSIZE / dimensionSize);
if (rowstoRead < 1) rowstoRead = 1;
}
} else {
// Bloques siguientes: count[0] = dimensionSize - readedRows + 1, count[1] = readedRows + rowstoRead
size_t potential_elements = static_cast<size_t>(dimensionSize - readedRows + 1) * static_cast<size_t>(readedRows + rowstoRead);
size_t optimalSize = getOptimalBlockElements();
if (potential_elements > optimalSize) {
int max_cols = static_cast<int>(MAXCHOLBLOCKSIZE / (dimensionSize - readedRows + 1));
rowstoRead = max_cols - readedRows;
if (rowstoRead < 1) rowstoRead = 1;
}
}
offset[0] = readedRows;
if( readedRows == 0) {
count[0] = dimensionSize - readedRows;
count[1] = rowstoRead;
} else {
offset[0] = offset[0] - 1; // We need results from previous line to compute next line results
count[1] = readedRows + rowstoRead;
count[0] = dimensionSize - readedRows + 1;
}
readedRows = readedRows + rowstoRead; // Ho preparem perquè desprès necessitarem llegir a partir de la línea anterior
Eigen::MatrixXd A, L;
// Rcpp::Rcout<<"\nDebug 7: A L";
// CAMBIO QUIRÚRGICO: Verificar tamaño de bloque para big-matrix
size_t block_elements = static_cast<size_t>(count[0]) * static_cast<size_t>(count[1]);
// size_t max_elements = MAXCHOLBLOCKSIZE; // ~400 MB por vector, 800 MB total
if (block_elements > MAXCHOLBLOCKSIZE) {
// Recalcular rowstoRead para bloque más pequeño
rowstoRead = static_cast<int>(MAXCHOLBLOCKSIZE / count[0]);
if (rowstoRead < 1) rowstoRead = 1;
// Recalcular count[1]
if( readedRows == 0) {
count[1] = rowstoRead;
} else {
count[1] = readedRows + rowstoRead;
}
// Actualizar readedRows
readedRows = (readedRows > rowstoRead) ? (readedRows - rowstoRead + rowstoRead) : readedRows + rowstoRead;
}
std::vector<double> vdA( count[0] * count[1] );
inDataset->readDatasetBlock( {offset[0], offset[1]}, {count[0], count[1]}, stride, block, vdA.data() );
A = Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> (vdA.data(), count[0], count[1] );
std::vector<double> vdL( count[0] * count[1] );
outDataset->readDatasetBlock( {offset[0], offset[1]}, {count[0], count[1]}, stride, block, vdL.data() );
L = Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> (vdL.data(), count[0], count[1] );
if( offset[0] != 0 )
rowstoRead = rowstoRead + 1;
if( rowstoRead > static_cast<int>(count[1]) ) {
rowstoRead = count[1];
}
//. 2025/11/21 .// for (int j = 0; j < rowstoRead; j++) {
for (int j = 0; j < rowstoRead; j++) {
if( j + offset[0] == 0) {
L(j, j) = std::sqrt(A(j,j));
} else {
L(j, j + offset[0]) = std::sqrt(A(j,j + offset[0]) - (L.row(j).head(j + offset[0]).array().pow(2).sum() ));
}
#pragma omp parallel for num_threads( get_number_threads(threads, R_NilValue) ) private(sum) shared (A,L,j) schedule(dynamic) if (j < readedRows - 1)
//. 2025/11/21 .// for ( int i = j + 1; i < dimensionSize - offset[0] ; i++ )
for ( int i = j + 1; i < dimensionSize - static_cast<int>(offset[0]) ; i++ )
{
if(bcancel == false) {
if( j + static_cast<int>(offset[0]) > 0) {
sum = (L.block(i, 0, 1, j + static_cast<int>(offset[0])).array() * L.block(j, 0, 1, j + static_cast<int>(offset[0])).array()).array().sum();
if( sum != sum ) {
Rcpp::Rcout<<"\n Can't get inverse matrix using Cholesky decomposition matrix is not positive definite\n";
bcancel = true;
}
} else {
sum = 0;
}
if(!bcancel){
L(i,j + static_cast<int>(offset[0])) = (1/L(j,j + static_cast<int>(offset[0]))*(A(i,j + static_cast<int>(offset[0])) - sum));
}
}
}
}
if(bcancel == true) {
return(1);
}
if( offset[0] != 0) {
offset[0] = offset[0] + 1;
count[0] = count[0] - 1;
outDataset->writeDatasetBlock( Rcpp::wrap(L.block(1, 0, L.rows()-1, L.cols())), offset, count, stride, block, false);
} else {
outDataset->writeDatasetBlock( Rcpp::wrap(L), offset, count, stride, block, false);
}
// Rcpp::Rcout<<"\nDebug 13";
offset[0] = offset[0] + count[0] - 1;
// Rcpp::Rcout<<"\nDins Cholesky_decomposition -> Despres escriptura ";
}
} else {
throw std::range_error("non-conformable arguments");
}
// Rcpp::Rcout<<"\nDins Cholesky_decomposition -> Bye Bye...";
} catch( H5::FileIException& error ) {
checkClose_file(inDataset, outDataset);
inDataset = outDataset = nullptr;
Rcpp::Rcerr<<"c++ exception Cholesky_decomposition_intermediate_hdf5 (File IException)";
return(2);
} catch( H5::GroupIException & error ) {
checkClose_file(inDataset, outDataset);
inDataset = outDataset = nullptr;
Rcpp::Rcerr << "c++ exception Cholesky_decomposition_intermediate_hdf5 (Group IException)";
return(2);
} catch( H5::DataSetIException& error ) {
checkClose_file(inDataset, outDataset);
inDataset = outDataset = nullptr;
Rcpp::Rcerr << "c++ exception Cholesky_decomposition_intermediate_hdf5 (DataSet IException)";
return(2);
} catch(std::exception& ex) {
checkClose_file(inDataset, outDataset);
inDataset = outDataset = nullptr;
Rcpp::Rcerr << "c++ exception Cholesky_decomposition_intermediate_hdf5" << ex.what();
return(2);
} catch (...) {
checkClose_file(inDataset, outDataset);
inDataset = outDataset = nullptr;
Rcpp::Rcerr<<"\nC++ exception Cholesky_decomposition_intermediate_hdf5 (unknown reason)";
return(2);
}
return(0);
}8 Usage Example
#include "BigDataStatMeth.hpp"
// Example usage
auto result = Cholesky_decomposition_intermediate_hdf5(...);