RcppbdSVD_hdf5_Block
C++ Function Reference
1 Signature
void BigDataStatMeth::RcppbdSVD_hdf5_Block(BigDataStatMeth::hdf5Dataset *dsA, BigDataStatMeth::hdf5Dataset *dsu, BigDataStatMeth::hdf5Dataset *dsv, BigDataStatMeth::hdf5Dataset *dsd, int k, int q, int nev, bool bcenter, bool bscale, int irows, int icols, double dthreshold, Rcpp::Nullable< int > threads=R_NilValue)2 Description
Block-wise SVD decomposition for large HDF5 matrices.
3 Parameters
dsA(BigDataStatMeth::hdf5Dataset *): Input HDF5 dataset containing the matrixdsu(BigDataStatMeth::hdf5Dataset *): Output HDF5 dataset for left singular vectors (U)dsv(BigDataStatMeth::hdf5Dataset *): Output HDF5 dataset for right singular vectors (V)dsd(BigDataStatMeth::hdf5Dataset *): Output HDF5 dataset for singular values (S)k(int): Number of local SVDs to concatenate at each levelq(int): Number of decomposition levelsnev(int): Number of eigenvalues per blockbcenter(bool): Whether to center the databscale(bool): Whether to scale the datairows(int): Number of rows in input matrixicols(int): Number of columns in input matrixdthreshold(double): Threshold for numerical computationsthreads(Rcpp::Nullable< int >): Number of parallel threads (optional)
4 Details
Performs SVD computation on large matrices using block-wise decomposition strategy. This function handles matrices too large to fit in memory by processing them in blocks and combining results.
5 Call Graph
6 Source Code
NoteImplementation
File: inst/include/hdf5Algebra/matrixSvd.hpp • Lines 239-441
inline void RcppbdSVD_hdf5_Block( BigDataStatMeth::hdf5Dataset* dsA,
BigDataStatMeth::hdf5Dataset* dsu, BigDataStatMeth::hdf5Dataset* dsv,
BigDataStatMeth::hdf5Dataset* dsd, int k, int q, int nev, bool bcenter, bool bscale,
int irows, int icols, double dthreshold, Rcpp::Nullable<int> threads = R_NilValue )
{
try{
std::unique_ptr<BigDataStatMeth::hdf5Dataset> dsnormalizedData(nullptr);
std::unique_ptr<BigDataStatMeth::hdf5DatasetInternal> dsJoined(nullptr);
std::unique_ptr<BigDataStatMeth::hdf5DatasetInternal> dsnormalizedData_i(nullptr);
std::vector<hsize_t> stride = {1, 1},
block = {1, 1};
svdeig retsvd;
// Eigen::MatrixXd nX;
Eigen::MatrixXd matlast;
Eigen::MatrixXd v;
bool transp = false;
std::string strGroupName = "tmpgroup",
strPrefix; // Outpu group name for temporal data
Rcpp::CharacterVector strvmatnames = {"A","B","C","D","E","F","G","H","I","J","K",
"L","M","N","O","P","Q","R","S","T","U","V",
"W","X","Y","Z"};
strPrefix = strvmatnames[q-1];
if(irows >= icols) {
transp = true;
}
First_level_SvdBlock_decomposition_hdf5( dsA, strGroupName, k, q, nev, bcenter, bscale, dthreshold, threads);
for(int j = 1; j < q; j++) { // For each decomposition level :
Next_level_SvdBlock_decomposition_hdf5( dsA, strGroupName, k, j, dthreshold, threads);
}
// Get dataset names
Rcpp::StringVector joindata = dsA->getDatasetNames(strGroupName, strPrefix, "");
// 1.- Join matrix and remove parts from file
std::string strnewdataset = std::string((joindata[0])).substr(0,1);
// dsJoined = new hdf5DatasetInternal(dsA->getFullPath(), strGroupName, strnewdataset, true);
dsJoined.reset( new BigDataStatMeth::hdf5DatasetInternal(dsA->getFullPath(), strGroupName, strnewdataset, true) );
join_datasets( dsJoined.get(), strGroupName, joindata, false, true );
// 2.- Get SVD from Blocks full mattrix
hsize_t* dims_out = dsJoined->dim();
std::vector<double> vdreaded( dims_out[0] * dims_out[1] );
dsJoined->readDatasetBlock( {0, 0}, {dims_out[0], dims_out[1]}, {1, 1}, {1, 1}, vdreaded.data() );
matlast = Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> (vdreaded.data(), dims_out[0], dims_out[1] );
// delete dsJoined; dsJoined = nullptr;
retsvd = RcppbdSVD_lapack(matlast, false, false, false);
// Write results to hdf5 file : in folder "SVD" and dataset "SVD".<name input dataset>
dsd->inheritCompressionLevel(dsA->getCompressionLevel());
dsd->createDataset( 1, retsvd.d.size(), "real");
dsd->writeDataset( Rcpp::wrap(retsvd.d) );
// 3.- Compute v = source * (u / d) block-wise — never loads full matrix into RAM
{
// Pre-divide u by d (tiny operation, u is n_small × k)
Eigen::MatrixXd u_div_d = retsvd.u.array().rowwise() /
Eigen::Map<Eigen::RowVectorXd>(retsvd.d.data(), retsvd.d.size()).array();
// Write u_div_d to a small temp HDF5 dataset
std::unique_ptr<BigDataStatMeth::hdf5Dataset> ds_udivd(nullptr);
ds_udivd.reset( new BigDataStatMeth::hdf5Dataset(dsA->getFullPath(), strGroupName, "udivd", true) );
ds_udivd->setCompressionLevel(0);
ds_udivd->createDataset( u_div_d.rows(), u_div_d.cols(), "real" );
ds_udivd->writeDataset( Rcpp::wrap(u_div_d) );
// Temp dataset for the product result (also small: n_small × k)
std::unique_ptr<BigDataStatMeth::hdf5Dataset> ds_vtmp(nullptr);
ds_vtmp.reset( new BigDataStatMeth::hdf5Dataset(dsA->getFullPath(), strGroupName, "vtmp", true) );
// if( bcenter == true || bscale == true ) {
// dsnormalizedData.reset( new BigDataStatMeth::hdf5Dataset( dsA->getFullPath(), strGroupName, "normalmatrix", false) );
// dsnormalizedData->openDataset();
//
// multiplication( dsnormalizedData.get(), ds_udivd.get(), ds_vtmp.get(),
// transp, false, R_NilValue, R_NilValue, threads );
// } else if( dsA->getGroupName().find("NORMALIZED_T") != std::string::npos ) {
// dsnormalizedData_i.reset( new BigDataStatMeth::hdf5DatasetInternal( dsA->getFullPath(), dsA->getGroupName(), dsA->getDatasetName(), false) );
// dsnormalizedData_i->openDataset();
// multiplication( dsnormalizedData_i.get(), ds_udivd.get(), ds_vtmp.get(),
// false, false, R_NilValue, R_NilValue, threads );
// } else {
// multiplication( dsA, ds_udivd.get(), ds_vtmp.get(),
// !transp, false, R_NilValue, R_NilValue, threads );
// }
//
// // Read result back — it is n_small × k, always fits in RAM
// hsize_t* vdims = ds_vtmp->dim();
// std::vector<double> vdbuf( vdims[0] * vdims[1] );
// ds_vtmp->readDatasetBlock( {0,0}, {vdims[0], vdims[1]}, stride, block, vdbuf.data() );
// v = Eigen::Map<Eigen::MatrixXd>( vdbuf.data(), vdims[1], vdims[0] );
if( bcenter == true || bscale == true ) {
dsnormalizedData.reset( new BigDataStatMeth::hdf5Dataset( dsA->getFullPath(), strGroupName, "normalmatrix", false) );
dsnormalizedData->openDataset();
if( transp == false ) {
// normalmatrix dims [R_nrows, R_ncols] — multiplication() works correctly here
multiplication( dsnormalizedData.get(), ds_udivd.get(), ds_vtmp.get(),
false, false, R_NilValue, R_NilValue, threads );
} else {
// transp=true: multiplication() reads out of bounds for this storage convention.
// v = normalmatrix_R * u_div_d = (R_nrows × R_ncols) * (R_ncols × k).
// Read normalmatrix in row-blocks — never fully in RAM.
const hsize_t nR = dsnormalizedData->nrows(); // R_nrows
const hsize_t nC = dsnormalizedData->ncols(); // R_ncols
const hsize_t blk = std::max( (hsize_t)1, MAXELEMSINBLOCK / nC );
v = Eigen::MatrixXd::Zero( nR, u_div_d.cols() );
for( hsize_t off = 0; off < nR; off += blk ) {
const hsize_t cnt = std::min( blk, nR - off );
std::vector<double> buf( cnt * nC );
dsnormalizedData->readDatasetBlock( {off, 0}, {cnt, nC}, stride, block, buf.data() );
Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
Ablock( buf.data(), cnt, nC );
v.middleRows( off, cnt ).noalias() = Ablock * u_div_d;
}
}
} else if( dsA->getGroupName().find("NORMALIZED_T") != std::string::npos ) {
// hdf5Dataset ==> Mirall plot
dsnormalizedData_i.reset( new BigDataStatMeth::hdf5DatasetInternal( dsA->getFullPath(), dsA->getGroupName(), dsA->getDatasetName(), false) );
dsnormalizedData_i->openDataset();
// hsize_t* tmp_dim = dsnormalizedData_i->dim();
// std::unique_ptr<BigDataStatMeth::hdf5Dataset> dsnormalizedData_ni(nullptr);
// dsnormalizedData_ni.reset( new BigDataStatMeth::hdf5Dataset( dsA->getFullPath(), dsA->getGroupName(), dsA->getDatasetName(), false) );
// dsnormalizedData_ni->openDataset();
//
// tmp_dim = dsnormalizedData_ni->dim();
multiplication( dsnormalizedData_i.get(), ds_udivd.get(), ds_vtmp.get(), true, false, R_NilValue, R_NilValue, threads );
} else {
multiplication( dsA, ds_udivd.get(), ds_vtmp.get(),
!transp, false, R_NilValue, R_NilValue, threads );
}
// Read result back from ds_vtmp — only when populated by multiplication()
if( !( (bcenter == true || bscale == true) && transp == true ) ) {
hsize_t* vdims = ds_vtmp->dim();
std::vector<double> vdbuf( vdims[0] * vdims[1] );
ds_vtmp->readDatasetBlock( {0,0}, {vdims[0], vdims[1]}, stride, block, vdbuf.data() );
v = Eigen::Map<Eigen::MatrixXd>( vdbuf.data(), vdims[1], vdims[0] );
}
}
// 4.- resuls / svdA$d
// v = v.array().rowwise()/(retsvd.d).transpose().array();
//.. 2026/05/03 ..//v = v.array().rowwise() / Eigen::Map<Eigen::RowVectorXd>(retsvd.d.data(), (retsvd.d).size()).array();
if (transp == true) {
dsu->inheritCompressionLevel(dsA->getCompressionLevel());
dsu->createDataset( v.rows(), v.cols(), "real");
dsv->inheritCompressionLevel(dsA->getCompressionLevel());
dsv->createDataset( retsvd.u.rows(), retsvd.u.cols(), "real");
dsu->writeDataset(Rcpp::wrap(v));
dsv->writeDataset(Rcpp::wrap(retsvd.u));
} else {
dsu->inheritCompressionLevel(dsA->getCompressionLevel());
dsu->createDataset( retsvd.u.rows(), retsvd.u.cols(), "real");
dsv->inheritCompressionLevel(dsA->getCompressionLevel());
dsv->createDataset( v.rows(), v.cols(), "real");
dsu->writeDataset(Rcpp::wrap(retsvd.u));
dsv->writeDataset(Rcpp::wrap(v));
}
remove_elements(dsA->getFileptr(), strGroupName);
} catch( H5::FileIException& error ) { // catch failure caused by the H5File operations
// checkClose_file(dsA, dsd, dsu, dsv, dsnormalizedData, dsJoined, dsnormalizedData_i);
throw std::runtime_error("c++ exception RcppbdSVD_hdf5_Block (File IException)");
} catch( H5::DataSetIException& error ) { // catch failure caused by the DataSet operations
// checkClose_file(dsA, dsd, dsu, dsv, dsnormalizedData, dsJoined, dsnormalizedData_i);
throw std::runtime_error("c++ exception RcppbdSVD_hdf5_Block (DataSet IException)");
} catch(std::exception &ex) {
// checkClose_file(dsA, dsd, dsu, dsv, dsnormalizedData, dsJoined, dsnormalizedData_i);
throw std::runtime_error(std::string("C++ exception RcppbdSVD_hdf5_Block: ") + ex.what());
} catch (...) {
// checkClose_file(dsA, dsd, dsu, dsv, dsnormalizedData, dsJoined, dsnormalizedData_i);
throw std::runtime_error("C++ exception RcppbdSVD_hdf5_Block (unknown reason)");
}
return void();
}7 Usage Example
#include "BigDataStatMeth.hpp"
// Example usage
auto result = RcppbdSVD_hdf5_Block(...);