RcppbdSVD_hdf5
C++ Function Reference
1 Signature
void BigDataStatMeth::RcppbdSVD_hdf5(std::string filename, std::string strsubgroup, std::string strdataset, int k, int q, int nev, bool bcenter, bool bscale, double dthreshold, bool bforce, bool asRowMajor, Rcpp::Nullable< Rcpp::CharacterVector > method=R_NilValue, Rcpp::Nullable< int > ithreads=R_NilValue)2 Description
Main SVD computation function for HDF5 matrices.
3 Parameters
filename(std::string): Path to the HDF5 file containing input matrixstrsubgroup(std::string): Group path within the HDF5 filestrdataset(std::string): Dataset name containing the matrixk(int): Number of local SVDs to concatenate at each levelq(int): Number of decomposition levelsnev(int): Number of eigenvalues per blockbcenter(bool): Whether to center the data before SVDbscale(bool): Whether to scale the data before SVDdthreshold(double): Numerical threshold for computationsbforce(bool): Whether to overwrite existing resultsasRowMajor(bool): Whether to interpret matrix as row-majormethod(Rcpp::Nullable< Rcpp::CharacterVector >): Computation method (“auto”, “blocks”, “full”)ithreads(Rcpp::Nullable< int >): Number of parallel threads (optional)
4 Details
This is the main interface for computing SVD of matrices stored in HDF5 format. It automatically selects between direct LAPACK computation for small matrices and block-wise decomposition for large matrices.
5 Call Graph
6 Source Code
NoteImplementation
File: inst/include/hdf5Algebra/matrixSvd.hpp • Lines 370-475
inline void RcppbdSVD_hdf5( std::string filename, std::string strsubgroup, std::string strdataset,
int k, int q, int nev, bool bcenter, bool bscale, double dthreshold,
bool bforce, bool asRowMajor,
Rcpp::Nullable<Rcpp::CharacterVector> method = R_NilValue,
Rcpp::Nullable<int> ithreads = R_NilValue)
{
BigDataStatMeth::hdf5Dataset* dsA = nullptr;
BigDataStatMeth::hdf5Dataset* dsu = nullptr;
BigDataStatMeth::hdf5Dataset* dsv = nullptr;
BigDataStatMeth::hdf5Dataset* dsd = nullptr;
try {
std::string strMethod;
std::vector<hsize_t> stride = {1, 1},
block = {1, 1},
offset = {0, 0},
count = {0, 0};
std::vector<std::string> strMethods = {"auto", "blocks", "full"};
if(method.isNull()) strMethod = "auto" ;
else strMethod = Rcpp::as<std::string>(method);
dsA = new BigDataStatMeth::hdf5Dataset(filename, strsubgroup, strdataset, false);
dsA->openDataset();
if( dsA->getDatasetptr() != nullptr ) {
// Create results folder
std::string stroutgroup = "SVD/"+ strdataset;
std::vector<hsize_t> dims_out = {dsA->nrows(), dsA->ncols()};;
count = { dims_out[0], dims_out[1]};
// Small matrices ==> Direct SVD (lapack)
if( (dims_out[0] * dims_out[1] < (MAXELEMSINBLOCK / 20) && strMethod == "auto") || strMethod == "full" ) {
// Rcpp::Rcout<<"\nEste, aquà - 1";
Eigen::MatrixXd X;
svdeig retsvd;
std::vector<double> vdA( count[0] * count[1] );
dsA->readDatasetBlock( {offset[0], offset[1]}, {count[0], count[1]}, stride, block, vdA.data() );
if(asRowMajor == true) {
Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> X (vdA.data(), count[0], count[1] );
retsvd = RcppbdSVD_lapack(X, bcenter, bscale, false);
} else {
Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>> X (vdA.data(), count[1], count[0] );
retsvd = RcppbdSVD_lapack(X, bcenter, bscale, false);
}
dsu = new hdf5Dataset(filename, stroutgroup, "u", bforce);
dsu->createDataset( retsvd.u.rows(), retsvd.u.cols(), "real");
dsu->writeDataset( Rcpp::wrap(retsvd.u) );
dsv = new hdf5Dataset(filename, stroutgroup, "v", bforce);
dsv->createDataset( retsvd.v.rows(), retsvd.v.cols(), "real");
dsv->writeDataset( Rcpp::wrap(retsvd.v) );
dsd = new hdf5Dataset(filename, stroutgroup, "d", bforce);
//.. COMENTAT 2025-02-06 ..// dsd->createDataset( retsvd.d.size(), 1, "real");
dsd->createDataset( 1, retsvd.d.size(), "real");
dsd->writeDataset( Rcpp::wrap(retsvd.d) );
} else {
// Rcpp::Rcout<<"\nEste, aquà - 2";
dsu = new BigDataStatMeth::hdf5Dataset(filename, stroutgroup, "u", true);
dsv = new BigDataStatMeth::hdf5Dataset(filename, stroutgroup, "v", true);
dsd = new BigDataStatMeth::hdf5Dataset(filename, stroutgroup, "d", true);
if( dsA->getDatasetptr() != nullptr) {
RcppbdSVD_hdf5_Block( dsA, dsu, dsv, dsd, k, q, nev, bcenter, bscale, count[1], count[0], dthreshold, ithreads );
}
}
}
delete dsu; dsu = nullptr;
delete dsv; dsv = nullptr;
delete dsd; dsd = nullptr;
delete dsA; dsA = nullptr;
} catch( H5::FileIException& error ) { // catch failure caused by the H5File operations
checkClose_file(dsA, dsd, dsu, dsv);
Rcpp::Rcerr<<"\nc++ exception RcppbdSVD_hdf5 (File IException)\n";
return void();
} catch( H5::DataSetIException& error ) { // catch failure caused by the DataSet operations
checkClose_file(dsA, dsd, dsu, dsv);
Rcpp::Rcerr<<"\nc++ exception RcppbdSVD_hdf5 (DataSet IException)\n";
return void();
} catch(std::exception &ex) {
checkClose_file(dsA, dsd, dsu, dsv);
Rcpp::Rcerr<<"c++ exception RcppbdSVD_hdf5 \n"<< ex.what();
return void();
} catch (...) {
checkClose_file(dsA, dsd, dsu, dsv);
Rcpp::Rcerr<<"\nC++ exception RcppbdSVD_hdf5 (unknown reason)";
return void();
}
return void();
}7 Usage Example
#include "BigDataStatMeth.hpp"
// Example usage
auto result = RcppbdSVD_hdf5(...);