RcppbdSVD_hdf5_Block
C++ Function Reference
1 Signature
void BigDataStatMeth::RcppbdSVD_hdf5_Block(BigDataStatMeth::hdf5Dataset *dsA, BigDataStatMeth::hdf5Dataset *dsu, BigDataStatMeth::hdf5Dataset *dsv, BigDataStatMeth::hdf5Dataset *dsd, int k, int q, int nev, bool bcenter, bool bscale, int irows, int icols, double dthreshold, Rcpp::Nullable< int > threads=R_NilValue)2 Description
Block-wise SVD decomposition for large HDF5 matrices.
3 Parameters
dsA(BigDataStatMeth::hdf5Dataset *): Input HDF5 dataset containing the matrixdsu(BigDataStatMeth::hdf5Dataset *): Output HDF5 dataset for left singular vectors (U)dsv(BigDataStatMeth::hdf5Dataset *): Output HDF5 dataset for right singular vectors (V)dsd(BigDataStatMeth::hdf5Dataset *): Output HDF5 dataset for singular values (S)k(int): Number of local SVDs to concatenate at each levelq(int): Number of decomposition levelsnev(int): Number of eigenvalues per blockbcenter(bool): Whether to center the databscale(bool): Whether to scale the datairows(int): Number of rows in input matrixicols(int): Number of columns in input matrixdthreshold(double): Threshold for numerical computationsthreads(Rcpp::Nullable< int >): Number of parallel threads (optional)
4 Details
Performs SVD computation on large matrices using block-wise decomposition strategy. This function handles matrices too large to fit in memory by processing them in blocks and combining results.
5 Call Graph
6 Source Code
NoteImplementation
File: inst/include/hdf5Algebra/matrixSvd.hpp • Lines 171-332
inline void RcppbdSVD_hdf5_Block( BigDataStatMeth::hdf5Dataset* dsA,
BigDataStatMeth::hdf5Dataset* dsu, BigDataStatMeth::hdf5Dataset* dsv,
BigDataStatMeth::hdf5Dataset* dsd, int k, int q, int nev, bool bcenter, bool bscale,
int irows, int icols, double dthreshold, Rcpp::Nullable<int> threads = R_NilValue )
{
BigDataStatMeth::hdf5Dataset* dsnormalizedData = nullptr;
BigDataStatMeth::hdf5Dataset* dsJoined = nullptr;
BigDataStatMeth::hdf5DatasetInternal* dsnormalizedData_i = nullptr;
try{
std::vector<hsize_t> stride = {1, 1},
block = {1, 1};
svdeig retsvd;
// Eigen::MatrixXd nX;
Eigen::MatrixXd matlast;
Eigen::MatrixXd v;
bool transp = false;
std::string strGroupName = "tmpgroup",
strPrefix; // Outpu group name for temporal data
Rcpp::CharacterVector strvmatnames = {"A","B","C","D","E","F","G","H","I","J","K",
"L","M","N","O","P","Q","R","S","T","U","V",
"W","X","Y","Z"};
strPrefix = strvmatnames[q-1];
if(irows >= icols) {
transp = true;
}
First_level_SvdBlock_decomposition_hdf5( dsA, strGroupName, k, q, nev, bcenter, bscale, dthreshold, threads);
for(int j = 1; j < q; j++) { // For each decomposition level :
Next_level_SvdBlock_decomposition_hdf5( dsA, strGroupName, k, j, dthreshold, threads);
}
// Get dataset names
Rcpp::StringVector joindata = dsA->getDatasetNames(strGroupName, strPrefix, "");
// 1.- Join matrix and remove parts from file
std::string strnewdataset = std::string((joindata[0])).substr(0,1);
dsJoined = new hdf5DatasetInternal(dsA->getFullPath(), strGroupName, strnewdataset, true);
join_datasets( dsJoined, strGroupName, joindata, false, true );
// 2.- Get SVD from Blocks full mattrix
hsize_t* dims_out = dsJoined->dim();
std::vector<double> vdreaded( dims_out[0] * dims_out[1] );
dsJoined->readDatasetBlock( {0, 0}, {dims_out[0], dims_out[1]}, {1, 1}, {1, 1}, vdreaded.data() );
matlast = Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> (vdreaded.data(), dims_out[0], dims_out[1] );
delete dsJoined; dsJoined = nullptr;
retsvd = RcppbdSVD_lapack(matlast, false, false, false);
// Write results to hdf5 file : in folder "SVD" and dataset "SVD".<name input dataset>
dsd->createDataset( 1, retsvd.d.size(), "real");
dsd->writeDataset( Rcpp::wrap(retsvd.d) );
// 3.- crossprod initial matrix and svdA$u
if( bcenter == true || bscale == true || (dsA->getGroupName().find("NORMALIZED_T") != std::string::npos) ) {
if(bcenter == true || bscale == true) {
dsnormalizedData = new BigDataStatMeth::hdf5Dataset( dsA->getFullPath(), strGroupName, "normalmatrix", false);
dsnormalizedData->openDataset();
dims_out = dsnormalizedData->dim();
Eigen::MatrixXd A = Eigen::MatrixXd::Zero(dims_out[1], dims_out[0]);
dsnormalizedData->readDatasetBlock( {0, 0}, {dims_out[0], dims_out[1]}, stride, block, A.data() );
delete dsnormalizedData; dsnormalizedData = nullptr;
if(transp == false) {
v = A * retsvd.u;
} else {
v = A.transpose() * retsvd.u;
// v = Rcpp_block_matrix_mul_parallel(A,v retsvd.u, false, false, R_NilValue, threads); // multiplication
}
// v = Rcpp_block_matrix_mul_parallel(A, retsvd.u, false, false, R_NilValue, threads);
} else {
dsnormalizedData_i = new BigDataStatMeth::hdf5DatasetInternal( dsA->getFullPath(), dsA->getGroupName(), dsA->getDatasetName(), false);
dsnormalizedData_i->openDataset();
dims_out = dsnormalizedData_i->dim();
std::vector<double> vdA( dims_out[0] * dims_out[1] );
dsnormalizedData_i->readDatasetBlock( {0, 0}, {dims_out[0], dims_out[1]}, stride, block, vdA.data() );
Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> A (vdA.data(), dims_out[0], dims_out[1]);
delete dsnormalizedData_i; dsnormalizedData_i= nullptr;
v = Rcpp_block_matrix_mul_parallel(A, retsvd.u, false, false, R_NilValue, threads);
}
} else {
dims_out = dsA->dim();
Eigen::MatrixXd A;
std::vector<double> vdA( dims_out[0] * dims_out[1] );
dsA->readDatasetBlock( {0,0}, {dims_out[0], dims_out[1] }, stride, block, vdA.data() );
A = Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>> (vdA.data(), dims_out[1], dims_out[0] );
if(transp == false) {
v = Rcpp_block_matrix_mul_parallel(A, retsvd.u, true, false, R_NilValue, threads); // crossprod
} else {
v = Rcpp_block_matrix_mul_parallel(A, retsvd.u, false, false, R_NilValue, threads); // multiplication
}
}
// 4.- resuls / svdA$d
// v = v.array().rowwise()/(retsvd.d).transpose().array();
v = v.array().rowwise() / Eigen::Map<Eigen::RowVectorXd>(retsvd.d.data(), (retsvd.d).size()).array();
if (transp == true) {
dsu->createDataset( v.rows(), v.cols(), "real");
dsv->createDataset( retsvd.u.rows(), retsvd.u.cols(), "real");
dsu->writeDataset(Rcpp::wrap(v));
dsv->writeDataset(Rcpp::wrap(retsvd.u));
} else {
dsu->createDataset( retsvd.u.rows(), retsvd.u.cols(), "real");
dsv->createDataset( v.rows(), v.cols(), "real");
dsu->writeDataset(Rcpp::wrap(retsvd.u));
dsv->writeDataset(Rcpp::wrap(v));
}
remove_elements(dsA->getFileptr(), strGroupName);
} catch( H5::FileIException& error ) { // catch failure caused by the H5File operations
checkClose_file(dsA, dsd, dsu, dsv, dsnormalizedData, dsJoined, dsnormalizedData_i);
Rcpp::Rcerr<<"\nc++ exception RcppbdSVD_hdf5_Block (File IException)\n";
return void();
} catch( H5::DataSetIException& error ) { // catch failure caused by the DataSet operations
checkClose_file(dsA, dsd, dsu, dsv, dsnormalizedData, dsJoined, dsnormalizedData_i);
Rcpp::Rcerr<<"\nc++ exception RcppbdSVD_hdf5_Block (DataSet IException)\n";
return void();
} catch(std::exception &ex) {
checkClose_file(dsA, dsd, dsu, dsv, dsnormalizedData, dsJoined, dsnormalizedData_i);
Rcpp::Rcerr<< "C++ exception RcppbdSVD_hdf5_Block : "<< ex.what();
return void();
} catch (...) {
checkClose_file(dsA, dsd, dsu, dsv, dsnormalizedData, dsJoined, dsnormalizedData_i);
Rcpp::Rcerr<<"\nC++ exception RcppbdSVD_hdf5_Block (unknown reason)";
return void();
}
return void();
}7 Usage Example
#include "BigDataStatMeth.hpp"
// Example usage
auto result = RcppbdSVD_hdf5_Block(...);