RcppReduce_dataset_hdf5
C++ Function Reference
1 Signature
void BigDataStatMeth::RcppReduce_dataset_hdf5(std::string filename, std::string stringroup, std::string stroutgroup, std::string stroutdataset, std::string strreducefunction, bool boverwrite, bool bremove, bool binternal)
2 Description
Reduces multiple HDF5 datasets into a single dataset using specified operation.
3 Parameters
- filename (std::string): HDF5 file path
- stringroup (std::string): Input group containing datasets to reduce
- stroutgroup (std::string): Output group for reduced dataset
- stroutdataset (std::string): Name of output dataset
- strreducefunction (std::string): Reduction operation ("+" or "-")
- boverwrite (bool): Whether to overwrite existing output dataset
- bremove (bool): Whether to remove input datasets after reduction
- binternal (bool): Whether this is an internal call (affects data layout)
4 Details
- filename: HDF5 file path
- stringroup: Input group containing datasets to reduce
- stroutgroup: Output group for reduced dataset
- stroutdataset: Name of output dataset
- strreducefunction: Reduction operation ("+" or "-")
- boverwrite: Whether to overwrite existing output dataset
- bremove: Whether to remove input datasets after reduction
- binternal: Whether this is an internal call (affects data layout)

Implementation approach:
1. Opens input HDF5 file and gets dataset list
2. Processes datasets sequentially:
   - Reads each dataset into memory
   - Adjusts dimensions if necessary
   - Applies reduction operation
3. Creates output dataset with reduced result
5 Call Graph
6 Source Code
File: inst/include/hdf5Utilities/hdf5ReduceDataset.hpp • Lines 101-254
inline void RcppReduce_dataset_hdf5 ( std::string filename,
std::string stringroup,
std::string stroutgroup,
std::string stroutdataset,
std::string strreducefunction,
bool boverwrite,
bool bremove,
bool binternal)
{
BigDataStatMeth::hdf5File* objFile = nullptr;
BigDataStatMeth::hdf5Dataset* dsIn = nullptr;
BigDataStatMeth::hdf5Dataset* dsOut = nullptr;
try {
hsize_t* dims_out;
std::vector<hsize_t> stride = {1, 1},
block = {1, 1},
offset = {0, 0};
Eigen::MatrixXd fullReduced;
Eigen::MatrixXd newRead;
int ndatasets;
objFile = new BigDataStatMeth::hdf5File(filename, false);
objFile->openFile("r");
// Get dataset names without prefix, all datasets inside the group
Rcpp::StringVector joindata = objFile->getDatasetNames(stringroup, "", "");
delete objFile; // Close file
ndatasets = joindata.size();
for ( int i=0; i< ndatasets; i++)
{
dsIn = new BigDataStatMeth::hdf5Dataset(filename, stringroup + "/" + joindata[i], false);
dsIn->openDataset();
if( dsIn->getDatasetptr() == nullptr) {
checkClose_file(dsIn);
Rcpp::Rcerr<< "c++ exception RcppReduce_dataset_hdf5 (Dataset IException )" << std::endl;
return void();
}
dims_out = dsIn->dim();
std::vector<double> vdIn( dims_out[0] * dims_out[1] );
dsIn->readDatasetBlock( {offset[0], offset[1]}, {dims_out[0], dims_out[1]}, stride, block, vdIn.data() );
if( i == 0 ) {
if(binternal == true)
fullReduced = Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> (vdIn.data(), dims_out[0], dims_out[1] );
else
fullReduced = Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>> (vdIn.data(), dims_out[0], dims_out[1] );
} else {
if(binternal == true)
newRead = Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> (vdIn.data(), dims_out[0], dims_out[1] );
else
newRead = Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>> (vdIn.data(), dims_out[0], dims_out[1] );
if( newRead.rows() != fullReduced.rows()){
int difference = std::abs(fullReduced.rows() - newRead.rows());
if( newRead.rows() > fullReduced.rows()) {
newRead.resize( newRead.rows() + difference, Eigen::NoChange);
} else {
fullReduced.resize( fullReduced.rows() + difference, Eigen::NoChange);
}
}
if( newRead.cols() != fullReduced.cols()){
int difference = std::abs(fullReduced.cols() - newRead.cols());
if( newRead.cols() > fullReduced.cols()){
newRead.resize( Eigen::NoChange, newRead.cols() + difference );
} else {
fullReduced.resize( Eigen::NoChange, fullReduced.cols() + difference );
}
}
// Reduce matrix
if( strreducefunction.compare("+")==0) {
fullReduced = fullReduced + newRead;
} else if (strreducefunction.compare("-")==0) {
fullReduced = fullReduced - newRead;
}
}
if( bremove == true){
dsIn->remove();
}
delete dsIn; dsIn = nullptr;
}
dsOut = new BigDataStatMeth::hdf5Dataset(filename, stroutgroup, stroutdataset, boverwrite);
if(binternal == true) {
dsOut->createDataset( fullReduced.rows() , fullReduced.cols(), "real");
// if( dsOut->getDatasetptr() != nullptr) {
// dsOut->writeDataset(Rcpp::wrap(fullReduced));
// } else {
// checkClose_file(dsOut);
// Rcpp::Rcerr<< "c++ exception RcppReduce_dataset_hdf5 (Dataset IException )" << std::endl;
// return void();
// }
} else {
dsOut->createDataset( fullReduced.cols() , fullReduced.rows(), "real");
fullReduced.transposeInPlace();
// if( dsOut->getDatasetptr() != nullptr) {
// fullReduced.transposeInPlace();
// dsOut->writeDataset(Rcpp::wrap(fullReduced));
// } else {
// checkClose_file(dsOut);
// Rcpp::Rcerr<< "c++ exception RcppReduce_dataset_hdf5 (Dataset IException )" << std::endl;
// return void();
// }
}
if( dsOut->getDatasetptr() != nullptr) {
dsOut->writeDataset(Rcpp::wrap(fullReduced));
} else {
checkClose_file(dsOut);
Rcpp::Rcerr<< "c++ exception RcppReduce_dataset_hdf5 (Dataset IException )" << std::endl;
return void();
}
delete dsOut; dsOut = nullptr;
}catch( H5::FileIException& error ) {
checkClose_file(dsIn, dsOut);
// ::Rf_error( "c++ exception RcppReduce_dataset_hdf5 (File IException )" );
Rcpp::Rcerr<< "c++ exception RcppReduce_dataset_hdf5 (File IException )" << std::endl;
return void();
} catch( H5::DataSetIException& error ) { // catch failure caused by the dstosplit operations
checkClose_file(dsIn, dsOut);
// ::Rf_error( "c++ exception RcppReduce_dataset_hdf5 (dstosplit IException )" );
Rcpp::Rcerr<< "c++ exception RcppReduce_dataset_hdf5 (dstosplit IException )" << std::endl;
return void();
} catch( H5::DataSpaceIException& error ) { // catch failure caused by the DataSpace operations
checkClose_file(dsIn, dsOut);
// ::Rf_error( "c++ exception RcppReduce_dataset_hdf5 (DataSpace IException )" );
Rcpp::Rcerr<< "c++ exception RcppReduce_dataset_hdf5 (DataSpace IException )" << std::endl;
return void();
}
return void();
}7 Usage Example
#include "BigDataStatMeth.hpp"
// Example usage. The function returns void, so there is no result to capture.
// File, group and dataset names below are illustrative: this adds up every
// dataset found under group "partial" and writes the total to "reduced/total",
// overwriting an existing output and keeping the input datasets.
BigDataStatMeth::RcppReduce_dataset_hdf5("data.h5", "partial", "reduced", "total", "+", true, false, false);