RcppSplit_matrix_hdf5_internal
C++ Function Reference
1 Signature
void BigDataStatMeth::RcppSplit_matrix_hdf5_internal(BigDataStatMeth::hdf5Dataset *dstosplit, std::string stroutgroup, std::string stroutdataset, bool bycols, int nblocks, int iblocksize, int irows, int icols)2 Description
Splits an HDF5 dataset into multiple smaller datasets (internal C++ interface)
3 Parameters
dstosplit(BigDataStatMeth::hdf5Dataset *): Input dataset to splitstroutgroup(std::string): Output group pathstroutdataset(std::string): Base name for output datasetsbycols(bool): Whether to split by columns (true) or rows (false)nblocks(int): Number of blocks to create (if > 0)iblocksize(int): Size of each block (if > 0)irows(int): Number of rows in input dataseticols(int): Number of columns in input dataset
4 Details
dstosplitInput dataset to split stroutgroupOutput group path stroutdatasetBase name for output datasets bycolsWhether to split by columns (true) or rows (false) nblocksNumber of blocks to create (if > 0) iblocksizeSize of each block (if > 0) irowsNumber of rows in input dataset icolsNumber of columns in input dataset Implementation features:Flexible block specification:By number of blocks (nblocks > 0)By block size (iblocksize > 0) Automatic block size calculationEfficient memory usageError checking for parameters
5 Call Graph
6 Source Code
NoteImplementation
File: inst/include/hdf5Utilities/hdf5SplitDataset.hpp • Lines 196-299
inline void RcppSplit_matrix_hdf5_internal ( BigDataStatMeth::hdf5Dataset* dstosplit,
std::string stroutgroup, std::string stroutdataset,
bool bycols, int nblocks, int iblocksize,
int irows, int icols )
{
BigDataStatMeth::hdf5Dataset* dsOut = nullptr;
try {
std::string newDatasetName = "";
hsize_t inrows = irows,
incols = icols,
ii = 0,
kk = 0;
int blocks = nblocks,
blocksize = iblocksize;;
std::vector<hsize_t> stride = {1, 1},
block = {1, 1};
if( nblocks <= 0 && iblocksize <= 0 ){
checkClose_file(dstosplit);
Rf_error( "\n Block size or number of blocks are needed to proceed with matrix split. Please, review parameters");
return void();
} else if (nblocks > 0 && iblocksize > 0 ) {
checkClose_file(dstosplit);
Rf_error( "\nBlock size and number of blocks are defined, please define only one option, split by number of blocks or by block size");
return void();
} else if( nblocks > 0 ) {
double module;
if(bycols == true) {
blocksize = icols / blocks;
module = icols % blocksize;
} else {
blocksize = irows / blocks;
module = irows % blocksize;
}
if (module > 0) { blocksize = blocksize + 1; }
}
if(blocksize > 0) {
if( bycols == true ) {
blocks = (icols + blocksize - 1) / blocksize;
incols = blocksize;
} else {
blocks = (irows + blocksize - 1) / blocksize;
inrows = blocksize;
}
}
for ( int i=0; i<blocks; i++)
{
newDatasetName = stroutgroup + "/" + stroutdataset + "." + std::to_string(i);
if( bycols == true) {
kk = i * blocksize;
if( kk + static_cast<hsize_t>(blocksize) > static_cast<hsize_t>(icols)) {
incols = static_cast<hsize_t>(icols) - kk;
}
} else {
ii = i * blocksize;
if( ii + static_cast<hsize_t>(blocksize) > static_cast<hsize_t>(irows)) {
inrows = static_cast<hsize_t>(irows) - ii;
}
}
std::vector<double> vdts( inrows * incols );
dstosplit->readDatasetBlock( {ii, kk}, {inrows, incols}, stride, block, vdts.data() );
dsOut = new BigDataStatMeth::hdf5Dataset(dstosplit->getFileName(), newDatasetName, true);
dsOut->createDataset( incols, inrows, "real");
if( dsOut->getDatasetptr() != nullptr ){
dsOut->writeDataset(vdts.data());
}
delete dsOut; dsOut = nullptr;
}
} catch( H5::FileIException& error ) {
checkClose_file(dstosplit, dsOut);
Rf_error( "c++ exception RcppSplit_matrix_hdf5_internal(File IException )");
} catch( H5::DataSetIException& error ) {
checkClose_file(dstosplit, dsOut);
Rf_error( "c++ exception RcppSplit_matrix_hdf5_internal (DataSet IException )");
} catch( H5::DataSpaceIException& error ) {
checkClose_file(dstosplit, dsOut);
Rf_error( "c++ exception RcppSplit_matrix_hdf5_internal (DataSpace IException )");
} catch(std::exception &ex) {
checkClose_file(dstosplit, dsOut);
Rf_error( "\nC++ exception RcppSplit_matrix_hdf5_internal : %s",ex.what());
} catch (...) {
checkClose_file(dstosplit, dsOut);
Rf_error( "\nC++ exception RcppSplit_matrix_hdf5_internal (unknown reason)");
}
return void();
}7 Usage Example
#include "BigDataStatMeth.hpp"
// Example usage
auto result = RcppSplit_matrix_hdf5_internal(...);