RcppSplit_matrix_hdf5
C++ Function Reference
1 Signature
void BigDataStatMeth::RcppSplit_matrix_hdf5(BigDataStatMeth::hdf5Dataset *dstosplit, bool bycols, std::string stroutgroup, std::string stroutdataset, int blocksize, int irows, int icols)
2 Description
Splits an HDF5 dataset into multiple smaller datasets (R interface)
3 Parameters
- dstosplit (BigDataStatMeth::hdf5Dataset *): Input dataset to split
- bycols (bool): Whether to split by columns (true) or rows (false)
- stroutgroup (std::string): Output group path
- stroutdataset (std::string): Base name for output datasets
- blocksize (int): Size of each block
- irows (int): Number of rows in input dataset
- icols (int): Number of columns in input dataset
4 Details
- dstosplit: Input dataset to split
- bycols: Whether to split by columns (true) or rows (false)
- stroutgroup: Output group path
- stroutdataset: Base name for output datasets
- blocksize: Size of each block
- irows: Number of rows in input dataset
- icols: Number of columns in input dataset

Implementation approach:
1. Calculates the number of blocks based on the input dimensions.
2. Processes the dataset in blocks:
   - Reads a block from the input dataset
   - Creates a new dataset for the block
   - Writes the block to the new dataset
3. Handles edge cases for the final block, which may be smaller than the block size.
5 Call Graph
6 Source Code
Note: Implementation
File: inst/include/hdf5Utilities/hdf5SplitDataset.hpp • Lines 65-140
/**
 * @brief Splits an HDF5 dataset into consecutive blocks, each stored as its own dataset.
 *
 * The input is cut along one dimension (columns when @p bycols is true, rows
 * otherwise) into ceil(dimension / blocksize) pieces. Piece number i (zero
 * based) is written to the dataset named
 * "<stroutgroup>/<stroutdataset>.<i>" in the same file as @p dstosplit.
 * The last piece is smaller when the dimension is not a multiple of
 * @p blocksize.
 *
 * Errors are reported through Rf_error() after calling checkClose_file(), so
 * this function does not return to the caller on failure.
 *
 * @param dstosplit     Input dataset to split.
 * @param bycols        Split by columns (true) or by rows (false).
 * @param stroutgroup   Output group path.
 * @param stroutdataset Base name for the output datasets.
 * @param blocksize     Size of each block along the split dimension; must be > 0.
 * @param irows         Number of rows in the input dataset; must be >= 0.
 * @param icols         Number of columns in the input dataset; must be >= 0.
 */
inline void RcppSplit_matrix_hdf5 ( BigDataStatMeth::hdf5Dataset* dstosplit, bool bycols,
                                    std::string stroutgroup, std::string stroutdataset,
                                    int blocksize, int irows, int icols )
{
    BigDataStatMeth::hdf5Dataset* dsOut = nullptr;

    // A zero block size would divide by zero below, and negative sizes would
    // wrap to huge values once converted to hsize_t, so reject them up front.
    // The failure path mirrors the exception handlers at the end of the function.
    if( blocksize <= 0 || irows < 0 || icols < 0 ) {
        checkClose_file(dstosplit, dsOut);
        Rf_error( "\nC++ exception RcppSplit_matrix_hdf5 : blocksize must be positive and irows/icols non-negative");
    }

    try {

        // Do all size arithmetic in hsize_t so large dimensions cannot overflow int.
        const hsize_t totalRows = static_cast<hsize_t>(irows);
        const hsize_t totalCols = static_cast<hsize_t>(icols);
        const hsize_t blockLen  = static_cast<hsize_t>(blocksize);

        // Number of pieces along the split dimension, rounded up.
        const hsize_t splitLen = bycols ? totalCols : totalRows;
        const hsize_t blocks   = (splitLen + blockLen - 1) / blockLen;

        // Extent of the current piece; the split dimension starts at blockLen
        // and is shrunk for the final, possibly partial, piece.
        hsize_t inrows = bycols ? totalRows : blockLen;
        hsize_t incols = bycols ? blockLen  : totalCols;

        // Offsets of the current piece (ii: rows, kk: columns).
        hsize_t ii = 0;
        hsize_t kk = 0;

        std::vector<hsize_t> stride = {1, 1},
                             block  = {1, 1};

        std::string newDatasetName = "";

        for ( hsize_t i = 0; i < blocks; i++ )
        {
            newDatasetName = stroutgroup + "/" + stroutdataset + "." + std::to_string(i);

            if( bycols ) {
                kk = i * blockLen;
                if( kk + blockLen > totalCols ) {
                    incols = totalCols - kk;
                }
            } else {
                ii = i * blockLen;
                if( ii + blockLen > totalRows ) {
                    inrows = totalRows - ii;
                }
            }

            std::vector<double> vdts( inrows * incols );

            // Offset and count are passed column-first, as in the original call.
            dstosplit->readDatasetBlock( {kk, ii}, {incols, inrows}, stride, block, vdts.data() );

            // NOTE(review): the meaning of the third constructor argument (true)
            // is not visible here - presumably an overwrite flag; confirm.
            dsOut = new BigDataStatMeth::hdf5Dataset(dstosplit->getFileName(), newDatasetName, true);
            dsOut->createDataset( inrows, incols, "real");

            // Only write when the dataset handle was actually created.
            if( dsOut->getDatasetptr() != nullptr ) {
                dsOut->writeDataset(vdts.data());
            }

            delete dsOut; dsOut = nullptr;
        }

    // NOTE(review): if an exception is thrown between `new` and `delete`, dsOut
    // is handed to checkClose_file() but not deleted here - confirm that
    // checkClose_file() is enough to release it.
    } catch( const H5::FileIException& ) {
        checkClose_file(dstosplit, dsOut);
        Rf_error( "c++ exception RcppSplit_matrix_hdf5(File IException )");
    } catch( const H5::DataSetIException& ) {
        checkClose_file(dstosplit, dsOut);
        Rf_error( "c++ exception RcppSplit_matrix_hdf5 (DataSet IException )");
    } catch( const H5::DataSpaceIException& ) {
        checkClose_file(dstosplit, dsOut);
        Rf_error( "c++ exception RcppSplit_matrix_hdf5 (DataSpace IException )");
    } catch( const std::exception& ex ) {
        checkClose_file(dstosplit, dsOut);
        Rf_error( "\nC++ exception RcppSplit_matrix_hdf5 : %s", ex.what());
    } catch (...) {
        checkClose_file(dstosplit, dsOut);
        Rf_error( "\nC++ exception RcppSplit_matrix_hdf5 (unknown reason)");
    }

    return;
}
7 Usage Example
#include "BigDataStatMeth.hpp"
// Example usage
// RcppSplit_matrix_hdf5 returns void, so there is no result to capture; the
// split datasets are created in the HDF5 file that dstosplit belongs to.
BigDataStatMeth::RcppSplit_matrix_hdf5(dstosplit, bycols, stroutgroup, stroutdataset, blocksize, irows, icols);