RcppbdCorr_hdf5_single
C++ Function Reference
1 Signature
Rcpp::List BigDataStatMeth::RcppbdCorr_hdf5_single(const std::string &filename, const std::string &strsubgroup, const std::string &strdataset, const std::string &method, bool use_complete_obs, bool compute_pvalues, int block_size, bool bforce, const Rcpp::Nullable< Rcpp::CharacterVector > &output_group, const Rcpp::Nullable< Rcpp::CharacterVector > &output_dataset_corr, const Rcpp::Nullable< Rcpp::CharacterVector > &output_dataset_pval, bool trans_x, const Rcpp::Nullable< int > &threads)2 Description
Implementation of single matrix correlation computation for HDF5 matrices - OPTIMIZED.
3 Parameters
filename(const std::string &): Path to HDF5 file containing the matrixstrsubgroup(const std::string &): Group path within HDF5 filestrdataset(const std::string &): Dataset name within the groupmethod(const std::string &): Correlation method (“pearson” or “spearman”)use_complete_obs(bool): Whether to use only complete observationscompute_pvalues(bool): Whether to compute p-values (performance impact)block_size(int): Block size for large matrix processing (default: 1000)bforce(bool): Whether to overwrite existing resultsoutput_group(const Rcpp::Nullable< Rcpp::CharacterVector > &): Custom output group name (nullable)output_dataset_corr(const Rcpp::Nullable< Rcpp::CharacterVector > &): Custom correlation dataset name (nullable)output_dataset_pval(const Rcpp::Nullable< Rcpp::CharacterVector > &): Custom p-values dataset name (nullable)trans_x(bool): Whether to transpose the matrix (samples vs variables correlation)threads(const Rcpp::Nullable< int > &): Number of threads for parallel computation (nullable)
4 Returns
Rcpp::List containing dataset locations and computation metadata
5 Details
Optimized implementation that computes correlation matrix for a single HDF5 dataset. Uses intelligent method selection between direct computation and block-wise processing based on matrix size and available memory.
6 Call Graph
7 Source Code
NoteImplementation
File: inst/include/hdf5Algebra/matrixCorrelation.hpp • Lines 1403-1562
inline Rcpp::List RcppbdCorr_hdf5_single(const std::string& filename,
const std::string& strsubgroup,
const std::string& strdataset,
const std::string& method,
bool use_complete_obs,
bool compute_pvalues,
int block_size,
bool bforce,
const Rcpp::Nullable<Rcpp::CharacterVector>& output_group,
const Rcpp::Nullable<Rcpp::CharacterVector>& output_dataset_corr,
const Rcpp::Nullable<Rcpp::CharacterVector>& output_dataset_pval,
bool trans_x,
const Rcpp::Nullable<int>& threads) {
BigDataStatMeth::hdf5Dataset* dsA = nullptr;
BigDataStatMeth::hdf5Dataset* dsCorr = nullptr;
BigDataStatMeth::hdf5Dataset* dsPval = nullptr;
try {
std::vector<hsize_t> stride = {1, 1}, block = {1, 1}, offset = {0, 0}, count = {0, 0};
// Open input dataset
dsA = new BigDataStatMeth::hdf5Dataset(filename, strsubgroup, strdataset, false);
dsA->openDataset();
if (dsA->getDatasetptr() == nullptr) {
checkClose_file(dsA);
throw std::runtime_error("Failed to open input dataset");
}
// Determine output group and dataset names
std::string stroutgroup;
std::string corr_dataset_name;
std::string pval_dataset_name;
if (output_group.isNull()) {
stroutgroup = "CORR/" + strdataset;
} else {
Rcpp::CharacterVector group_vec = Rcpp::as<Rcpp::CharacterVector>(output_group);
stroutgroup = Rcpp::as<std::string>(group_vec[0]);
}
if (output_dataset_corr.isNull()) {
corr_dataset_name = "correlation";
} else {
Rcpp::CharacterVector corr_vec = Rcpp::as<Rcpp::CharacterVector>(output_dataset_corr);
corr_dataset_name = Rcpp::as<std::string>(corr_vec[0]);
}
if (output_dataset_pval.isNull()) {
pval_dataset_name = "pvalues";
} else {
Rcpp::CharacterVector pval_vec = Rcpp::as<Rcpp::CharacterVector>(output_dataset_pval);
pval_dataset_name = Rcpp::as<std::string>(pval_vec[0]);
}
// Get matrix dimensions
hsize_t n_rows_orig = dsA->nrows();
hsize_t n_cols_orig = dsA->ncols();
// Effective dimensions after transposition
hsize_t n_rows = trans_x ? n_cols_orig : n_rows_orig;
hsize_t n_cols = trans_x ? n_rows_orig : n_cols_orig;
count = {n_rows_orig, n_cols_orig};
// Create output datasets
try {
dsCorr = new BigDataStatMeth::hdf5Dataset(filename, stroutgroup, corr_dataset_name, bforce);
if (compute_pvalues) {
dsPval = new BigDataStatMeth::hdf5Dataset(filename, stroutgroup, pval_dataset_name, bforce);
}
} catch (const std::exception& e) {
Rcpp::Rcerr << "Error creating output datasets: " << e.what() << std::endl;
checkClose_file(dsA, dsCorr, dsPval);
return R_NilValue;
}
// Automatic method selection based on matrix size
const hsize_t DIRECT_COMPUTATION_THRESHOLD = MAXELEMSINBLOCK / 4;
if (n_rows * n_cols < DIRECT_COMPUTATION_THRESHOLD) {
// Direct computation for small matrices
std::vector<double> vdA(count[0] * count[1]);
dsA->readDatasetBlock({offset[0], offset[1]}, {count[0], count[1]}, stride, block, vdA.data());
// Convert to Eigen matrix (row-major)
Eigen::MatrixXd X = Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>(
vdA.data(), count[0], count[1]);
// Compute correlation
corr_result result = RcppbdCorr_matrix_single(X, method, use_complete_obs, compute_pvalues, trans_x, threads);
if (!result.bcomputed) {
checkClose_file(dsA, dsCorr, dsPval);
throw std::runtime_error("Single matrix correlation computation failed");
}
// Write correlation matrix
if (dsCorr->getDatasetptr() == nullptr) {
dsCorr->createDataset(result.correlation_matrix.rows(), result.correlation_matrix.cols(), "real");
}
dsCorr->writeDataset(Rcpp::wrap(result.correlation_matrix));
// Write p-values matrix if computed
if (compute_pvalues && result.has_pvalues && dsPval && result.pvalues.rows() > 0 && result.pvalues.cols() > 0) {
if (dsPval->getDatasetptr() == nullptr) {
dsPval->createDataset(result.pvalues.rows(), result.pvalues.cols(), "real");
}
dsPval->writeDataset(Rcpp::wrap(result.pvalues));
}
} else {
// Block-wise computation for large matrices - CRITICAL path for big-omics
if (dsA->getDatasetptr() != nullptr) {
RcppbdCorr_hdf5_Block_single(dsA, dsCorr, dsPval, method, use_complete_obs,
compute_pvalues, trans_x, block_size, threads);
}
}
// Clean up datasets
delete dsA; dsA = nullptr;
delete dsCorr; dsCorr = nullptr;
delete dsPval; dsPval = nullptr;
// Return comprehensive result list
return Rcpp::List::create(
Rcpp::Named("filename") = filename,
Rcpp::Named("group") = stroutgroup,
Rcpp::Named("correlation") = corr_dataset_name,
Rcpp::Named("method") = method,
Rcpp::Named("correlation_type") = "single",
Rcpp::Named("n_variables") = (int)n_cols,
Rcpp::Named("n_observations") = (int)n_rows,
Rcpp::Named("use_complete_obs") = use_complete_obs,
Rcpp::Named("pvalues") = compute_pvalues ? pval_dataset_name : "",
Rcpp::Named("has_pvalues") = compute_pvalues
);
} catch(H5::FileIException& error) {
checkClose_file(dsA, dsCorr, dsPval);
Rcpp::Rcerr << "\nC++ exception RcppbdCorr_hdf5_single (File IException): " << error.getDetailMsg() << std::endl;
return R_NilValue;
} catch(H5::DataSetIException& error) {
checkClose_file(dsA, dsCorr, dsPval);
Rcpp::Rcerr << "\nC++ exception RcppbdCorr_hdf5_single (DataSet IException): " << error.getDetailMsg() << std::endl;
return R_NilValue;
} catch(std::exception &ex) {
checkClose_file(dsA, dsCorr, dsPval);
Rcpp::Rcerr << "C++ exception RcppbdCorr_hdf5_single: " << ex.what() << std::endl;
return R_NilValue;
} catch (...) {
checkClose_file(dsA, dsCorr, dsPval);
Rcpp::Rcerr << "\nC++ exception RcppbdCorr_hdf5_single (unknown reason)" << std::endl;
return R_NilValue;
}
}8 Usage Example
#include "BigDataStatMeth.hpp"
// Example usage
auto result = RcppbdCorr_hdf5_single(...);