RcppbdCorr_hdf5_cross
C++ Function Reference
1 Signature
Rcpp::List BigDataStatMeth::RcppbdCorr_hdf5_cross(const std::string &filename_a, const std::string &strsubgroup_a, const std::string &strdataset_a, const std::string &filename_b, const std::string &strsubgroup_b, const std::string &strdataset_b, const std::string &method, bool use_complete_obs, bool compute_pvalues, int block_size, bool bforce, const std::string &output_filename, const Rcpp::Nullable< Rcpp::CharacterVector > &output_group, const Rcpp::Nullable< Rcpp::CharacterVector > &output_dataset_corr, const Rcpp::Nullable< Rcpp::CharacterVector > &output_dataset_pval, bool trans_x, bool trans_y, const Rcpp::Nullable< int > &threads)2 Description
Implementation of cross-matrix correlation computation for HDF5 matrices - OPTIMIZED.
3 Parameters
filename_a(const std::string &): Path to HDF5 file containing first matrixstrsubgroup_a(const std::string &): Group path for first matrixstrdataset_a(const std::string &): Dataset name for first matrixfilename_b(const std::string &): Path to HDF5 file containing second matrixstrsubgroup_b(const std::string &): Group path for second matrixstrdataset_b(const std::string &): Dataset name for second matrixmethod(const std::string &): Correlation method (“pearson” or “spearman”)use_complete_obs(bool): Whether to use only complete observationscompute_pvalues(bool): Whether to compute p-valuesblock_size(int): Block size for processingbforce(bool): Whether to overwrite existing resultsoutput_filename(const std::string &): Output HDF5 file pathoutput_group(const Rcpp::Nullable< Rcpp::CharacterVector > &): Custom output group name (nullable)output_dataset_corr(const Rcpp::Nullable< Rcpp::CharacterVector > &): Custom correlation dataset name (nullable)output_dataset_pval(const Rcpp::Nullable< Rcpp::CharacterVector > &): Custom p-values dataset name (nullable)trans_x(bool): Whether to transpose first matrixtrans_y(bool): Whether to transpose second matrixthreads(const Rcpp::Nullable< int > &): Number of threads (nullable)
4 Returns
Rcpp::List containing dataset locations and computation metadata
5 Details
Optimized implementation that computes cross-correlation matrix between two HDF5 datasets. Uses intelligent method selection and efficient memory management for optimal performance.
6 Call Graph
7 Source Code
NoteImplementation
File: inst/include/hdf5Algebra/matrixCorrelation.hpp • Lines 1859-2053
inline Rcpp::List RcppbdCorr_hdf5_cross(const std::string& filename_a,
const std::string& strsubgroup_a,
const std::string& strdataset_a,
const std::string& filename_b,
const std::string& strsubgroup_b,
const std::string& strdataset_b,
const std::string& method,
bool use_complete_obs,
bool compute_pvalues,
int block_size,
bool bforce,
const std::string& output_filename,
const Rcpp::Nullable<Rcpp::CharacterVector>& output_group,
const Rcpp::Nullable<Rcpp::CharacterVector>& output_dataset_corr,
const Rcpp::Nullable<Rcpp::CharacterVector>& output_dataset_pval,
bool trans_x,
bool trans_y,
const Rcpp::Nullable<int>& threads) {
BigDataStatMeth::hdf5Dataset* dsA = nullptr;
BigDataStatMeth::hdf5Dataset* dsB = nullptr;
BigDataStatMeth::hdf5Dataset* dsCorr = nullptr;
BigDataStatMeth::hdf5Dataset* dsPval = nullptr;
try {
// std::vector<hsize_t> stride = {1, 1}, block = {1, 1}, offset = {0, 0};
// Open input datasets
dsA = new BigDataStatMeth::hdf5Dataset(filename_a, strsubgroup_a, strdataset_a, false);
dsA->openDataset();
if (dsA->getDatasetptr() == nullptr) {
checkClose_file(dsA);
throw std::runtime_error("Failed to open first input dataset");
}
dsB = new BigDataStatMeth::hdf5Dataset(filename_b, strsubgroup_b, strdataset_b, false);
dsB->openDataset();
if (dsB->getDatasetptr() == nullptr) {
checkClose_file(dsA, dsB);
throw std::runtime_error("Failed to open second input dataset");
}
// Get original dimensions
hsize_t n_rows_a_orig = dsA->nrows();
hsize_t n_cols_a_orig = dsA->ncols();
hsize_t n_rows_b_orig = dsB->nrows();
hsize_t n_cols_b_orig = dsB->ncols();
// Determine effective dimensions after transposition
hsize_t n_rows_a = trans_x ? n_cols_a_orig : n_rows_a_orig;
hsize_t n_cols_a = trans_x ? n_rows_a_orig : n_cols_a_orig;
hsize_t n_rows_b = trans_y ? n_cols_b_orig : n_rows_b_orig;
hsize_t n_cols_b = trans_y ? n_rows_b_orig : n_cols_b_orig;
// Validate dimensions
if (n_rows_a != n_rows_b) {
checkClose_file(dsA, dsB);
throw std::runtime_error("Matrices must have same number of observations after transposition");
}
// Determine output group and dataset names
std::string stroutgroup;
std::string corr_dataset_name;
std::string pval_dataset_name;
if (output_group.isNull()) {
std::string trans_suffix = "";
if (trans_x && !trans_y) trans_suffix = "_TX";
else if (!trans_x && trans_y) trans_suffix = "_TY";
else if (trans_x && trans_y) trans_suffix = "_TXTY";
stroutgroup = "CORR" + trans_suffix + "/" + strdataset_a + "_vs_" + strdataset_b;
} else {
Rcpp::CharacterVector group_vec = Rcpp::as<Rcpp::CharacterVector>(output_group);
stroutgroup = Rcpp::as<std::string>(group_vec[0]);
}
if (output_dataset_corr.isNull()) {
corr_dataset_name = "correlation";
} else {
Rcpp::CharacterVector corr_vec = Rcpp::as<Rcpp::CharacterVector>(output_dataset_corr);
corr_dataset_name = Rcpp::as<std::string>(corr_vec[0]);
}
if (output_dataset_pval.isNull()) {
pval_dataset_name = "pvalues";
} else {
Rcpp::CharacterVector pval_vec = Rcpp::as<Rcpp::CharacterVector>(output_dataset_pval);
pval_dataset_name = Rcpp::as<std::string>(pval_vec[0]);
}
// Create output datasets
try {
dsCorr = new BigDataStatMeth::hdf5Dataset(output_filename, stroutgroup, corr_dataset_name, bforce);
if (compute_pvalues) {
dsPval = new BigDataStatMeth::hdf5Dataset(output_filename, stroutgroup, pval_dataset_name, bforce);
}
} catch (const std::exception& e) {
Rcpp::Rcerr << "Error creating output datasets: " << e.what() << std::endl;
checkClose_file(dsA, dsB, dsCorr, dsPval);
return R_NilValue;
}
// Automatic method selection based on total matrix size
const hsize_t DIRECT_COMPUTATION_THRESHOLD = MAXELEMSINBLOCK / 4;
hsize_t total_elements = std::max(n_rows_a_orig * n_cols_a_orig, n_rows_b_orig * n_cols_b_orig);
if (total_elements < DIRECT_COMPUTATION_THRESHOLD) {
// Direct computation for smaller matrices
std::vector<double> vdA(n_rows_a_orig * n_cols_a_orig);
std::vector<double> vdB(n_rows_b_orig * n_cols_b_orig);
std::vector<hsize_t> stride = {1, 1}, block = {1, 1};
dsA->readDatasetBlock({0, 0}, {n_rows_a_orig, n_cols_a_orig}, stride, block, vdA.data());
dsB->readDatasetBlock({0, 0}, {n_rows_b_orig, n_cols_b_orig}, stride, block, vdB.data());
Eigen::MatrixXd X = Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>(
vdA.data(), n_rows_a_orig, n_cols_a_orig);
Eigen::MatrixXd Y = Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>(
vdB.data(), n_rows_b_orig, n_cols_b_orig);
corr_result result = RcppbdCorr_matrix_cross(X, Y, method, use_complete_obs, compute_pvalues, trans_x, trans_y, threads);
if (!result.bcomputed) {
checkClose_file(dsA, dsB, dsCorr, dsPval);
throw std::runtime_error("Cross-correlation computation failed");
}
// Write results
if (dsCorr->getDatasetptr() == nullptr) {
dsCorr->createDataset(result.correlation_matrix.rows(), result.correlation_matrix.cols(), "real");
}
dsCorr->writeDataset(Rcpp::wrap(result.correlation_matrix));
if (compute_pvalues && result.has_pvalues && dsPval && result.pvalues.rows() > 0 && result.pvalues.cols() > 0) {
if (dsPval->getDatasetptr() == nullptr) {
dsPval->createDataset(result.pvalues.rows(), result.pvalues.cols(), "real");
}
dsPval->writeDataset(Rcpp::wrap(result.pvalues));
}
} else {
// Block-wise computation for large matrices
if (dsA->getDatasetptr() != nullptr && dsB->getDatasetptr() != nullptr) {
RcppbdCorr_hdf5_Block_cross(dsA, dsB, dsCorr, dsPval, method, use_complete_obs,
compute_pvalues, block_size, trans_x, trans_y, threads);
}
}
// Clean up datasets
delete dsA; dsA = nullptr;
delete dsB; dsB = nullptr;
delete dsCorr; dsCorr = nullptr;
delete dsPval; dsPval = nullptr;
// Return comprehensive result list
return Rcpp::List::create(
Rcpp::Named("filename") = output_filename,
Rcpp::Named("group") = stroutgroup,
Rcpp::Named("correlation") = corr_dataset_name,
Rcpp::Named("method") = method,
Rcpp::Named("correlation_type") = "cross",
Rcpp::Named("trans_x") = trans_x,
Rcpp::Named("trans_y") = trans_y,
Rcpp::Named("n_variables_x") = (int)n_cols_a,
Rcpp::Named("n_variables_y") = (int)n_cols_b,
Rcpp::Named("n_observations") = (int)n_rows_a,
Rcpp::Named("pvalues") = compute_pvalues ? pval_dataset_name : "",
Rcpp::Named("has_pvalues") = compute_pvalues
);
} catch(H5::FileIException& error) {
checkClose_file(dsA, dsB, dsCorr, dsPval);
Rcpp::Rcerr << "\nC++ exception RcppbdCorr_hdf5_cross (File IException): " << error.getDetailMsg() << std::endl;
return R_NilValue;
} catch(H5::DataSetIException& error) {
checkClose_file(dsA, dsB, dsCorr, dsPval);
Rcpp::Rcerr << "\nC++ exception RcppbdCorr_hdf5_cross (DataSet IException): " << error.getDetailMsg() << std::endl;
return R_NilValue;
} catch(std::exception &ex) {
checkClose_file(dsA, dsB, dsCorr, dsPval);
Rcpp::Rcerr << "C++ exception RcppbdCorr_hdf5_cross: " << ex.what() << std::endl;
return R_NilValue;
} catch (...) {
checkClose_file(dsA, dsB, dsCorr, dsPval);
Rcpp::Rcerr << "\nC++ exception RcppbdCorr_hdf5_cross (unknown reason)" << std::endl;
return R_NilValue;
}
}8 Usage Example
#include "BigDataStatMeth.hpp"
// Example usage
auto result = RcppbdCorr_hdf5_cross(...);