RcppbdCorr_matrix_single
C++ Function Reference
1 Signature
corr_result BigDataStatMeth::RcppbdCorr_matrix_single(Eigen::MatrixXd &X, const std::string &method="pearson", bool use_complete_obs=true, bool compute_pvalues=false, bool trans_x=false, Rcpp::Nullable< int > threads=R_NilValue)2 Parameters
X(Eigen::MatrixXd &)method(const std::string &)use_complete_obs(bool)compute_pvalues(bool)trans_x(bool)threads(Rcpp::Nullable< int >)
3 Returns
Type: corr_result
4 Call Graph
5 Source Code
NoteImplementation
File: inst/include/hdf5Algebra/matrixCorrelation.hpp • Lines 728-943
inline corr_result RcppbdCorr_matrix_single(Eigen::MatrixXd& X,
const std::string& method = "pearson",
bool use_complete_obs = true,
bool compute_pvalues = false,
bool trans_x = false,
Rcpp::Nullable<int> threads = R_NilValue) {
corr_result result;
result.trans_x = trans_x;
try {
Eigen::MatrixXd Xt;
// Determine dimensions after potential transposition
int n_obs, n_vars;
if (trans_x) {
n_obs = X.cols(); // Original variables become observations
n_vars = X.rows(); // Original observations become variables
} else {
n_obs = X.rows(); // Standard: rows are observations
n_vars = X.cols(); // Standard: columns are variables
}
result.correlation_matrix = Eigen::MatrixXd::Identity(n_vars, n_vars);
result.method = method;
result.n_obs = n_obs;
result.n_vars_x = n_vars;
result.n_vars_y = n_vars;
result.has_pvalues = compute_pvalues; // Disabled for performance
if (method == "pearson") {
// Skip expensive allFinite check if user says use_complete_obs=false
bool has_missing = use_complete_obs ? !X.allFinite() : false;
if (!has_missing) {
if (trans_x) {
// For transpose case, work with rows (correlate observations)
Eigen::MatrixXd X_rows_centered = X.colwise() - X.rowwise().mean();
Eigen::MatrixXd cov = (X_rows_centered * X_rows_centered.transpose()) / (n_obs - 1);
// Convert covariance to correlation
Eigen::VectorXd inv_std_devs = cov.diagonal().array().sqrt().inverse();
// Handle near-zero standard deviations
for (int i = 0; i < n_vars; ++i) {
if (cov(i, i) <= 1e-28) { // std_dev^2 threshold
inv_std_devs(i) = 0.0;
}
}
// Vectorized correlation: R = D^(-1) * Cov * D^(-1)
result.correlation_matrix = inv_std_devs.asDiagonal() * cov * inv_std_devs.asDiagonal();
} else {
// Original path for normal case (correlate variables)
Eigen::MatrixXd X_centered = X.rowwise() - X.colwise().mean();
Eigen::MatrixXd cov = (X_centered.transpose() * X_centered) / (n_obs - 1);
// Convert covariance to correlation
Eigen::VectorXd inv_std_devs = cov.diagonal().array().sqrt().inverse();
// Handle near-zero standard deviations
for (int i = 0; i < n_vars; ++i) {
if (cov(i, i) <= 1e-28) { // std_dev^2 threshold
inv_std_devs(i) = 0.0;
}
}
// Vectorized correlation: R = D^(-1) * Cov * D^(-1)
result.correlation_matrix = inv_std_devs.asDiagonal() * cov * inv_std_devs.asDiagonal();
}
// Set diagonal to 1.0 and fix rows/cols with zero std dev
result.correlation_matrix.diagonal().setOnes();
for (int i = 0; i < n_vars; ++i) {
Eigen::VectorXd inv_std_devs = result.correlation_matrix.diagonal().array().sqrt().inverse();
if (inv_std_devs(i) == 0.0) {
result.correlation_matrix.row(i).setZero();
result.correlation_matrix.col(i).setZero();
result.correlation_matrix(i, i) = 1.0;
}
}
// Compute p-values using optimized vectorized function
if (compute_pvalues) {
result.pvalues = compute_pvalues_optimized(result.correlation_matrix, n_obs, true);
}
result.bcomputed = true;
return result;
}
}
if (n_vars <= 500) {
// Direct approach - faster than blocks for small/medium matrices
#pragma omp parallel for schedule(dynamic) if(n_vars > 100)
for (int i = 0; i < n_vars; ++i) {
for (int j = i + 1; j < n_vars; ++j) {
// OPTIMIZED: Intelligent access pattern - no matrix copy for transpose
Eigen::VectorXd vec_i, vec_j;
if (trans_x) {
vec_i = X.row(i); // Row access for transposed case
vec_j = X.row(j);
} else {
vec_i = X.col(i); // Column access for normal case
vec_j = X.col(j);
}
double corr_val;
if (method == "spearman") {
corr_val = spearman_correlation(vec_i, vec_j, use_complete_obs);
} else {
corr_val = pearson_correlation(vec_i, vec_j, use_complete_obs);
}
result.correlation_matrix(i, j) = corr_val;
result.correlation_matrix(j, i) = corr_val;
}
}
// Compute p-values using optimized vectorized function
if (compute_pvalues) {
result.pvalues = compute_pvalues_optimized(result.correlation_matrix, n_obs, true);
}
result.bcomputed = true;
return result;
}
// FALLBACK: Block-wise computation for large matrices (n_vars > 500)
int block_size = std::min(128, std::max(32, n_vars / 8));
int num_blocks = (n_vars + block_size - 1) / block_size;
#pragma omp parallel for schedule(dynamic) if(n_vars > 1000)
for (int i_block = 0; i_block < num_blocks; ++i_block) {
for (int j_block = i_block; j_block < num_blocks; ++j_block) {
int i_start = i_block * block_size;
int i_end = std::min(i_start + block_size, n_vars);
int i_size = i_end - i_start;
int j_start = j_block * block_size;
int j_end = std::min(j_start + block_size, n_vars);
int j_size = j_end - j_start;
if (i_block == j_block) {
// Diagonal block - compute upper triangle only
for (int i = 0; i < i_size; ++i) {
for (int j = i + 1; j < j_size; ++j) {
// OPTIMIZED: Direct access with transpose-aware pattern (no middleCols copies)
Eigen::VectorXd vec_i, vec_j;
if (trans_x) {
vec_i = X.row(i_start + i); // Row access for transpose
vec_j = X.row(j_start + j);
} else {
vec_i = X.col(i_start + i); // Column access for normal
vec_j = X.col(j_start + j);
}
double corr_val;
if (method == "spearman") {
corr_val = spearman_correlation(vec_i, vec_j, use_complete_obs);
} else {
corr_val = pearson_correlation(vec_i, vec_j, use_complete_obs);
}
result.correlation_matrix(i_start + i, j_start + j) = corr_val;
result.correlation_matrix(j_start + j, i_start + i) = corr_val;
}
}
} else {
// Off-diagonal block - compute all pairs
for (int i = 0; i < i_size; ++i) {
for (int j = 0; j < j_size; ++j) {
// OPTIMIZED: Direct access with transpose-aware pattern (no middleCols copies)
Eigen::VectorXd vec_i, vec_j;
if (trans_x) {
vec_i = X.row(i_start + i); // Row access for transpose
vec_j = X.row(j_start + j);
} else {
vec_i = X.col(i_start + i); // Column access for normal
vec_j = X.col(j_start + j);
}
double corr_val;
if (method == "spearman") {
corr_val = spearman_correlation(vec_i, vec_j, use_complete_obs);
} else {
corr_val = pearson_correlation(vec_i, vec_j, use_complete_obs);
}
result.correlation_matrix(i_start + i, j_start + j) = corr_val;
result.correlation_matrix(j_start + j, i_start + i) = corr_val;
}
}
}
}
}
// Compute p-values using optimized vectorized function
if (compute_pvalues) {
result.pvalues = compute_pvalues_optimized(result.correlation_matrix, n_obs, true);
}
result.bcomputed = true;
} catch (std::exception &ex) {
Rcpp::Rcerr << "C++ exception RcppbdCorr_matrix_single: " << ex.what() << std::endl;
result.bcomputed = false;
}
return result;
}6 Usage Example
#include "BigDataStatMeth.hpp"
// Example usage
auto result = RcppbdCorr_matrix_single(...);