RcppbdCorr_matrix_single

C++ Function Reference

1 Signature

corr_result BigDataStatMeth::RcppbdCorr_matrix_single(Eigen::MatrixXd &X, const std::string &method="pearson", bool use_complete_obs=true, bool compute_pvalues=false, bool trans_x=false, Rcpp::Nullable< int > threads=R_NilValue)

2 Parameters

  • X (Eigen::MatrixXd &)
  • method (const std::string &)
  • use_complete_obs (bool)
  • compute_pvalues (bool)
  • trans_x (bool)
  • threads (Rcpp::Nullable< int >)

3 Returns

Type: corr_result

4 Call Graph

Function dependencies

5 Source Code

File: inst/include/hdf5Algebra/matrixCorrelation.hppLines 728-943

inline corr_result RcppbdCorr_matrix_single(Eigen::MatrixXd& X,
                                        const std::string& method = "pearson",
                                        bool use_complete_obs = true,
                                        bool compute_pvalues = false,
                                        bool trans_x = false,
                                        Rcpp::Nullable<int> threads = R_NilValue) {
        
        corr_result result;
        result.trans_x = trans_x;
        
        try {
            
            Eigen::MatrixXd Xt;
            
            // Determine dimensions after potential transposition
            int n_obs, n_vars;
            if (trans_x) {
                n_obs = X.cols();  // Original variables become observations
                n_vars = X.rows(); // Original observations become variables
            } else {
                n_obs = X.rows();  // Standard: rows are observations
                n_vars = X.cols(); // Standard: columns are variables
            }
            
            result.correlation_matrix = Eigen::MatrixXd::Identity(n_vars, n_vars);
            result.method = method;
            result.n_obs = n_obs;
            result.n_vars_x = n_vars;
            result.n_vars_y = n_vars;
            result.has_pvalues = compute_pvalues; // Disabled for performance
            
            
            if (method == "pearson") {
                // Skip expensive allFinite check if user says use_complete_obs=false
                bool has_missing = use_complete_obs ? !X.allFinite() : false;
                
                if (!has_missing) {

                    if (trans_x) {
                        // For transpose case, work with rows (correlate observations)
                        Eigen::MatrixXd X_rows_centered = X.colwise() - X.rowwise().mean();
                        Eigen::MatrixXd cov = (X_rows_centered * X_rows_centered.transpose()) / (n_obs - 1);
                        
                        // Convert covariance to correlation
                        Eigen::VectorXd inv_std_devs = cov.diagonal().array().sqrt().inverse();
                        
                        // Handle near-zero standard deviations
                        for (int i = 0; i < n_vars; ++i) {
                            if (cov(i, i) <= 1e-28) { // std_dev^2 threshold
                                inv_std_devs(i) = 0.0;
                            }
                        }
                        
                        // Vectorized correlation: R = D^(-1) * Cov * D^(-1)
                        result.correlation_matrix = inv_std_devs.asDiagonal() * cov * inv_std_devs.asDiagonal();
                    } else {
                        // Original path for normal case (correlate variables)
                        Eigen::MatrixXd X_centered = X.rowwise() - X.colwise().mean();
                        Eigen::MatrixXd cov = (X_centered.transpose() * X_centered) / (n_obs - 1);
                        
                        // Convert covariance to correlation
                        Eigen::VectorXd inv_std_devs = cov.diagonal().array().sqrt().inverse();
                        
                        // Handle near-zero standard deviations
                        for (int i = 0; i < n_vars; ++i) {
                            if (cov(i, i) <= 1e-28) { // std_dev^2 threshold
                                inv_std_devs(i) = 0.0;
                            }
                        }
                        
                        // Vectorized correlation: R = D^(-1) * Cov * D^(-1)
                        result.correlation_matrix = inv_std_devs.asDiagonal() * cov * inv_std_devs.asDiagonal();
                    }
                    
                    // Set diagonal to 1.0 and fix rows/cols with zero std dev
                    result.correlation_matrix.diagonal().setOnes();
                    for (int i = 0; i < n_vars; ++i) {
                        Eigen::VectorXd inv_std_devs = result.correlation_matrix.diagonal().array().sqrt().inverse();
                        if (inv_std_devs(i) == 0.0) {
                            result.correlation_matrix.row(i).setZero();
                            result.correlation_matrix.col(i).setZero();
                            result.correlation_matrix(i, i) = 1.0;
                        }
                    }
                    
                    // Compute p-values using optimized vectorized function
                    if (compute_pvalues) {
                        result.pvalues = compute_pvalues_optimized(result.correlation_matrix, n_obs, true);
                    }
                    
                    result.bcomputed = true;
                    return result;
                }
            }
            
            if (n_vars <= 500) {
                // Direct approach - faster than blocks for small/medium matrices
                
#pragma omp parallel for schedule(dynamic) if(n_vars > 100)
                for (int i = 0; i < n_vars; ++i) {
                    for (int j = i + 1; j < n_vars; ++j) {
                        // OPTIMIZED: Intelligent access pattern - no matrix copy for transpose
                        Eigen::VectorXd vec_i, vec_j;
                        if (trans_x) {
                            vec_i = X.row(i);  // Row access for transposed case
                            vec_j = X.row(j);
                        } else {
                            vec_i = X.col(i);  // Column access for normal case
                            vec_j = X.col(j);
                        }
                        
                        double corr_val;
                        if (method == "spearman") {
                            corr_val = spearman_correlation(vec_i, vec_j, use_complete_obs);
                        } else {
                            corr_val = pearson_correlation(vec_i, vec_j, use_complete_obs);
                        }
                        
                        result.correlation_matrix(i, j) = corr_val;
                        result.correlation_matrix(j, i) = corr_val;
                    }
                }
                
                // Compute p-values using optimized vectorized function
                if (compute_pvalues) {
                    result.pvalues = compute_pvalues_optimized(result.correlation_matrix, n_obs, true);
                }
                
                result.bcomputed = true;
                return result;
            }
            
            
            // FALLBACK: Block-wise computation for large matrices (n_vars > 500)
                int block_size = std::min(128, std::max(32, n_vars / 8));
            int num_blocks = (n_vars + block_size - 1) / block_size;
            
#pragma omp parallel for schedule(dynamic) if(n_vars > 1000)
            for (int i_block = 0; i_block < num_blocks; ++i_block) {
                for (int j_block = i_block; j_block < num_blocks; ++j_block) {
                    int i_start = i_block * block_size;
                    int i_end = std::min(i_start + block_size, n_vars);
                    int i_size = i_end - i_start;
                    
                    int j_start = j_block * block_size;
                    int j_end = std::min(j_start + block_size, n_vars);
                    int j_size = j_end - j_start;
                    
                    if (i_block == j_block) {
                        // Diagonal block - compute upper triangle only
                        for (int i = 0; i < i_size; ++i) {
                            for (int j = i + 1; j < j_size; ++j) {
                                // OPTIMIZED: Direct access with transpose-aware pattern (no middleCols copies)
                                Eigen::VectorXd vec_i, vec_j;
                                if (trans_x) {
                                    vec_i = X.row(i_start + i);  // Row access for transpose
                                    vec_j = X.row(j_start + j);
                                } else {
                                    vec_i = X.col(i_start + i);  // Column access for normal
                                    vec_j = X.col(j_start + j);
                                }
                                
                                double corr_val;
                                if (method == "spearman") {
                                    corr_val = spearman_correlation(vec_i, vec_j, use_complete_obs);
                                } else {
                                    corr_val = pearson_correlation(vec_i, vec_j, use_complete_obs);
                                }
                                
                                result.correlation_matrix(i_start + i, j_start + j) = corr_val;
                                result.correlation_matrix(j_start + j, i_start + i) = corr_val;
                            }
                        }
                    } else {
                        // Off-diagonal block - compute all pairs
                        for (int i = 0; i < i_size; ++i) {
                            for (int j = 0; j < j_size; ++j) {
                                // OPTIMIZED: Direct access with transpose-aware pattern (no middleCols copies)
                                Eigen::VectorXd vec_i, vec_j;
                                if (trans_x) {
                                    vec_i = X.row(i_start + i);  // Row access for transpose
                                    vec_j = X.row(j_start + j);
                                } else {
                                    vec_i = X.col(i_start + i);  // Column access for normal
                                    vec_j = X.col(j_start + j);
                                }
                                
                                double corr_val;
                                if (method == "spearman") {
                                    corr_val = spearman_correlation(vec_i, vec_j, use_complete_obs);
                                } else {
                                    corr_val = pearson_correlation(vec_i, vec_j, use_complete_obs);
                                }
                                
                                result.correlation_matrix(i_start + i, j_start + j) = corr_val;
                                result.correlation_matrix(j_start + j, i_start + i) = corr_val;
                            }
                        }
                    }
                }
            }
            
            // Compute p-values using optimized vectorized function
            if (compute_pvalues) {
                result.pvalues = compute_pvalues_optimized(result.correlation_matrix, n_obs, true);
            }
            
            result.bcomputed = true;
            
        } catch (std::exception &ex) {
            Rcpp::Rcerr << "C++ exception RcppbdCorr_matrix_single: " << ex.what() << std::endl;
            result.bcomputed = false;
        }
        
        return result;
    }

6 Usage Example

#include "BigDataStatMeth.hpp"

// Example usage
auto result = RcppbdCorr_matrix_single(...);