pearson_correlation

C++ Function Reference

1 Signature

double BigDataStatMeth::pearson_correlation(const Eigen::VectorXd &x, const Eigen::VectorXd &y, bool use_complete_obs=true)

2 Parameters

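  • x (const Eigen::VectorXd &): first input vector.
  • y (const Eigen::VectorXd &): second input vector; must have the same length as x, otherwise NaN is returned.
  • use_complete_obs (bool, default true): if true, only pairs in which both x(i) and y(i) are finite are used; if false, every element is used as-is.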

3 Returns

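Type: double

The Pearson correlation coefficient of x and y. NaN is returned when the vectors differ in length, when fewer than 3 usable pairs are available, or when the denominator sqrt(Sxx * Syy) is below 1e-14 (for example, when one of the vectors is constant).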

4 Caller Graph

Function dependencies

5 Source Code

File: inst/include/hdf5Algebra/matrixCorrelation.hpp, Lines 481-542

/**
 * @brief Pearson product-moment correlation between two equally sized vectors.
 *
 * The coefficient is computed with a two-pass scheme: the means are obtained
 * first, then the centred cross-product and sums of squares are accumulated.
 * The one-pass "sum of squares minus n * mean^2" shortcut is deliberately
 * avoided because it loses almost all significant digits when the mean is
 * large compared with the spread of the data.
 *
 * @param x                First input vector.
 * @param y                Second input vector; must have the same size as @p x.
 * @param use_complete_obs If true (default), only pairs in which both x(i)
 *                         and y(i) are finite contribute. If false, every
 *                         element is used as-is, so any non-finite value
 *                         propagates and the result is NaN.
 *
 * @return The correlation coefficient, clamped to [-1, 1]. Quiet NaN is
 *         returned when the sizes differ, when fewer than 3 usable pairs
 *         remain, or when sqrt(Sxx * Syy) is below 1e-14 (e.g. one of the
 *         inputs is constant).
 */
inline double pearson_correlation(const Eigen::VectorXd& x,
                                      const Eigen::VectorXd& y,
                                      bool use_complete_obs = true)
    {
        // Use the index type reported by the vectors themselves so that the
        // length is never truncated to int.
        using index_t = decltype(x.size());

        const double nan_value = std::numeric_limits<double>::quiet_NaN();

        if (x.size() != y.size()) return nan_value;

        const index_t n = x.size();

        // A pair takes part in the computation when missing-value filtering
        // is off, or when both of its members are finite.
        const auto pair_is_used = [&](index_t i) {
            return !use_complete_obs || (std::isfinite(x(i)) && std::isfinite(y(i)));
        };

        // ---- Pass 1: number of usable pairs and their means ----
        index_t valid_n = 0;
        double mean_x = 0.0;
        double mean_y = 0.0;

        if (use_complete_obs) {
            double sum_x = 0.0;
            double sum_y = 0.0;
            for (index_t i = 0; i < n; ++i) {
                if (!pair_is_used(i)) continue;
                sum_x += x(i);
                sum_y += y(i);
                ++valid_n;
            }

            if (valid_n < 3) return nan_value;

            mean_x = sum_x / static_cast<double>(valid_n);
            mean_y = sum_y / static_cast<double>(valid_n);
        } else {
            // No filtering requested: every element counts.
            valid_n = n;
            if (valid_n < 3) return nan_value;

            mean_x = x.mean();
            mean_y = y.mean();
        }

        // ---- Pass 2: centred cross-product and sums of squares ----
        double numerator = 0.0;
        double denom_x = 0.0;
        double denom_y = 0.0;

        for (index_t i = 0; i < n; ++i) {
            if (!pair_is_used(i)) continue;
            const double dx = x(i) - mean_x;
            const double dy = y(i) - mean_y;
            numerator += dx * dy;
            denom_x += dx * dx;
            denom_y += dy * dy;
        }

        // Absolute guard against a (near-)zero denominator, i.e. at least one
        // of the inputs has effectively no variance.
        const double denom = std::sqrt(denom_x * denom_y);
        if (denom < 1e-14) return nan_value;

        double correlation = numerator / denom;

        // Rounding can push |r| marginally above 1; clamp it back. NaN fails
        // both comparisons and is therefore passed through unchanged.
        if (correlation > 1.0) {
            correlation = 1.0;
        } else if (correlation < -1.0) {
            correlation = -1.0;
        }

        return correlation;
    }

6 Usage Example

#include "BigDataStatMeth.hpp"

// Example usage
// Replace `...` with two Eigen::VectorXd arguments of equal length and,
// optionally, a bool for use_complete_obs (defaults to true).
// The function is declared in namespace BigDataStatMeth, so qualify the call
// or bring the name into scope first.
// `result` is a double; it is NaN when the sizes differ, when fewer than
// 3 usable pairs are available, or when the denominator is below 1e-14.
auto result = pearson_correlation(...);