Rcpp_block_matrix_mul_parallel

C++ Function Reference

1 Signature

Eigen::MatrixXd BigDataStatMeth::Rcpp_block_matrix_mul_parallel(T X, U Y, bool transpX, bool transpY, Rcpp::Nullable< int > iblock_size, Rcpp::Nullable< int > threads=R_NilValue)

2 Description

Parallel block-based matrix multiplication.

3 Parameters

  • X (T): First input matrix
  • Y (U): Second input matrix
  • transpX (bool): Whether to transpose X before multiplication
  • transpY (bool): Whether to transpose Y before multiplication
  • iblock_size (Rcpp::Nullable< int >): Block size for computation
  • threads (Rcpp::Nullable< int >): Number of threads for parallel processing

4 Returns

Result of matrix multiplication

5 Details

Implements parallel block-based matrix multiplication with configurable block sizes and thread count. This function: supports matrix transposition before multiplication; uses OpenMP for parallel processing; implements a cache-friendly block-based algorithm; and handles edge cases for non-uniform block sizes.

6 Call Graph

Function dependencies

7 Source Code

File: inst/include/memAlgebra/memMultiplication.hpp, lines 212–298

/// @brief Parallel block-based (tiled) matrix multiplication: C = op(X) * op(Y),
///        where op(.) is the optional transpose selected by transpX / transpY.
///
/// The result matrix is partitioned into a grid of output blocks; each OpenMP
/// thread computes entire output blocks independently, accumulating the inner
/// (K) dimension into a thread-local buffer so no two threads ever write the
/// same element of C.
///
/// @tparam T, U  Restricted by static_assert to Eigen::MatrixXd or Eigen::Map
///               views over dynamic double matrices (row- or column-major).
/// @param X First input matrix.
/// @param Y Second input matrix.
/// @param transpX Whether to transpose X before multiplication.
/// @param transpY Whether to transpose Y before multiplication.
/// @param iblock_size Block (tile) size; when NULL, MAXBLOCKSIZE/3 is used.
/// @param threads Number of OpenMP threads; when NULL, get_number_threads decides.
/// @return op(X) * op(Y); on exception the error is printed to Rcpp::Rcout and
///         whatever state C holds at that point (typically empty) is returned.
inline Eigen::MatrixXd Rcpp_block_matrix_mul_parallel( T X, U Y, 
                                                                  bool transpX,
                                                                  bool transpY,
                                                                  Rcpp::Nullable<int>  iblock_size, 
                                                                  Rcpp::Nullable<int> threads  = R_NilValue)
    {
        
        Eigen::MatrixXd C;
        try{
            // Compile-time guard: only plain MatrixXd or Eigen::Map over a
            // dynamic double matrix (either storage order) is accepted for T.
            static_assert(std::is_same<T, Eigen::MatrixXd >::value || 
                          std::is_same<T, Eigen::Map< Eigen::MatrixXd >>::value || 
                          std::is_same<T, Eigen::Map< Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> >::value || 
                          std::is_same<T, Eigen::Map< Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>> >::value,
                          "Error - type not allowed");
            
            // Same restriction for U.
            static_assert(std::is_same<U, Eigen::MatrixXd >::value || 
                          std::is_same<U, Eigen::Map< Eigen::MatrixXd >>::value || 
                          std::is_same<U, Eigen::Map< Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> >::value || 
                          std::is_same<U, Eigen::Map< Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>> >::value,
                          "Error - type not allowed");
            
            // Materialize both operands as column-major MatrixXd copies.
            // NOTE(review): X and Y are copied unconditionally and then
            // overwritten when a transpose is requested, so the transposed
            // path pays for one extra full copy — candidate for optimization.
            Eigen::MatrixXd A = X,
                B = Y;
            
            if(transpX == true){ A = X.transpose(); }
            if(transpY == true){ B = Y.transpose(); }
            
            // int chunks;//, tid;
            hsize_t block_size;  // tile edge length (rows/cols per block)
            
            // Per-dimension block start offsets and block sizes; the last block
            // in each dimension may be smaller when the dimension is not an
            // exact multiple of block_size.
            std::vector<hsize_t> vsizetoReadN, vstartN,
            vsizetoReadM, vstartM,
            vsizetoReadK, vstartK;
            
            // unsigned int ithreads;
            hsize_t M = A.rows();  // rows of the result
            hsize_t K = A.cols();  // shared inner dimension
            hsize_t N = B.cols();  // cols of the result
            
            if( iblock_size.isNotNull()) {
                // NOTE(review): a negative user-supplied value would wrap when
                // stored in the unsigned hsize_t — confirm callers validate
                // iblock_size before reaching this point.
                block_size =  Rcpp::as<int>(iblock_size);  
            } else {
                block_size =  MAXBLOCKSIZE/3;  
            }
            
            C = Eigen::MatrixXd::Zero(M,N) ;
            // Clamp the block size so it never exceeds the smallest dimension.
            if(block_size > std::min( N, std::min(M,K)) )
                block_size = std::min( N, std::min(M,K)); 
            
            // Precompute block offsets and sizes along each dimension.
            getBlockPositionsSizes( N, block_size, vstartN, vsizetoReadN );
            getBlockPositionsSizes( M, block_size, vstartM, vsizetoReadM );
            getBlockPositionsSizes( K, block_size, vstartK, vsizetoReadK );
            
            // CHANGE 1: collapse(2) removed for compatibility; the (ii, jj)
            // block grid is flattened into a single loop index instead.
            const int total_blocks = static_cast<int>(vstartM.size() * vstartN.size());
            
            #pragma omp parallel num_threads( get_number_threads(threads, R_NilValue) ) shared(A, B, C)
            {
                // CHANGE 2: flat loop over output blocks instead of collapse(2)
            #pragma omp for schedule(dynamic)
                for (int block_idx = 0; block_idx < total_blocks; block_idx++)
                {
                    // Map the flat block index back to (ii, jj) block coordinates
                    const int ii = block_idx / static_cast<int>(vstartN.size());
                    const int jj = block_idx % static_cast<int>(vstartN.size());
                    
                    // CHANGE 3: thread-local buffer avoids a race condition
                    // (key to eliminating the NaN results seen previously)
                    Eigen::MatrixXd C_local = Eigen::MatrixXd::Zero(vsizetoReadM[ii], vsizetoReadN[jj]);
                    
                    // CHANGE 4: the full inner-dimension (k) sum for one output
                    // block runs sequentially inside a single thread, so there
                    // is no cross-thread accumulation on C
                    for(int kk = 0; kk < static_cast<int>(vstartK.size()); kk++)
                    {
                        // Accumulate into the local buffer (no race condition)
                        C_local += (A.block(vstartM[ii], vstartK[kk], vsizetoReadM[ii], vsizetoReadK[kk]) * 
                            B.block(vstartK[kk], vstartN[jj], vsizetoReadK[kk], vsizetoReadN[jj]));
                    }
                    
                    // CHANGE 5: single write at the end; each (ii, jj) output
                    // block is owned by exactly one loop iteration, so each
                    // thread stores to a disjoint region of C (no race)
                    C.block(vstartM[ii], vstartN[jj], vsizetoReadM[ii], vsizetoReadN[jj]) = C_local;
                }
            }
        } catch(std::exception& ex) {
            // Report to the R console rather than propagating into R.
            Rcpp::Rcout<< "c++ exception Rcpp_block_matrix_mul_parallel: "<<ex.what()<< " \n";
        }
        
        return(C);
    }

8 Usage Example

#include "BigDataStatMeth.hpp"

// Example usage
auto result = Rcpp_block_matrix_mul_parallel(...);