Rcpp_block_matrix_mul_parallel

C++ Function Reference

1 Signature

Eigen::MatrixXd BigDataStatMeth::Rcpp_block_matrix_mul_parallel(T X, U Y, bool transpX, bool transpY, Rcpp::Nullable< int > iblock_size, Rcpp::Nullable< int > threads=R_NilValue)

2 Description

Parallel block-based matrix multiplication.

3 Parameters

  • X (T): First input matrix
  • Y (U): Second input matrix
  • transpX (bool): Whether to transpose X before multiplication
  • transpY (bool): Whether to transpose Y before multiplication
  • iblock_size (Rcpp::Nullable< int >): Block size for computation
  • threads (Rcpp::Nullable< int >): Number of threads for parallel processing

4 Returns

Result of matrix multiplication

5 Details

Implements parallel block-based matrix multiplication with configurable block sizes and thread count. This function: supports matrix transposition before multiplication; uses OpenMP for parallel processing; implements a cache-friendly block-based algorithm; and handles edge cases for non-uniform block sizes.

6 Call Graph

Function dependencies

7 Source Code

File: inst/include/memAlgebra/memMultiplication.hpp, lines 212–298

/// @brief Parallel block-based (tiled) matrix multiplication: C = op(X) * op(Y),
///        where op(.) is the optional transpose selected by transpX / transpY.
///
/// The result matrix is partitioned into a grid of output blocks; each OpenMP
/// thread computes entire output blocks independently, accumulating the inner
/// (K) dimension into a thread-local buffer so no two threads ever write the
/// same element of C.
///
/// @tparam T, U  Restricted by static_assert to Eigen::MatrixXd or Eigen::Map
///               views over dynamic double matrices (row- or column-major).
/// @param X First input matrix.
/// @param Y Second input matrix.
/// @param transpX Whether to transpose X before multiplication.
/// @param transpY Whether to transpose Y before multiplication.
/// @param iblock_size Block (tile) size; when NULL, MAXBLOCKSIZE/3 is used.
/// @param threads Number of OpenMP threads; when NULL, get_number_threads decides.
/// @return op(X) * op(Y); on exception the error is printed to Rcpp::Rcout and
///         whatever state C holds at that point (typically empty) is returned.
inline Eigen::MatrixXd Rcpp_block_matrix_mul_parallel( T X, U Y, 
                                                                  bool transpX,
                                                                  bool transpY,
                                                                  Rcpp::Nullable<int>  iblock_size, 
                                                                  Rcpp::Nullable<int> threads  = R_NilValue)
    {
        
        Eigen::MatrixXd C;
        try{
            // Compile-time guard: only plain MatrixXd or Eigen::Map over a
            // dynamic double matrix (either storage order) is accepted for T.
            static_assert(std::is_same<T, Eigen::MatrixXd >::value || 
                          std::is_same<T, Eigen::Map< Eigen::MatrixXd >>::value || 
                          std::is_same<T, Eigen::Map< Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> >::value || 
                          std::is_same<T, Eigen::Map< Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>> >::value,
                          "Error - type not allowed");
            
            // Same restriction for U.
            static_assert(std::is_same<U, Eigen::MatrixXd >::value || 
                          std::is_same<U, Eigen::Map< Eigen::MatrixXd >>::value || 
                          std::is_same<U, Eigen::Map< Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> >::value || 
                          std::is_same<U, Eigen::Map< Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>> >::value,
                          "Error - type not allowed");
            
            // Materialize both operands as column-major MatrixXd copies.
            // NOTE(review): X and Y are copied unconditionally and then
            // overwritten when a transpose is requested, so the transposed
            // path pays for one extra full copy — candidate for optimization.
            Eigen::MatrixXd A = X,
                B = Y;
            
            if(transpX == true){ A = X.transpose(); }
            if(transpY == true){ B = Y.transpose(); }
            
            // int chunks;//, tid;
            hsize_t block_size;  // tile edge length (rows/cols per block)
            
            // Per-dimension block start offsets and block sizes; the last block
            // in each dimension may be smaller when the dimension is not an
            // exact multiple of block_size.
            std::vector<hsize_t> vsizetoReadN, vstartN,
            vsizetoReadM, vstartM,
            vsizetoReadK, vstartK;
            
            // unsigned int ithreads;
            hsize_t M = A.rows();  // rows of the result
            hsize_t K = A.cols();  // shared inner dimension
            hsize_t N = B.cols();  // cols of the result
            
            if( iblock_size.isNotNull()) {
                // NOTE(review): a negative user-supplied value would wrap when
                // stored in the unsigned hsize_t — confirm callers validate
                // iblock_size before reaching this point.
                block_size =  Rcpp::as<int>(iblock_size);  
            } else {
                block_size =  MAXBLOCKSIZE/3;  
            }
            
            C = Eigen::MatrixXd::Zero(M,N) ;
            // Clamp the block size so it never exceeds the smallest dimension.
            if(block_size > std::min( N, std::min(M,K)) )
                block_size = std::min( N, std::min(M,K)); 
            
            // Precompute block offsets and sizes along each dimension.
            getBlockPositionsSizes( N, block_size, vstartN, vsizetoReadN );
            getBlockPositionsSizes( M, block_size, vstartM, vsizetoReadM );
            getBlockPositionsSizes( K, block_size, vstartK, vsizetoReadK );
            
            // CHANGE 1: collapse(2) removed for compatibility; the (ii, jj)
            // block grid is flattened into a single loop index instead.
            const int total_blocks = static_cast<int>(vstartM.size() * vstartN.size());
            
            #pragma omp parallel num_threads( get_number_threads(threads, R_NilValue) ) shared(A, B, C)
            {
                // CHANGE 2: flat loop over output blocks instead of collapse(2)
            #pragma omp for schedule(dynamic)
                for (int block_idx = 0; block_idx < total_blocks; block_idx++)
                {
                    // Map the flat block index back to (ii, jj) block coordinates
                    const int ii = block_idx / static_cast<int>(vstartN.size());
                    const int jj = block_idx % static_cast<int>(vstartN.size());
                    
                    // CHANGE 3: thread-local buffer avoids a race condition
                    // (key to eliminating the NaN results seen previously)
                    Eigen::MatrixXd C_local = Eigen::MatrixXd::Zero(vsizetoReadM[ii], vsizetoReadN[jj]);
                    
                    // CHANGE 4: the full inner-dimension (k) sum for one output
                    // block runs sequentially inside a single thread, so there
                    // is no cross-thread accumulation on C
                    for(int kk = 0; kk < static_cast<int>(vstartK.size()); kk++)
                    {
                        // Accumulate into the local buffer (no race condition)
                        C_local += (A.block(vstartM[ii], vstartK[kk], vsizetoReadM[ii], vsizetoReadK[kk]) * 
                            B.block(vstartK[kk], vstartN[jj], vsizetoReadK[kk], vsizetoReadN[jj]));
                    }
                    
                    // CHANGE 5: single write at the end; each (ii, jj) output
                    // block is owned by exactly one loop iteration, so each
                    // thread stores to a disjoint region of C (no race)
                    C.block(vstartM[ii], vstartN[jj], vsizetoReadM[ii], vsizetoReadN[jj]) = C_local;
                }
            }
        } catch(std::exception& ex) {
            // Report to the R console rather than propagating into R.
            Rcpp::Rcout<< "c++ exception Rcpp_block_matrix_mul_parallel: "<<ex.what()<< " \n";
        }
        
        return(C);
    }

8 Usage Example

#include "BigDataStatMeth.hpp"

// Example usage
auto result = Rcpp_block_matrix_mul_parallel(...);