Rcpp_block_matrix_mul_parallel
C++ Function Reference
1 Signature
Eigen::MatrixXd BigDataStatMeth::Rcpp_block_matrix_mul_parallel(T X, U Y, bool transpX, bool transpY, Rcpp::Nullable< int > iblock_size, Rcpp::Nullable< int > threads = R_NilValue)
2 Description
Parallel block-based matrix multiplication.
3 Parameters
X (T): First input matrix
Y (U): Second input matrix
transpX (bool): Whether to transpose X before multiplication
transpY (bool): Whether to transpose Y before multiplication
iblock_size (Rcpp::Nullable< int >): Block size for computation
threads (Rcpp::Nullable< int >): Number of threads for parallel processing
4 Returns
Eigen::MatrixXd containing the product of the two input matrices (after any requested transpositions).
5 Details
Implements parallel block-based matrix multiplication with configurable block sizes and thread count. This function:
Supports matrix transposition before multiplication
Uses OpenMP for parallel processing
Implements a cache-friendly block-based algorithm
Handles edge cases for non-uniform block sizes (a partitioning sketch follows this list)
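The non-uniform case arises when a matrix dimension is not an exact multiple of the block size: the final block along that dimension is simply shorter. The sketch below illustrates this kind of partitioning; it mirrors the call pattern of getBlockPositionsSizes in the source listing (dimension length, block size, and output start/size vectors) but is an illustrative assumption, not the package's implementation.

#include <algorithm>
#include <cstddef>
#include <vector>

// Illustrative sketch only (not the package implementation): split a dimension
// of length `dim` into consecutive blocks of at most `block_size` elements,
// recording each block's start offset and length. When `dim` is not a multiple
// of `block_size`, the last block is shorter; this is the non-uniform case
// handled by the multiplication kernel in the source code below.
static void sketch_block_partition(std::size_t dim, std::size_t block_size,
                                   std::vector<std::size_t>& starts,
                                   std::vector<std::size_t>& sizes)
{
    for (std::size_t start = 0; start < dim; start += block_size) {
        starts.push_back(start);
        sizes.push_back(std::min(block_size, dim - start));
    }
}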
6 Call Graph
7 Source Code
File: inst/include/memAlgebra/memMultiplication.hpp • Lines 212-298
template <class T, class U>
inline Eigen::MatrixXd Rcpp_block_matrix_mul_parallel( T X, U Y,
bool transpX,
bool transpY,
Rcpp::Nullable<int> iblock_size,
Rcpp::Nullable<int> threads = R_NilValue)
{
Eigen::MatrixXd C;
try{
static_assert(std::is_same<T, Eigen::MatrixXd >::value ||
std::is_same<T, Eigen::Map< Eigen::MatrixXd >>::value ||
std::is_same<T, Eigen::Map< Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> >::value ||
std::is_same<T, Eigen::Map< Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>> >::value,
"Error - type not allowed");
static_assert(std::is_same<U, Eigen::MatrixXd >::value ||
std::is_same<U, Eigen::Map< Eigen::MatrixXd >>::value ||
std::is_same<U, Eigen::Map< Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> >::value ||
std::is_same<U, Eigen::Map< Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>> >::value,
"Error - type not allowed");
Eigen::MatrixXd A = X,
B = Y;
if(transpX == true){ A = X.transpose(); }
if(transpY == true){ B = Y.transpose(); }
// int chunks;//, tid;
hsize_t block_size;
std::vector<hsize_t> vsizetoReadN, vstartN,
vsizetoReadM, vstartM,
vsizetoReadK, vstartK;
// unsigned int ithreads;
hsize_t M = A.rows();
hsize_t K = A.cols();
hsize_t N = B.cols();
if( iblock_size.isNotNull()) {
block_size = Rcpp::as<int>(iblock_size);
} else {
block_size = MAXBLOCKSIZE/3;
}
C = Eigen::MatrixXd::Zero(M,N) ;
if(block_size > std::min( N, std::min(M,K)) )
block_size = std::min( N, std::min(M,K));
getBlockPositionsSizes( N, block_size, vstartN, vsizetoReadN );
getBlockPositionsSizes( M, block_size, vstartM, vsizetoReadM );
getBlockPositionsSizes( K, block_size, vstartK, vsizetoReadK );
// CHANGE 1: Remove collapse(2) for compatibility
const int total_blocks = static_cast<int>(vstartM.size() * vstartN.size());
#pragma omp parallel num_threads( get_number_threads(threads, R_NilValue) ) shared(A, B, C)
{
// CHANGE 2: Use a flat loop instead of collapse(2)
#pragma omp for schedule(dynamic)
for (int block_idx = 0; block_idx < total_blocks; block_idx++)
{
// Convert the flat index into (ii, jj) block coordinates
const int ii = block_idx / static_cast<int>(vstartN.size());
const int jj = block_idx % static_cast<int>(vstartN.size());
// CHANGE 3: Local buffer to avoid race conditions (key to eliminating NaN values)
Eigen::MatrixXd C_local = Eigen::MatrixXd::Zero(vsizetoReadM[ii], vsizetoReadN[jj]);
// CHANGE 4: Sequential k loop within the thread (removes the race condition)
for(int kk = 0; kk < static_cast<int>(vstartK.size()); kk++)
{
// Accumulate into the local buffer (no race condition)
C_local += (A.block(vstartM[ii], vstartK[kk], vsizetoReadM[ii], vsizetoReadK[kk]) *
B.block(vstartK[kk], vstartN[jj], vsizetoReadK[kk], vsizetoReadN[jj]));
}
// CHANGE 5: Single write at the end (no race condition)
C.block(vstartM[ii], vstartN[jj], vsizetoReadM[ii], vsizetoReadN[jj]) = C_local;
}
}
} catch(std::exception& ex) {
Rcpp::Rcout<< "c++ exception Rcpp_block_matrix_mul_parallel: "<<ex.what()<< " \n";
}
return(C);
}
8 Usage Example
#include "BigDataStatMeth.hpp"
// Example usage
auto result = Rcpp_block_matrix_mul_parallel(...);
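The generated stub above elides the actual arguments. Below is a minimal, self-contained sketch of a call with plain Eigen::MatrixXd operands; the exported wrapper name, the use of Rcpp::depends to locate the headers, and the block size value are illustrative assumptions, not part of the package.

#include <RcppEigen.h>
#include "BigDataStatMeth.hpp"

// [[Rcpp::depends(RcppEigen, BigDataStatMeth)]]

// Illustrative sketch: multiply two dense matrices without transposition,
// with an assumed block size of 256 and the default thread count.
// [[Rcpp::export]]
Eigen::MatrixXd example_block_mul(Eigen::MatrixXd X, Eigen::MatrixXd Y)
{
    Rcpp::Nullable<int> block_size = Rcpp::wrap(256); // assumed value
    Rcpp::Nullable<int> threads    = R_NilValue;      // use the default

    return BigDataStatMeth::Rcpp_block_matrix_mul_parallel(
        X, Y, false, false, block_size, threads);
}

Passing R_NilValue for iblock_size instead would fall back to the MAXBLOCKSIZE/3 default and the clamping to the smallest matrix dimension shown in the source listing above.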