RcppbdEigen_hdf5_Block

C++ Function Reference

1 Signature

void BigDataStatMeth::RcppbdEigen_hdf5_Block(T *dsA, BigDataStatMeth::hdf5Dataset *dsd, BigDataStatMeth::hdf5Dataset *dsu, int k, const std::string &which, int ncv, bool bcenter, bool bscale, double tol, int max_iter, bool compute_vectors, Rcpp::Nullable< int > threads=R_NilValue)

2 Description

Block-wise eigendecomposition for large HDF5 matrices using Spectra.

3 Parameters

  • dsA (T *): Input hdf5Dataset containing the matrix
  • dsd (BigDataStatMeth::hdf5Dataset *): Output hdf5Dataset for eigenvalues
  • dsu (BigDataStatMeth::hdf5Dataset *): Output hdf5Dataset for eigenvectors
  • k (int): Number of eigenvalues to compute
  • which (const std::string &): Which eigenvalues to compute
  • ncv (int): Number of Arnoldi vectors
  • bcenter (bool): Whether to center the data
  • bscale (bool): Whether to scale the data
  • tol (double): Convergence tolerance
  • max_iter (int): Maximum iterations
  • compute_vectors (bool): Whether to compute eigenvectors
  • threads (Rcpp::Nullable< int >): Number of parallel threads

4 Returns

Type: class T

5 Details

This function performs eigenvalue decomposition on large matrices stored in HDF5 using Spectra library for consistency with RSpectra results.

6 Call Graph

Function dependencies

7 Source Code

File: inst/include/hdf5Algebra/matrixEigenDecomposition.hppLines 310-422

inline void RcppbdEigen_hdf5_Block(T* dsA, 
                                       BigDataStatMeth::hdf5Dataset* dsd,
                                       BigDataStatMeth::hdf5Dataset* dsu, 
                                       int k, const std::string& which, int ncv,
                                       bool bcenter, bool bscale, double tol, int max_iter,
                                       bool compute_vectors, Rcpp::Nullable<int> threads = R_NilValue) {
        
        static_assert(std::is_same<T*, BigDataStatMeth::hdf5Dataset*>::value ||
                      std::is_same<T*, BigDataStatMeth::hdf5DatasetInternal*>::value,
                      "Error - type not allowed");
        
        BigDataStatMeth::hdf5Dataset* dsnormalizedData = nullptr;
        
        try {
            
            std::vector<hsize_t> stride = {1, 1}, block = {1, 1};
            eigdecomp reteig;
            
            // Get matrix dimensions
            hsize_t n_rows = dsA->nrows();
            hsize_t n_cols = dsA->ncols();
            
            // Check if matrix is square
            if (n_rows != n_cols) {
                Rf_error("Matrix must be square for eigendecomposition");
                return;
            }
            
            hsize_t n = n_rows;
            
            // Use parameter validation function
            std::tie(k, ncv) = validateSpectraParams((int)n, k, ncv);
            
            // Read matrix data
            Eigen::MatrixXd X;
            std::vector<double> vdA(n * n);
            
            dsA->readDatasetBlock({0, 0}, {n, n}, stride, block, vdA.data());
            X = Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>(vdA.data(), n, n);
            
            // Handle normalization if needed
            if (bcenter || bscale) {
                
                // For large matrices, we could implement block-wise normalization here
                // For now, since we already read the matrix, apply normalization directly
                if (n * n < MAXELEMSINBLOCK) {
                    X = RcppNormalize_Data(X, bcenter, bscale, false);
                } else {
                    // For very large matrices, implement block-wise normalization
                    Rcpp::warning("Large matrix normalization not yet optimized - applying direct normalization");
                    X = RcppNormalize_Data(X, bcenter, bscale, false);
                }
            }
            
            // Use Spectra for eigendecomposition - same pattern as SVD
            reteig = RcppbdEigen_spectra(X, k, which, ncv, false, false, tol, max_iter);
            
            if (!reteig.bconv) {
                Rf_error("Eigendecomposition failed to converge");
                return;
            }
            
            // Write eigenvalues to HDF5
            dsd->createDataset(1, reteig.eigenvalues_real.size(), "real");
            dsd->writeDataset(Rcpp::wrap(reteig.eigenvalues_real));
            
            // Write eigenvectors to HDF5 if computed
            if (compute_vectors && reteig.bcomputevectors) {
                dsu->createDataset(reteig.eigenvectors_real.rows(), reteig.eigenvectors_real.cols(), "real");
                dsu->writeDataset(Rcpp::wrap(reteig.eigenvectors_real));
            }
            
            // For non-symmetric matrices, also save imaginary parts if non-zero
            if (!reteig.is_symmetric && reteig.eigenvalues_imag.cwiseAbs().maxCoeff() > 1e-14) {
                
                // Create additional datasets for imaginary parts
                BigDataStatMeth::hdf5Dataset* dsd_imag = new BigDataStatMeth::hdf5Dataset(
                    dsA->getFileName(), "EIGEN/" + dsA->getDatasetName(), "values_imag", true);
                dsd_imag->createDataset(1, reteig.eigenvalues_imag.size(), "real");
                dsd_imag->writeDataset(Rcpp::wrap(reteig.eigenvalues_imag));
                delete dsd_imag;
                
                if (compute_vectors && reteig.bcomputevectors && reteig.eigenvectors_imag.cwiseAbs().maxCoeff() > 1e-14) {
                    BigDataStatMeth::hdf5Dataset* dsu_imag = new BigDataStatMeth::hdf5Dataset(
                        dsA->getFileName(), "EIGEN/" + dsA->getDatasetName(), "vectors_imag", true);
                    dsu_imag->createDataset(reteig.eigenvectors_imag.rows(), reteig.eigenvectors_imag.cols(), "real");
                    dsu_imag->writeDataset(Rcpp::wrap(reteig.eigenvectors_imag));
                    delete dsu_imag;
                }
            }
            
        } catch(H5::FileIException& error) {
            checkClose_file(dsA, dsd, dsu, dsnormalizedData);
            Rcpp::Rcerr << "\nc++ exception RcppbdEigen_hdf5_Block (File IException)\n";
            return;
        } catch(H5::DataSetIException& error) {
            checkClose_file(dsA, dsd, dsu, dsnormalizedData);
            Rcpp::Rcerr << "\nc++ exception RcppbdEigen_hdf5_Block (DataSet IException)\n";
            return;
        } catch(H5::GroupIException& error) {
            checkClose_file(dsA, dsd, dsu, dsnormalizedData);
            Rcpp::Rcerr << "\nc++ exception RcppbdEigen_hdf5_Block (Group IException)\n";
            return;
        } catch(std::exception &ex) {
            checkClose_file(dsA, dsd, dsu, dsnormalizedData);
            Rcpp::Rcerr << "C++ exception RcppbdEigen_hdf5_Block: " << ex.what();
            return;
        } catch (...) {
            checkClose_file(dsA, dsd, dsu, dsnormalizedData);
            Rcpp::Rcerr << "\nC++ exception RcppbdEigen_hdf5_Block (unknown reason)";
            return;
        }
    }

8 Usage Example

#include "BigDataStatMeth.hpp"

// Example usage
auto result = RcppbdEigen_hdf5_Block(...);