RcppbdEigen_hdf5_Block
C++ Function Reference
1 Signature
void BigDataStatMeth::RcppbdEigen_hdf5_Block(T *dsA, BigDataStatMeth::hdf5Dataset *dsd, BigDataStatMeth::hdf5Dataset *dsu, int k, const std::string &which, int ncv, bool bcenter, bool bscale, double tol, int max_iter, bool compute_vectors, Rcpp::Nullable< int > threads=R_NilValue)2 Description
Block-wise eigendecomposition for large HDF5 matrices using Spectra.
3 Parameters
dsA(T *): Input hdf5Dataset containing the matrixdsd(BigDataStatMeth::hdf5Dataset *): Output hdf5Dataset for eigenvaluesdsu(BigDataStatMeth::hdf5Dataset *): Output hdf5Dataset for eigenvectorsk(int): Number of eigenvalues to computewhich(const std::string &): Which eigenvalues to computencv(int): Number of Arnoldi vectorsbcenter(bool): Whether to center the databscale(bool): Whether to scale the datatol(double): Convergence tolerancemax_iter(int): Maximum iterationscompute_vectors(bool): Whether to compute eigenvectorsthreads(Rcpp::Nullable< int >): Number of parallel threads
4 Returns
Type: class T
5 Details
This function performs eigenvalue decomposition on large matrices stored in HDF5 using Spectra library for consistency with RSpectra results.
6 Call Graph
7 Source Code
NoteImplementation
File: inst/include/hdf5Algebra/matrixEigenDecomposition.hpp • Lines 310-422
inline void RcppbdEigen_hdf5_Block(T* dsA,
BigDataStatMeth::hdf5Dataset* dsd,
BigDataStatMeth::hdf5Dataset* dsu,
int k, const std::string& which, int ncv,
bool bcenter, bool bscale, double tol, int max_iter,
bool compute_vectors, Rcpp::Nullable<int> threads = R_NilValue) {
static_assert(std::is_same<T*, BigDataStatMeth::hdf5Dataset*>::value ||
std::is_same<T*, BigDataStatMeth::hdf5DatasetInternal*>::value,
"Error - type not allowed");
BigDataStatMeth::hdf5Dataset* dsnormalizedData = nullptr;
try {
std::vector<hsize_t> stride = {1, 1}, block = {1, 1};
eigdecomp reteig;
// Get matrix dimensions
hsize_t n_rows = dsA->nrows();
hsize_t n_cols = dsA->ncols();
// Check if matrix is square
if (n_rows != n_cols) {
Rf_error("Matrix must be square for eigendecomposition");
return;
}
hsize_t n = n_rows;
// Use parameter validation function
std::tie(k, ncv) = validateSpectraParams((int)n, k, ncv);
// Read matrix data
Eigen::MatrixXd X;
std::vector<double> vdA(n * n);
dsA->readDatasetBlock({0, 0}, {n, n}, stride, block, vdA.data());
X = Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>(vdA.data(), n, n);
// Handle normalization if needed
if (bcenter || bscale) {
// For large matrices, we could implement block-wise normalization here
// For now, since we already read the matrix, apply normalization directly
if (n * n < MAXELEMSINBLOCK) {
X = RcppNormalize_Data(X, bcenter, bscale, false);
} else {
// For very large matrices, implement block-wise normalization
Rcpp::warning("Large matrix normalization not yet optimized - applying direct normalization");
X = RcppNormalize_Data(X, bcenter, bscale, false);
}
}
// Use Spectra for eigendecomposition - same pattern as SVD
reteig = RcppbdEigen_spectra(X, k, which, ncv, false, false, tol, max_iter);
if (!reteig.bconv) {
Rf_error("Eigendecomposition failed to converge");
return;
}
// Write eigenvalues to HDF5
dsd->createDataset(1, reteig.eigenvalues_real.size(), "real");
dsd->writeDataset(Rcpp::wrap(reteig.eigenvalues_real));
// Write eigenvectors to HDF5 if computed
if (compute_vectors && reteig.bcomputevectors) {
dsu->createDataset(reteig.eigenvectors_real.rows(), reteig.eigenvectors_real.cols(), "real");
dsu->writeDataset(Rcpp::wrap(reteig.eigenvectors_real));
}
// For non-symmetric matrices, also save imaginary parts if non-zero
if (!reteig.is_symmetric && reteig.eigenvalues_imag.cwiseAbs().maxCoeff() > 1e-14) {
// Create additional datasets for imaginary parts
BigDataStatMeth::hdf5Dataset* dsd_imag = new BigDataStatMeth::hdf5Dataset(
dsA->getFileName(), "EIGEN/" + dsA->getDatasetName(), "values_imag", true);
dsd_imag->createDataset(1, reteig.eigenvalues_imag.size(), "real");
dsd_imag->writeDataset(Rcpp::wrap(reteig.eigenvalues_imag));
delete dsd_imag;
if (compute_vectors && reteig.bcomputevectors && reteig.eigenvectors_imag.cwiseAbs().maxCoeff() > 1e-14) {
BigDataStatMeth::hdf5Dataset* dsu_imag = new BigDataStatMeth::hdf5Dataset(
dsA->getFileName(), "EIGEN/" + dsA->getDatasetName(), "vectors_imag", true);
dsu_imag->createDataset(reteig.eigenvectors_imag.rows(), reteig.eigenvectors_imag.cols(), "real");
dsu_imag->writeDataset(Rcpp::wrap(reteig.eigenvectors_imag));
delete dsu_imag;
}
}
} catch(H5::FileIException& error) {
checkClose_file(dsA, dsd, dsu, dsnormalizedData);
Rcpp::Rcerr << "\nc++ exception RcppbdEigen_hdf5_Block (File IException)\n";
return;
} catch(H5::DataSetIException& error) {
checkClose_file(dsA, dsd, dsu, dsnormalizedData);
Rcpp::Rcerr << "\nc++ exception RcppbdEigen_hdf5_Block (DataSet IException)\n";
return;
} catch(H5::GroupIException& error) {
checkClose_file(dsA, dsd, dsu, dsnormalizedData);
Rcpp::Rcerr << "\nc++ exception RcppbdEigen_hdf5_Block (Group IException)\n";
return;
} catch(std::exception &ex) {
checkClose_file(dsA, dsd, dsu, dsnormalizedData);
Rcpp::Rcerr << "C++ exception RcppbdEigen_hdf5_Block: " << ex.what();
return;
} catch (...) {
checkClose_file(dsA, dsd, dsu, dsnormalizedData);
Rcpp::Rcerr << "\nC++ exception RcppbdEigen_hdf5_Block (unknown reason)";
return;
}
}8 Usage Example
#include "BigDataStatMeth.hpp"
// Example usage
auto result = RcppbdEigen_hdf5_Block(...);