RcppPCAHdf5
C++ Function Reference
1 Signature
void BigDataStatMeth::RcppPCAHdf5(std::string filename, std::string strgroup, std::string strdataset, std::string strSVDgroup, int k, int q, int nev, bool bcenter, bool bscale, double dthreshold, bool bforce, bool asRowMajor, Rcpp::Nullable< Rcpp::CharacterVector > method=R_NilValue, Rcpp::Nullable< int > ithreads=R_NilValue)2 Description
Perform Principal Component Analysis.
3 Parameters
filename(std::string): HDF5 file namestrgroup(std::string): Group name for resultsstrdataset(std::string): Dataset namestrSVDgroup(std::string): SVD group namek(int): Number of components to computeq(int): Block size for processingnev(int): Number of eigenvaluesbcenter(bool): Whether to center the databscale(bool): Whether to scale the datadthreshold(double): Convergence thresholdbforce(bool): Whether to force computationasRowMajor(bool): Whether data is in row-major ordermethod(Rcpp::Nullable< Rcpp::CharacterVector >): Method selection (optional)ithreads(Rcpp::Nullable< int >): Number of threads (optional)
4 Details
Performs PCA on an HDF5 dataset with options for:Full or truncated analysisData preprocessing (centering/scaling)Method selectionParallel processing
5 Call Graph
6 Source Code
NoteImplementation
File: inst/include/hdf5Algebra/matrixPCA.hpp • Lines 347-457
inline void RcppPCAHdf5( std::string filename, std::string strgroup, std::string strdataset,
std::string strSVDgroup, int k, int q, int nev,
bool bcenter, bool bscale, double dthreshold,
bool bforce, bool asRowMajor,
Rcpp::Nullable<Rcpp::CharacterVector> method = R_NilValue,
Rcpp::Nullable<int> ithreads = R_NilValue)
{
BigDataStatMeth::hdf5Dataset* dsA = nullptr;
BigDataStatMeth::hdf5Dataset* dsd = nullptr;
BigDataStatMeth::hdf5Dataset* dsu = nullptr;
BigDataStatMeth::hdf5Dataset* dsv = nullptr;
BigDataStatMeth::hdf5Dataset* dsX = nullptr;
try{
std::string strPCAgroup = "PCA/" + strdataset;
// Check for svd decomposition (u, v and d matrices) in hdf5 file or if we
// need to compute again the SVD ( foce = true )
BigDataStatMeth::hdf5File* file = new BigDataStatMeth::hdf5File(filename, false);
file->openFile("r");
bool bexistsSVD = exists_HDF5_element(file->getFileptr(), strSVDgroup);
bool bexistsPCA = exists_HDF5_element(file->getFileptr(), strPCAgroup);
delete file; file = nullptr;
if( bexistsSVD == 0 || bforce == true ) {
dsA = new BigDataStatMeth::hdf5Dataset(filename, strgroup, strdataset, false);
dsA->openDataset();
if( dsA->getDatasetptr() != nullptr ) {
RcppTypifyNormalizeHdf5( dsA, bcenter, bscale, false); // Normalize and tipify data ( ((x-mu)/(sd)) * 1/sqrt(n-1) )
} else {
checkClose_file(dsA, dsd, dsu, dsv, dsX);
return void();
}
delete dsA; dsA = nullptr;
BigDataStatMeth::RcppbdSVD_hdf5( filename, "NORMALIZED_T/" + strgroup, strdataset, k, q, nev, false, false, dthreshold, bforce, asRowMajor, method, ithreads );
strSVDgroup = "SVD/" + strdataset;
}
// Check if PCA decomposition exists
if( bexistsPCA != 0 && bforce == false) {
Rcpp::Rcout<<"PCA decomposition exits, please set overwrite = true to overwrite the existing results";
return void();
}
// ------------ Variables ----------------
dsd = new BigDataStatMeth::hdf5Dataset(filename, strSVDgroup, "d", false );
dsd->openDataset();
dsv = new BigDataStatMeth::hdf5Dataset(filename, strSVDgroup, "v", false );
dsv->openDataset();
if( dsd->getDatasetptr() != nullptr && dsv->getDatasetptr() != nullptr) {
RcppGetPCAVariablesHdf5( strPCAgroup, dsd, dsv, bforce );
}
delete dsv; dsv = nullptr;
// delete dsA;
// ------------ Individuals ----------------
dsX = new BigDataStatMeth::hdf5Dataset(filename, strgroup, strdataset, false);
dsX->openDataset();
dsu = new BigDataStatMeth::hdf5Dataset(filename, strSVDgroup, "u", false );
dsu->openDataset();
if( dsX->getDatasetptr() != nullptr && dsd->getDatasetptr() != nullptr && dsu->getDatasetptr() != nullptr) {
RcppGetPCAIndividualsHdf5( strPCAgroup, dsX, dsd, dsu, bforce );
}
delete dsd; dsd = nullptr;
delete dsu; dsu = nullptr;
delete dsX; dsX = nullptr;
// delete dsA;
} catch( H5::FileIException& error ) { // catch failure caused by the H5File operations
checkClose_file(dsA, dsd, dsu, dsv, dsX);
Rcpp::Rcerr<<"\nc++ exception RcppPCAHdf5 (File IException)\n";
return void();
} catch( H5::DataSetIException& error ) { // catch failure caused by the DataSet operations
checkClose_file(dsA, dsd, dsu, dsv, dsX);
Rcpp::Rcerr<<"\nc++ exception RcppPCAHdf5 (DataSet IException)\n";
return void();
} catch( H5::DataSpaceIException& error ) { // catch failure caused by the DataSpace operations
checkClose_file(dsA, dsd, dsu, dsv, dsX);
Rcpp::Rcerr<<"\nc++ exception RcppPCAHdf5 (DataSpace IException)\n";
return void();
} catch(std::exception &ex) {
checkClose_file(dsA, dsd, dsu, dsv, dsX);
Rcpp::Rcerr<<"\nc++ exception RcppPCAHdf5 \n"<< ex.what();
return void();
} catch (...) {
checkClose_file(dsA, dsd, dsu, dsv, dsX);
Rcpp::Rcerr<<"\nC++ exception RcppPCAHdf5 (unknown reason)";
return void();
}
return void();
}7 Usage Example
#include "BigDataStatMeth.hpp"
// Example usage
auto result = RcppPCAHdf5(...);