Exercises
Exercise 1: Server connection
- Build a connection object to a single study server: UMF Cluj (https://192.168.1.200:8005).
- Log in using the connection object.
 
# Exercise 1 solution: open a DataSHIELD session on a single study server.
# NOTE(review): the exercise text names UMF Cluj (https://192.168.1.200:8005),
# while this solution logs in to "verona" at https://192.168.1.50:8890 --
# confirm which server is intended.
library(DSI)
library(DSOpal)
library(dsBaseClient)
library(httr)

# Switch off TLS peer verification for the httr requests of this session.
# NOTE(review): presumably needed because the server certificate is
# self-signed -- confirm; do not reuse this setting outside the workshop.
set_config(config(ssl_verifypeer = 0L))

# Describe the server to connect to, then build the login table.
builder <- DSI::newDSLoginBuilder()
builder$append(
  server = "verona",
  url = "https://192.168.1.50:8890",
  user = "user_analisis",
  password = "********"
)
logindata <- builder$build()

# Log in; `connections` is the object reused by the following exercises.
connections <- DSI::datashield.login(logins = logindata)
Exercise 2: Assign data
Using the connection object from the Exercise 1:
- Find the available projects.
 - Find the available resources inside the projects.
 - Load and resolve the resource.
 - What type of object is the loaded resource?
 - What are the names of the available variables in that object?
 - How many individuals does it contain? And variables?
 
# Exercise 2 solution: browse the Opal server for projects and resources, then
# load the resource into the DataSHIELD session and inspect the result.

# A separate opalr session is opened only to list projects and resources.
o <- opalr::opal.login(
  username = "user_analisis",
  password = "*********",
  url = "https://192.168.1.50:8890"
)
opalr::opal.projects(o)
opalr::opal.resources(o, "UMF_Cluj")
opalr::opal.resources(o, "S_uncover")
# Browsing is done and `o` is not used again below: close the opalr session
# instead of leaving it open on the server.
opalr::opal.logout(o)

# Assign the resource to the server-side symbol "resource", then coerce it to
# a data frame stored under the server-side symbol "data".
DSI::datashield.assign.resource(connections, "resource", "S_uncover.verona")
DSI::datashield.assign.expr(
  conns = connections,
  symbol = "data",
  expr = "as.resource.data.frame(resource)"
)

# Object type, variable names, and dimensions (rows = individuals,
# columns = variables) of the loaded data.
ds.class("data")
ds.colnames("data")
ds.dim("data")
Exercise 3: Data validation
Using the connection object from the Exercise 1 and 2:
- What is the class of the variables `CSXRRA` and `TRXTR`?
- What is the range of the variable `CSXRRA`?
- What are the categories of the variable `TRXTR`?
# Exercise 3 solution: check the class of both variables, tabulate the
# categories of TRXTR, and summarise CSXRRA.
ds.class(x = "data$CSXRRA")
ds.class(x = "data$TRXTR")

# One-dimensional table: lists the categories of TRXTR with their counts.
ds.table(rvar = "data$TRXTR")

# Summary of CSXRRA, used here to answer the question about its range.
ds.summary(x = "data$CSXRRA")
Exercise 4: Data wrangling
Using the connection object from the Exercise 1 and 2:
- Select 4 different categorical variables (`Yes`/`No`).
- Create a new variable `COUNTS` that has the count for `Yes`.
- Recode the variable with the levels `1`, `2+`.
- Add the new variable to the original dataset.
 
# Exercise 4 solution: count, per individual, how many of four Yes/No
# variables are "Yes", collapse the count into a factor, and append it to
# `data` as COUNTS.

# The four categorical variables used to build the count.
variables <- c("TRXIS", "DSXIC", "CMXDI", "CMXHT")

# Inspect the class of each selected variable.
ds.class(x = "data$TRXIS")
ds.class(x = "data$DSXIC")
ds.class(x = "data$CMXDI")
ds.class(x = "data$CMXHT")

# Inspect the categories of each selected variable.
ds.table(rvar = "data$TRXIS")
ds.table(rvar = "data$DSXIC")
ds.table(rvar = "data$CMXDI")
ds.table(rvar = "data$CMXHT")

# Step 1: recode "Yes"/"No" to 1/0 into server-side objects <name>_recoded.
invisible(lapply(variables, function(v) {
  ds.recodeValues(
    var.name = paste0("data$", v),
    values2replace.vector = c("Yes", "No"),
    new.values.vector = c(1, 0),
    newobj = paste0(v, "_recoded")
  )
}))

# Step 2: convert each recoded object to numeric as <name>_recoded_num so the
# values can be summed.
invisible(lapply(variables, function(v) {
  ds.asNumeric(
    x.name = paste0(v, "_recoded"),
    newobj = paste0(v, "_recoded_num")
  )
}))

# Step 3: bind the numeric columns into one data frame and sum across rows.
ds.dataFrame(
  x = paste0(variables, "_recoded_num"),
  newobj = "joint_comorbidities"
)

ds.rowColCalc(
  x = "joint_comorbidities",
  operation = "rowSums",
  newobj = "new_variable"
)

# Step 4: turn the count into a factor and merge the counts 2, 3 and 4 into
# a single "2+" level.
ds.asFactor(
  input.var.name = "new_variable",
  newobj.name = "new_variable_factor"
)
ds.recodeValues(
  var.name = "new_variable_factor",
  values2replace.vector = c("0", "1", "2", "3", "4"),
  new.values.vector = c("0", "1", "2+", "2+", "2+"),
  newobj = "COUNTS"
)

# Step 5: append COUNTS to the server-side data frame `data`.
DSI::datashield.assign.expr(
  conns = connections,
  symbol = "data",
  expr = "cbind(data, COUNTS)"
)
Exercise 5: Descriptive analysis
Using the connection object from the Exercise 1 and 2:
- Perform a boxplot of the variable `CSXRRA`.
- Perform a boxplot of the variable `CSXRRA` grouped by `CMXCPD`.
- Perform a boxplot of the variable `CSXRRA` grouped by `CMXCPD` and `CMXCLD`.
- Calculate the contingency table of the variables `CMXCPD` and `CMXCLD`.
# Exercise 5 solution: boxplots of CSXRRA (overall, by one and by two
# grouping variables) and a contingency table of the two grouping variables.

# Boxplot of the single variable.
ds.boxPlot(x = "data$CSXRRA")

# Build factor versions of the two grouping variables.
ds.asFactor(input.var.name = "data$CMXCLD", newobj.name = "CMXCLD_factor")
ds.asFactor(input.var.name = "data$CMXCPD", newobj.name = "CMXCPD_factor")

# Append both factors to `data` (CMXCLD_factor first, then CMXCPD_factor) so
# they can be referenced by column name in the grouped boxplots.
DSI::datashield.assign.expr(
  conns = connections,
  symbol = "data",
  expr = "cbind(data, CMXCLD_factor)"
)
DSI::datashield.assign.expr(
  conns = connections,
  symbol = "data",
  expr = "cbind(data, CMXCPD_factor)"
)

# Boxplot grouped by one factor, then by two factors.
ds.boxPlot(
  x = "data",
  variables = "CSXRRA",
  group = "CMXCPD_factor"
)
ds.boxPlot(
  x = "data",
  variables = "CSXRRA",
  group = "CMXCPD_factor",
  group2 = "CMXCLD_factor"
)

# Two-way contingency table of the original grouping variables.
ds.table(rvar = "data$CMXCPD", cvar = "data$CMXCLD")
Exercise 6: Statistical models
Using the connection object from the Exercise 1 and 2:
- Fit a GLM (gaussian) with the model `LBXSC3SIHn ~ TRXIS`. (Model for illustration purposes only, not to answer any scientific question.)
- Fit a GLM (Poisson) with the model `DATLGT ~ COUNTS` (`COUNTS` is the variable created in Exercise 4).
# Exercise 6 solution: fit the two requested GLMs on the server-side `data`.

# Gaussian GLM (illustration only).
ds.glm(
  formula = "LBXSC3SIHn ~ TRXIS",
  data = "data",
  family = "gaussian"
)

# Poisson GLM; COUNTS is the variable appended to `data` in Exercise 4.
ds.glm(
  formula = "DATLGT ~ COUNTS",
  data = "data",
  family = "poisson"
)
Exercise 7: Extra
Using the connection object from the Exercise 1 and 2:
- Do a variable selection (Lasso regression) without the date variables.
 - Do a survival analysis with the variables of the Lasso regression.
 
# Exercise 7 solution: Lasso variable selection on the numeric columns,
# followed by a Cox model and survival curves on the selected variables.
# NOTE(review): `data_complete` and the column `DSXOS_recoded_num` are not
# created in the exercises visible above -- confirm where they come from.
library(dsSurvivalClient)
library(survival)

# --- Keep only the numeric columns of data_complete -------------------------

# Class of every column, queried one column at a time.
types <- vapply(
  ds.colnames("data_complete")[[1]],
  function(x) ds.class(paste0("data_complete$", x))[[1]][1],
  character(1),
  USE.NAMES = FALSE
)
`%notin%` <- Negate(`%in%`)
indexes_to_remove <- which(types %notin% "numeric")

# Server-side vector of ones with one entry per row of data_complete. It is
# compared with itself below so the subset keeps every row and only drops
# columns.
times <- ds.dim("data_complete")[[1]][1]
ds.rep(
  x1 = 1,
  times = times,
  source.times = "c",
  source.each = "c",
  newobj = "ONES"
)
ds.dataFrameSubset(
  df.name = "data_complete",
  V1.name = "ONES",
  V2.name = "ONES",
  Boolean.operator = "==",
  keep.cols = NULL,
  rm.cols = indexes_to_remove,
  keep.NAs = NULL,
  newobj = "data_complete_numeric",
  datasources = connections,
  notify.of.progress = FALSE
)

# --- Build the response Y and the predictor matrix X ------------------------

ds.assign(
  toAssign = "data_complete_numeric$DSXOS_recoded_num",
  newobj = "Y",
  datasources = connections
)
# NOTE(review): the removed column positions are hard-coded; presumably they
# are the date variables and the outcome -- verify against the column order
# of data_complete_numeric.
ds.dataFrameSubset(
  df.name = "data_complete_numeric",
  V1.name = "ONES",
  V2.name = "ONES",
  Boolean.operator = "==",
  keep.cols = NULL,
  rm.cols = c(1, 2, 5, 6, 9, 110, 111, 112),
  keep.NAs = NULL,
  newobj = "X",
  datasources = connections,
  notify.of.progress = FALSE
)
ds.asMatrix(x.name = "Y", newobj = "Y")
ds.asMatrix(x.name = "X", newobj = "X")

# --- Lasso regression -------------------------------------------------------

opts <- list(init = 0, maxIter = 10, tol = 0.01, ter = 2)
set.seed(123)
m1 <- dsMTLClient::ds.LS_Lasso(
  X = "X",
  Y = "Y",
  lam = 0.5,
  C = 0,
  opts = opts,
  datasources = connections,
  nDigits = 15
)

# Variables with a non-zero coefficient are the ones retained by the Lasso.
variables_interest <- ds.colnames("X")[[1]][which(m1$w != 0)]

# Without this guard an empty selection would yield the malformed formula
# "...~data$" and only fail later, on the server, with an obscure error.
if (length(variables_interest) == 0) {
  stop(
    "The Lasso fit selected no variables; ",
    "cannot build the survival formula (try a smaller `lam`).",
    call. = FALSE
  )
}

# --- Survival analysis on the selected variables ----------------------------

# NOTE(review): selection was done on `data_complete`, but the survival step
# reads from `data` -- confirm that `data` holds the same columns.
ds.make(toAssign = "data$DSXOS_recoded_num", newobj = "EVENT")
ds.make(toAssign = "data$DATLGT", newobj = "SURVTIME")
formula <- paste0(
  "survival::Surv(time=SURVTIME,event=EVENT)~",
  paste0("data$", variables_interest, collapse = "+")
)
dsSurvivalClient::ds.coxph.SLMA(
  formula = formula,
  dataName = "data",
  datasources = connections
)

# Fit the survival curves server-side as "scurves" and plot them.
dsSurvivalClient::ds.survfit(formula = formula, objectname = "scurves")
dsSurvivalClient::ds.plotsurvfit(formula = "scurves")