Exercises
Exercise 1: Server connection
- Build a connection object to a single study server: UMF Cluj (`https://192.168.1.200:8005`).
- Log in using the connection object.
# Exercise 1: build the login information for one study server and open the
# DataSHIELD connection used by the following exercises.
library(DSI)
library(DSOpal)
library(dsBaseClient)

# NOTE(review): the exercise statement names UMF Cluj
# (https://192.168.1.200:8005), but this solution logs in to the "verona"
# server at a different URL -- confirm which one is intended.
builder <- DSI::newDSLoginBuilder()
builder$append(
  server = "verona",
  url = "https://192.168.1.50:8890",
  user = "user_analisis", password = "********"
)
logindata <- builder$build()

# NOTE(review): this turns off TLS peer verification for every httr request
# in the session; presumably needed for a self-signed certificate -- confirm.
library(httr)
set_config(config(ssl_verifypeer = 0L))

connections <- DSI::datashield.login(logins = logindata)
Exercise 2: Assign data
Using the connection object from the Exercise 1:
- Find the available projects.
- Find the available resources inside the projects.
- Load and resolve the resource.
- What type of object is the loaded resource?
- What are the names of the available variables in that object?
- How many individuals does it contain? And variables?
# Exercise 2: browse the projects/resources on the Opal server, then assign
# the resource on the DataSHIELD side and coerce it to a data frame.

# Direct Opal session, used only to list projects and resources.
o <- opalr::opal.login(
  username = "user_analisis",
  password = "*********",
  url = "https://192.168.1.50:8890"
)
opalr::opal.projects(o)
opalr::opal.resources(o, "UMF_Cluj")
opalr::opal.resources(o, "S_uncover")

# Assign the resource to the server-side symbol "resource", then resolve it
# into the server-side symbol "data".
DSI::datashield.assign.resource(connections, "resource", "S_uncover.verona")
DSI::datashield.assign.expr(
  conns = connections, symbol = "data",
  expr = "as.resource.data.frame(resource)"
)

# Type, variable names and dimensions of the loaded object.
ds.class("data")
ds.colnames("data")
ds.dim("data")
Exercise 3: Data validation
Using the connection object from the Exercise 1 and 2:
- What is the class of the variables `CSXRRA` and `TRXTR`?
- What is the range of the variable `CSXRRA`?
- What are the categories of the variable `TRXTR`?
# Exercise 3: inspect two columns of the server-side table "data".

# Class of each variable.
ds.class(x = "data$CSXRRA")
ds.class(x = "data$TRXTR")

# One-way table of TRXTR (its categories).
ds.table(rvar = "data$TRXTR")

# Summary of CSXRRA.
ds.summary(x = "data$CSXRRA")
Exercise 4: Data wrangling
Using the connection object from the Exercise 1 and 2:
- Select 4 different categorical variables (`Yes`/`No`).
- Create a new variable `COUNTS` that has the count for `Yes`.
- Recode the variable with the levels `1`, `2+`.
- Add the new variable to the original dataset.
# Exercise 4: count how many of four Yes/No variables are "Yes" per row,
# collapse the count into a factor and append it to "data" as COUNTS.

# Inspect the four chosen variables.
ds.class("data$TRXIS")
ds.class("data$DSXIC")
ds.class("data$CMXDI")
ds.class("data$CMXHT")
ds.table("data$TRXIS")
ds.table("data$DSXIC")
ds.table("data$CMXDI")
ds.table("data$CMXHT")

variables <- c(
  "TRXIS", "DSXIC",
  "CMXDI", "CMXHT"
)

# Recode "Yes"/"No" to 1/0 into server-side objects named <var>_recoded.
for (x in variables) {
  ds.recodeValues(
    var.name = paste0("data$", x),
    values2replace.vector = c("Yes", "No"),
    new.values.vector = c(1, 0),
    newobj = paste0(x, "_recoded")
  )
}

# Convert each recoded object to numeric (<var>_recoded_num) so the rows can
# be summed.
for (x in variables) {
  ds.asNumeric(
    x.name = paste0(x, "_recoded"),
    newobj = paste0(x, "_recoded_num")
  )
}

# Bind the numeric indicators into one data frame and sum across each row.
ds.dataFrame(
  x = paste0(variables, "_recoded_num"),
  newobj = "joint_comorbidities"
)
ds.rowColCalc(
  x = "joint_comorbidities",
  operation = "rowSums",
  newobj = "new_variable"
)

# Turn the count into a factor and merge the values 2, 3 and 4 into "2+".
ds.asFactor(
  input.var.name = "new_variable",
  newobj.name = "new_variable_factor"
)
ds.recodeValues(
  var.name = "new_variable_factor",
  values2replace.vector = c("0", "1", "2", "3", "4"),
  new.values.vector = c("0", "1", "2+", "2+", "2+"),
  newobj = "COUNTS"
)

# Append COUNTS as a new column of the server-side table "data".
DSI::datashield.assign.expr(connections, "data", "cbind(data, COUNTS)")
Exercise 5: Descriptive analysis
Using the connection object from the Exercise 1 and 2:
- Perform a boxplot of the variable `CSXRRA`.
- Perform a boxplot of the variable `CSXRRA` grouped by `CMXCPD`.
- Perform a boxplot of the variable `CSXRRA` grouped by `CMXCPD` and `CMXCLD`.
- Calculate the contingency table of the variables `CMXCPD` and `CMXCLD`.
# Exercise 5: boxplots of CSXRRA (overall and grouped) and a two-way table.

# Ungrouped boxplot.
ds.boxPlot("data$CSXRRA")

# Create factor versions of the grouping variables and append each one to
# the server-side table "data" so ds.boxPlot can find them as columns.
ds.asFactor(input.var.name = "data$CMXCLD", newobj.name = "CMXCLD_factor")
DSI::datashield.assign.expr(connections, "data", "cbind(data, CMXCLD_factor)")
ds.asFactor(input.var.name = "data$CMXCPD", newobj.name = "CMXCPD_factor")
DSI::datashield.assign.expr(connections, "data", "cbind(data, CMXCPD_factor)")

# Boxplots grouped by one factor, then by two.
ds.boxPlot(x = "data", variables = "CSXRRA", group = "CMXCPD_factor")
ds.boxPlot(
  x = "data", variables = "CSXRRA",
  group = "CMXCPD_factor", group2 = "CMXCLD_factor"
)

# Contingency table of CMXCPD by CMXCLD.
ds.table("data$CMXCPD", "data$CMXCLD")
Exercise 6: Statistical models
Using the connection object from the Exercise 1 and 2:
- Fit a GLM (gaussian) with the model `LBXSC3SIHn ~ TRXIS`. (Model for illustration purposes only, not to answer any scientific question.)
- Fit a GLM (Poisson) with the model `DATLGT ~ COUNTS` (`COUNTS` is the variable created in Exercise 4).
# Exercise 6: fit two GLMs against the server-side table "data".

# Gaussian family: LBXSC3SIHn explained by TRXIS.
ds.glm(
  formula = "LBXSC3SIHn ~ TRXIS",
  data = "data",
  family = "gaussian"
)

# Poisson family: DATLGT explained by COUNTS.
ds.glm(
  formula = "DATLGT ~ COUNTS",
  data = "data",
  family = "poisson"
)
Exercise 7: Extra
Using the connection object from the Exercise 1 and 2:
- Do a variable selection (Lasso regression) without the date variables.
- Do a survival analysis with the variables of the Lasso regression.
# Exercise 7: Lasso variable selection on the numeric columns, followed by a
# survival analysis on the selected variables.
library(dsSurvivalClient)

# NOTE(review): "data_complete" and the column "DSXOS_recoded_num" are not
# created anywhere in the visible exercises -- confirm they exist on the
# server before running this chunk.

# First reported class of every column. vapply() fails loudly if a column
# does not yield exactly one class string, so positions in `types` always
# line up with column positions.
types <- vapply(
  ds.colnames("data_complete")[[1]],
  function(x) ds.class(paste0("data_complete$", x))[[1]][1],
  character(1),
  USE.NAMES = FALSE
)
`%notin%` <- Negate(`%in%`)
indexes_to_remove <- which(types %notin% "numeric")

# Server-side vector of ones, as long as the table has rows. Comparing it
# with itself makes ds.dataFrameSubset keep every row, so only columns are
# dropped.
times <- ds.dim("data_complete")[[1]][1]
ds.rep(
  x1 = 1,
  times = times,
  source.times = "c",
  source.each = "c",
  newobj = "ONES"
)

# Drop the non-numeric columns.
ds.dataFrameSubset(
  df.name = "data_complete",
  V1.name = "ONES", V2.name = "ONES", Boolean.operator = "==",
  keep.cols = NULL, rm.cols = indexes_to_remove, keep.NAs = NULL,
  newobj = "data_complete_numeric",
  datasources = connections, notify.of.progress = FALSE
)

# Response Y and predictor matrix X.
ds.assign(
  toAssign = "data_complete_numeric$DSXOS_recoded_num",
  newobj = "Y", datasources = connections
)
# NOTE(review): the column positions removed here are hard-coded; the source
# task says the date variables are excluded -- verify these indices against
# the current column order.
ds.dataFrameSubset(
  df.name = "data_complete_numeric",
  V1.name = "ONES", V2.name = "ONES", Boolean.operator = "==",
  keep.cols = NULL, rm.cols = c(1, 2, 5, 6, 9, 110, 111, 112),
  keep.NAs = NULL,
  newobj = "X",
  datasources = connections, notify.of.progress = FALSE
)
ds.asMatrix(x.name = "Y", newobj = "Y")
ds.asMatrix(x.name = "X", newobj = "X")

# Solver options passed to the Lasso fit.
opts <- list()
opts$init <- 0
opts$maxIter <- 10
opts$tol <- 0.01
opts$ter <- 2

set.seed(123)
m1 <- dsMTLClient::ds.LS_Lasso(
  X = "X", Y = "Y", lam = 0.5, C = 0, opts,
  datasources = connections, nDigits = 15
)

# Columns of X with a non-zero coefficient.
variables_interest <- ds.colnames("X")[[1]][which(m1$w != 0)]

# Event indicator and follow-up time for the survival model.
ds.make(toAssign = "data$DSXOS_recoded_num", newobj = "EVENT")
ds.make(toAssign = "data$DATLGT", newobj = "SURVTIME")

# Survival formula with one "data$<var>" term per selected variable.
formula <- paste0(
  "survival::Surv(time=SURVTIME,event=EVENT)~",
  paste0("data$", variables_interest, collapse = "+")
)

dsSurvivalClient::ds.coxph.SLMA(
  formula = formula,
  dataName = "data",
  datasources = connections
)
dsSurvivalClient::ds.survfit(formula = formula, objectname = "scurves")
library(survival)
dsSurvivalClient::ds.plotsurvfit(formula = "scurves")