diff --git a/DESCRIPTION b/DESCRIPTION index a204c924e..399837b96 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: mlr3proba Title: Probabilistic Supervised Learning for 'mlr3' -Version: 0.6.3 +Version: 0.6.4 Authors@R: c(person(given = "Raphael", family = "Sonabend", @@ -30,11 +30,16 @@ Authors@R: role = c("cre", "aut"), email = "bblodfon@gmail.com", comment = c(ORCID = "0000-0002-3609-8674")), - person(given = "Lukas", + person(given = "Lukas", family = "Burk", - email = "github@quantenbrot.de", + email = "github@quantenbrot.de", role = "ctb", - comment = c(ORCID = "0000-0001-7528-3795"))) + comment = c(ORCID = "0000-0001-7528-3795")), + person(given = "Maximilian", + family = "Muecke", + email = "muecke.maximilian@gmail.com", + role = "ctb", + comment = c(ORCID = "0009-0000-9432-9795"))) Description: Provides extensions for probabilistic supervised learning for 'mlr3'. This includes extending the regression task to probabilistic and interval regression, adding a survival task, and other specialized @@ -86,7 +91,7 @@ Encoding: UTF-8 LazyData: true NeedsCompilation: no Roxygen: list(markdown = TRUE, r6 = TRUE) -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 Collate: 'LearnerDens.R' 'aaa.R' diff --git a/NEWS.md b/NEWS.md index ddac439b2..71e45ec18 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,8 @@ +# mlr3proba 0.6.4 + +* Add useR! 2024 tutorial +* Lots of refactoring, improve code quality (thanks to @m-muecke) + # mlr3proba 0.6.3 * Add new tasks from `survival` package: `veteran`, `pbc`, `mgus`, `gbsg` diff --git a/R/LearnerDensHistogram.R b/R/LearnerDensHistogram.R index 79f759b1c..5ed1bd514 100644 --- a/R/LearnerDensHistogram.R +++ b/R/LearnerDensHistogram.R @@ -27,12 +27,12 @@ LearnerDensHistogram = R6::R6Class("LearnerDensHistogram", private = list( .train = function(task) { pars = self$param_set$get_values(tags = "train") - fit = invoke(.histogram, dat = task$data()[[1]], .args = pars) + fit = invoke(.histogram, dat = task$data()[[1L]], .args = pars) set_class(list(distr = fit$distr, hist = fit$hist), "dens.hist") }, .predict = function(task) { - newdata = task$data()[[1]] + newdata = task$data()[[1L]] list(pdf = self$model$distr$pdf(newdata), cdf = self$model$distr$cdf(newdata), distr = self$model$distr) } diff --git a/R/LearnerDensKDE.R b/R/LearnerDensKDE.R index b6f967a24..b59021bc0 100644 --- a/R/LearnerDensKDE.R +++ b/R/LearnerDensKDE.R @@ -18,7 +18,7 @@ LearnerDensKDE = R6::R6Class("LearnerDensKDE", #' Creates a new instance of this [R6][R6::R6Class] class. initialize = function() { ps = ps( - kernel = p_fct(levels = subset(distr6::listKernels(), select = "ShortName")[[1]], + kernel = p_fct(levels = subset(distr6::listKernels(), select = "ShortName")[[1L]], default = "Epan", tags = "train"), bandwidth = p_dbl(lower = 0, tags = "train", special_vals = list("silver")) ) @@ -46,7 +46,7 @@ LearnerDensKDE = R6::R6Class("LearnerDensKDE", self$param_set$values$kernel == "Epan" } - data = task$data()[[1]] + data = task$data()[[1L]] kernel = get(as.character(subset( distr6::listKernels(), @@ -54,20 +54,22 @@ LearnerDensKDE = R6::R6Class("LearnerDensKDE", ClassName)))$new() - bw = ifelse(self$param_set$values$bandwidth == "silver", + bw = if (isTRUE(self$param_set$values$bandwidth == "silver")) { 0.9 * min(sd(data), stats::IQR(data, na.rm = TRUE) / 1.349, na.rm = TRUE) * - length(data)^-0.2, - self$param_set$values$bandwidth) + length(data)^-0.2 + } else { + self$param_set$values$bandwidth + } pdf = function(x) {} # nolint body(pdf) = substitute({ - if (length(x) == 1) { + if (length(x) == 1L) { return(1 / (rows * bw) * sum(kernel$pdf((x - train) / bw))) } else { x = matrix(x, nrow = length(x), ncol = rows) train_mat = matrix(train, nrow = nrow(x), ncol = rows, byrow = TRUE) - return(1 / (rows * bw) * colSums(apply((x - train_mat) / bw, 1, kernel$pdf))) + return(1 / (rows * bw) * colSums(apply((x - train_mat) / bw, 1L, kernel$pdf))) } }, list( rows = task$nrow, @@ -83,7 +85,7 @@ LearnerDensKDE = R6::R6Class("LearnerDensKDE", }, .predict = function(task) { - list(pdf = self$model$pdf(task$data()[[1]]), + list(pdf = self$model$pdf(task$data()[[1L]]), distr = self$model) } ) diff --git a/R/LearnerSurvCoxPH.R b/R/LearnerSurvCoxPH.R index 2720bbc37..28f04441b 100644 --- a/R/LearnerSurvCoxPH.R +++ b/R/LearnerSurvCoxPH.R @@ -55,13 +55,13 @@ LearnerSurvCoxPH = R6Class("LearnerSurvCoxPH", stop(sprintf( "Learner %s on task %s failed to predict: Missing values in new data (line(s) %s)\n", self$id, task$id, - paste0(which(!complete.cases(newdata)), collapse = ", "))) + toString(which(!complete.cases(newdata))))) } pv = self$param_set$get_values(tags = "predict") # Get predicted values - fit = mlr3misc::invoke(survival::survfit, formula = self$model, newdata = newdata, + fit = invoke(survival::survfit, formula = self$model, newdata = newdata, se.fit = FALSE, .args = pv) lp = predict(self$model, type = "lp", newdata = newdata) diff --git a/R/MeasureRegrLogloss.R b/R/MeasureRegrLogloss.R index 156ad0251..c7bfbe280 100644 --- a/R/MeasureRegrLogloss.R +++ b/R/MeasureRegrLogloss.R @@ -45,7 +45,7 @@ MeasureRegrLogloss = R6::R6Class("MeasureRegrLogloss", if (inherits(distr, c("Matdist", "Arrdist"))) { pdf = diag(distr$pdf(truth)) } else { - pdf = as.numeric(distr$pdf(data = matrix(truth, nrow = 1))) + pdf = as.numeric(distr$pdf(data = matrix(truth, nrow = 1L))) } pdf[pdf == 0] = self$param_set$values$eps diff --git a/R/MeasureSurvAUC.R b/R/MeasureSurvAUC.R index 784a2b972..3708b0a79 100644 --- a/R/MeasureSurvAUC.R +++ b/R/MeasureSurvAUC.R @@ -16,7 +16,7 @@ MeasureSurvAUC = R6Class("MeasureSurvAUC", #' Creates a new instance of this [R6][R6::R6Class] class. initialize = function(id, properties = character(), label = NA_character_, man = NA_character_, param_set = ps()) { - if (class(self)[[1]] == "MeasureSurvAUC") { + if (class(self)[[1L]] == "MeasureSurvAUC") { stop("This is an abstract class that should not be constructed directly.") } @@ -48,15 +48,15 @@ MeasureSurvAUC = R6Class("MeasureSurvAUC", } args$times = ps$times - if (length(args$times) == 0) { - args$times = sort(unique(prediction$truth[, 1])) + if (length(args$times) == 0L) { + args$times = sort(unique(prediction$truth[, 1L])) } if ("Surv.rsp.new" %in% names(formals(FUN))) { args$Surv.rsp.new = prediction$truth # nolint } - auc = mlr3misc::invoke(FUN, lpnew = prediction$lp, .args = args) + auc = invoke(FUN, lpnew = prediction$lp, .args = args) if (is.null(ps$integrated) || !ps$integrated || grepl("tnr|tpr", self$id)) { auc diff --git a/R/MeasureSurvCalibrationAlpha.R b/R/MeasureSurvCalibrationAlpha.R index 8af1c61be..fba92bdcc 100644 --- a/R/MeasureSurvCalibrationAlpha.R +++ b/R/MeasureSurvCalibrationAlpha.R @@ -56,7 +56,7 @@ MeasureSurvCalibrationAlpha = R6Class("MeasureSurvCalibrationAlpha", ) ps$values = list(eps = 1e-3, se = FALSE, method = method, truncate = Inf) range = if (method == "ratio") c(-Inf, Inf) else c(0, Inf) - minimize = ifelse(method == "ratio", FALSE, TRUE) + minimize = method != "ratio" super$initialize( id = "surv.calib_alpha", @@ -73,8 +73,8 @@ MeasureSurvCalibrationAlpha = R6Class("MeasureSurvCalibrationAlpha", private = list( .score = function(prediction, ...) { truth = prediction$truth - all_times = truth[, 1] # both event times and censoring times - status = truth[, 2] + all_times = truth[, 1L] # both event times and censoring times + status = truth[, 2L] deaths = sum(status) ps = self$param_set$values @@ -86,7 +86,7 @@ MeasureSurvCalibrationAlpha = R6Class("MeasureSurvCalibrationAlpha", # Bypass distr6 construction if underlying distr represented by array if (inherits(distr, "array")) { surv = distr - if (length(dim(surv)) == 3) { + if (length(dim(surv)) == 3L) { # survival 3d array, extract median surv = .ext_surv_mat(arr = surv, which.curve = 0.5) } @@ -103,7 +103,7 @@ MeasureSurvCalibrationAlpha = R6Class("MeasureSurvCalibrationAlpha", } else { if (inherits(distr, "VectorDistribution")) { cumhaz = as.numeric( - distr$cumHazard(data = matrix(all_times, nrow = 1)) + distr$cumHazard(data = matrix(all_times, nrow = 1L)) ) } else { cumhaz = diag(as.matrix(distr$cumHazard(all_times))) diff --git a/R/MeasureSurvCalibrationBeta.R b/R/MeasureSurvCalibrationBeta.R index f351266ba..841e372d6 100644 --- a/R/MeasureSurvCalibrationBeta.R +++ b/R/MeasureSurvCalibrationBeta.R @@ -47,7 +47,7 @@ MeasureSurvCalibrationBeta = R6Class("MeasureSurvCalibrationBeta", ) ps$values = list(se = FALSE, method = method) range = if (method == "ratio") c(-Inf, Inf) else c(0, Inf) - minimize = ifelse(method == "ratio", FALSE, TRUE) + minimize = method != "ratio" super$initialize( id = "surv.calib_beta", @@ -66,15 +66,15 @@ MeasureSurvCalibrationBeta = R6Class("MeasureSurvCalibrationBeta", df = data.frame(truth = prediction$truth, lp = prediction$lp) fit = try(summary(survival::coxph(truth ~ lp, data = df)), silent = TRUE) - if (class(fit)[1] == "try-error") { + if (inherits(fit, "try-error")) { return(NA) } else { ps = self$param_set$values if (ps$se) { - return(fit$coefficients[,"se(coef)"]) + return(fit$coefficients[, "se(coef)"]) } else { - out = fit$coefficients[,"coef"] + out = fit$coefficients[, "coef"] if (ps$method == "diff") { out = abs(1 - out) diff --git a/R/MeasureSurvChamblessAUC.R b/R/MeasureSurvChamblessAUC.R index 601c8f6d3..15071a89d 100644 --- a/R/MeasureSurvChamblessAUC.R +++ b/R/MeasureSurvChamblessAUC.R @@ -46,9 +46,9 @@ MeasureSurvChamblessAUC = R6Class("MeasureSurvChamblessAUC", ps = self$param_set$values if (!ps$integrated) { msg = "If `integrated=FALSE` then `times` should be a scalar numeric." - assert_numeric(ps$times, len = 1, .var.name = msg) + assert_numeric(ps$times, len = 1L, .var.name = msg) } else { - if (!is.null(ps$times) && length(ps$times) == 1) { + if (!is.null(ps$times) && length(ps$times) == 1L) { ps$integrated = FALSE } } diff --git a/R/MeasureSurvCindex.R b/R/MeasureSurvCindex.R index d9b821deb..8b61f77fe 100644 --- a/R/MeasureSurvCindex.R +++ b/R/MeasureSurvCindex.R @@ -110,15 +110,15 @@ MeasureSurvCindex = R6Class("MeasureSurvCindex", # calculate t_max (cutoff time horizon) if (is.null(ps$t_max) && !is.null(ps$p_max)) { truth = prediction$truth - unique_times = unique(sort(truth[,"time"])) + unique_times = unique(sort(truth[, "time"])) surv = survival::survfit(truth ~ 1) indx = which(1 - (surv$n.risk / surv$n) > ps$p_max) - if (length(indx) == 0) { + if (length(indx) == 0L) { t_max = NULL # t_max calculated in `cindex()` } else { # first time point that surpasses the specified # `p_max` proportion of censoring - t_max = surv$time[indx[1]] + t_max = surv$time[indx[1L]] } } else { t_max = ps$t_max diff --git a/R/MeasureSurvDCalibration.R b/R/MeasureSurvDCalibration.R index 5d1f2ff58..0c241953b 100644 --- a/R/MeasureSurvDCalibration.R +++ b/R/MeasureSurvDCalibration.R @@ -58,7 +58,7 @@ MeasureSurvDCalibration = R6Class("MeasureSurvDCalibration", #' @description Creates a new instance of this [R6][R6::R6Class] class. initialize = function() { ps = ps( - B = p_int(1, default = 10), + B = p_int(1L, default = 10L), chisq = p_lgl(default = FALSE), truncate = p_dbl(lower = 0, upper = Inf, default = Inf) ) diff --git a/R/MeasureSurvGraf.R b/R/MeasureSurvGraf.R index 62d1b4395..5d690ebb3 100644 --- a/R/MeasureSurvGraf.R +++ b/R/MeasureSurvGraf.R @@ -102,9 +102,9 @@ MeasureSurvGraf = R6::R6Class("MeasureSurvGraf", } if (!ps$integrated) { msg = "If `integrated=FALSE` then `times` should be a scalar numeric." - assert_numeric(ps$times, len = 1, .var.name = msg) + assert_numeric(ps$times, len = 1L, .var.name = msg) } else { - if (!is.null(ps$times) && length(ps$times) == 1) { + if (!is.null(ps$times) && length(ps$times) == 1L) { ps$integrated = FALSE } } diff --git a/R/MeasureSurvIntLogloss.R b/R/MeasureSurvIntLogloss.R index 040102957..44208c874 100644 --- a/R/MeasureSurvIntLogloss.R +++ b/R/MeasureSurvIntLogloss.R @@ -95,9 +95,9 @@ MeasureSurvIntLogloss = R6::R6Class("MeasureSurvIntLogloss", } if (!ps$integrated) { msg = "If `integrated=FALSE` then `times` should be a scalar numeric." - assert_numeric(ps$times, len = 1, .var.name = msg) + assert_numeric(ps$times, len = 1L, .var.name = msg) } else { - if (!is.null(ps$times) && length(ps$times) == 1) { + if (!is.null(ps$times) && length(ps$times) == 1L) { ps$integrated = FALSE } } diff --git a/R/MeasureSurvLogloss.R b/R/MeasureSurvLogloss.R index 231c17592..c7dec6ea9 100644 --- a/R/MeasureSurvLogloss.R +++ b/R/MeasureSurvLogloss.R @@ -89,7 +89,7 @@ MeasureSurvLogloss = R6::R6Class("MeasureSurvLogloss", ps = self$param_set$values if (ps$se) { - ll = surv_logloss(prediction$truth, prediction$data$distr, ps$eps, ps$IPCW, train) #nolint + ll = surv_logloss(prediction$truth, prediction$data$distr, ps$eps, ps$IPCW, train) # nolint sd(ll) / sqrt(length(ll)) } else { mean(surv_logloss(prediction$truth, prediction$data$distr, ps$eps, ps$IPCW, train)) # nolint diff --git a/R/MeasureSurvRCLL.R b/R/MeasureSurvRCLL.R index 3b560c99a..1c3cd90df 100644 --- a/R/MeasureSurvRCLL.R +++ b/R/MeasureSurvRCLL.R @@ -69,14 +69,14 @@ MeasureSurvRCLL = R6::R6Class("MeasureSurvRCLL", } out = rep(-99L, length(prediction$row_ids)) truth = prediction$truth - event = truth[, 2] == 1 - event_times = truth[event, 1] - cens_times = truth[!event, 1] + event = truth[, 2L] == 1 + event_times = truth[event, 1L] + cens_times = truth[!event, 1L] # Bypass distr6 construction if underlying distr represented by array if (inherits(prediction$data$distr, "array")) { surv = prediction$data$distr - if (length(dim(surv)) == 3) { + if (length(dim(surv)) == 3L) { # survival 3d array, extract median surv = .ext_surv_mat(arr = surv, which.curve = 0.5) } diff --git a/R/MeasureSurvSchmid.R b/R/MeasureSurvSchmid.R index 138a4ca5e..700b214d5 100644 --- a/R/MeasureSurvSchmid.R +++ b/R/MeasureSurvSchmid.R @@ -91,9 +91,9 @@ MeasureSurvSchmid = R6::R6Class("MeasureSurvSchmid", } if (!ps$integrated) { msg = "If `integrated=FALSE` then `times` should be a scalar numeric." - assert_numeric(ps$times, len = 1, .var.name = msg) + assert_numeric(ps$times, len = 1L, .var.name = msg) } else { - if (!is.null(ps$times) && length(ps$times) == 1) { + if (!is.null(ps$times) && length(ps$times) == 1L) { ps$integrated = FALSE } } diff --git a/R/MeasureSurvUnoAUC.R b/R/MeasureSurvUnoAUC.R index 6a66eb129..b1565944f 100644 --- a/R/MeasureSurvUnoAUC.R +++ b/R/MeasureSurvUnoAUC.R @@ -45,7 +45,7 @@ MeasureSurvUnoAUC = R6Class("MeasureSurvUnoAUC", msg = "If `integrated=FALSE` then `times` should be a scalar numeric." assert_numeric(ps$times, len = 1, .var.name = msg) } else { - if (!is.null(ps$times) && length(ps$times) == 1) { + if (!is.null(ps$times) && length(ps$times) == 1L) { ps$integrated = FALSE } } diff --git a/R/PipeOpBreslow.R b/R/PipeOpBreslow.R index 454ffe533..006d9f8e8 100644 --- a/R/PipeOpBreslow.R +++ b/R/PipeOpBreslow.R @@ -49,7 +49,7 @@ #' task = tsk("rats") #' part = partition(task, ratio = 0.8) #' train_task = task$clone()$filter(part$train) -#' test_task = task$clone()$filter(part$test) +#' test_task = task$clone()$filter(part$test) #' #' learner = lrn("surv.coxph") # learner with lp predictions #' b = po("breslowcompose", learner = learner, breslow.overwrite = TRUE) diff --git a/R/PipeOpCrankCompositor.R b/R/PipeOpCrankCompositor.R index 77312df04..f17851b2a 100644 --- a/R/PipeOpCrankCompositor.R +++ b/R/PipeOpCrankCompositor.R @@ -79,7 +79,7 @@ PipeOpCrankCompositor = R6Class("PipeOpCrankCompositor", param_set = ps( method = p_fct(default = "sum_haz", levels = c("sum_haz", "mean", "median", "mode"), tags = "predict"), - which = p_int(default = 1, lower = 1, tags = "predict"), + which = p_int(default = 1L, lower = 1L, tags = "predict"), response = p_lgl(default = FALSE, tags = "predict"), overwrite = p_lgl(default = FALSE, tags = "predict") ) @@ -105,7 +105,7 @@ PipeOpCrankCompositor = R6Class("PipeOpCrankCompositor", .predict = function(inputs) { - inpred = inputs[[1]] + inpred = inputs[[1L]] response = self$param_set$values$response b_response = !anyMissing(inpred$response) @@ -120,7 +120,7 @@ PipeOpCrankCompositor = R6Class("PipeOpCrankCompositor", } else { assert("distr" %in% inpred$predict_types) method = self$param_set$values$method - if (length(method) == 0) method = "sum_haz" + if (length(method) == 0L) method = "sum_haz" if (method == "sum_haz") { if (inherits(inpred$data$distr, "matrix") || !requireNamespace("survivalmodels", quietly = TRUE)) { @@ -132,11 +132,11 @@ PipeOpCrankCompositor = R6Class("PipeOpCrankCompositor", } } else if (method == "mean") { comp = try(inpred$distr$mean(), silent = TRUE) - if (class(comp)[1] == "try-error") { + if (inherits(comp, "try-error")) { requireNamespace("cubature") comp = try(inpred$distr$mean(cubature = TRUE), silent = TRUE) } - if (class(comp)[1] == "try-error") { + if (inherits(comp, "try-error")) { comp = numeric(length(inpred$crank)) } } else { diff --git a/R/PipeOpDistrCompositor.R b/R/PipeOpDistrCompositor.R index 3faa822f4..8d121cb91 100644 --- a/R/PipeOpDistrCompositor.R +++ b/R/PipeOpDistrCompositor.R @@ -82,8 +82,8 @@ PipeOpDistrCompositor = R6Class("PipeOpDistrCompositor", #' Creates a new instance of this [R6][R6::R6Class] class. initialize = function(id = "distrcompose", param_vals = list()) { param_set = ps( - form = p_fct(default = "aft", levels = c("aft", "ph", "po"), tags = c("predict")), - overwrite = p_lgl(default = FALSE, tags = c("predict")) + form = p_fct(default = "aft", levels = c("aft", "ph", "po"), tags = "predict"), + overwrite = p_lgl(default = FALSE, tags = "predict") ) param_set$values = list(form = "aft", overwrite = FALSE) @@ -119,10 +119,10 @@ PipeOpDistrCompositor = R6Class("PipeOpDistrCompositor", row_ids = inpred$row_ids truth = inpred$truth - mlr3misc::map(inputs, function(x) checkmate::assert_true(identical(truth, x$truth))) + walk(inputs, function(x) assert_true(identical(truth, x$truth))) form = self$param_set$values$form - if (length(form) == 0) form = "aft" + if (length(form) == 0L) form = "aft" nr = length(inpred$data$row_ids) # assumes PH-style lp where high value = high risk @@ -134,7 +134,7 @@ PipeOpDistrCompositor = R6Class("PipeOpDistrCompositor", if (inherits(base$data$distr, "Distribution")) { base = distr6::as.MixtureDistribution(base$distr) - times = unlist(base[1]$properties$support$elements) + times = unlist(base[1L]$properties$support$elements) nc = length(times) survmat = matrix(1 - base$cdf(times), nrow = nr, ncol = nc, byrow = TRUE) } else { @@ -147,17 +147,17 @@ PipeOpDistrCompositor = R6Class("PipeOpDistrCompositor", timesmat = matrix(times, nrow = nr, ncol = nc, byrow = TRUE) lpmat = matrix(lp, nrow = nr, ncol = nc) - if (form == "ph") { - cdf = 1 - (survmat^exp(lpmat)) - } else if (form == "aft") { - mtc = findInterval(timesmat / exp(lpmat), times) - mtc[mtc == 0] = NA - cdf = 1 - matrix(survmat[1, mtc], nr, nc, FALSE) - cdf[is.na(cdf)] = 0 - } else if (form == "po") { - cdf = 1 - (survmat * ((exp(-lpmat) + ((1 - exp(-lpmat)) * survmat))^-1)) - cdf[survmat == 1] = 0 - } + if (form == "ph") { + cdf = 1 - (survmat^exp(lpmat)) + } else if (form == "aft") { + mtc = findInterval(timesmat / exp(lpmat), times) + mtc[mtc == 0] = NA + cdf = 1 - matrix(survmat[1L, mtc], nr, nc, FALSE) + cdf[is.na(cdf)] = 0 + } else if (form == "po") { + cdf = 1 - (survmat * ((exp(-lpmat) + ((1 - exp(-lpmat)) * survmat))^-1)) + cdf[survmat == 1] = 0 + } distr = .surv_return(times, 1 - cdf)$distr diff --git a/R/PipeOpPredRegrSurv.R b/R/PipeOpPredRegrSurv.R index fba430dfa..d8caaf351 100644 --- a/R/PipeOpPredRegrSurv.R +++ b/R/PipeOpPredRegrSurv.R @@ -85,7 +85,7 @@ PipeOpPredRegrSurv = R6Class("PipeOpPredRegrSurv", } distr = try(input$distr, silent = TRUE) - if (class(distr)[1] == "try-error" || is.null(distr)) { + if (inherits(distr, "try-error") || is.null(distr)) { distr = NULL } diff --git a/R/PipeOpSurvAvg.R b/R/PipeOpSurvAvg.R index 394216c46..fddaf95c8 100644 --- a/R/PipeOpSurvAvg.R +++ b/R/PipeOpSurvAvg.R @@ -92,28 +92,28 @@ PipeOpSurvAvg = R6Class("PipeOpSurvAvg", lp = c(simplify2array(lp_matrix) %*% weights) } - if (length(unique(weights)) == 1) { + if (length(unique(weights)) == 1L) { weights = "uniform" } - distr = map(inputs, "distr") + distr = map(inputs, "distr") - ok = mlr3misc::map_lgl(distr, function(.x) { - checkmate::test_class(.x, "Matdist") | checkmate::test_class(.x, "Arrdist") - }) - - if (all(ok)) { - distr = distr6::mixMatrix(distr, weights) - } else { - ok = mlr3misc::map_lgl(distr, function(.x) { - checkmate::test_class(.x, "VectorDistribution") + ok = map_lgl(distr, function(.x) { + test_class(.x, "Matdist") || test_class(.x, "Arrdist") }) + if (all(ok)) { - distr = distr6::mixturiseVector(distr, weights) + distr = distr6::mixMatrix(distr, weights) } else { - distr = NULL + ok = map_lgl(distr, function(.x) { + test_class(.x, "VectorDistribution") + }) + if (all(ok)) { + distr = distr6::mixturiseVector(distr, weights) + } else { + distr = NULL + } } - } PredictionSurv$new(row_ids = row_ids, truth = truth, response = response, crank = crank, diff --git a/R/PipeOpTaskSurvRegr.R b/R/PipeOpTaskSurvRegr.R index 9ba2175c7..c4f43c056 100644 --- a/R/PipeOpTaskSurvRegr.R +++ b/R/PipeOpTaskSurvRegr.R @@ -129,9 +129,9 @@ PipeOpTaskSurvRegr = R6Class("PipeOpTaskSurvRegr", tags = c("train", "bj")), center = p_lgl(default = TRUE, tags = c("train", "bj")), mimpu = p_lgl(default = NULL, special_vals = list(NULL), tags = c("train", "bj")), - iter.bj = p_int(default = 20, lower = 2, tags = c("train", "bj")), - max.cycle = p_int(default = 5, lower = 1, tags = c("train", "bj")), - mstop = p_int(default = 50, lower = 1, tags = c("train", "bj")), + iter.bj = p_int(default = 20L, lower = 2L, tags = c("train", "bj")), + max.cycle = p_int(default = 5L, lower = 1L, tags = c("train", "bj")), + mstop = p_int(default = 50L, lower = 1L, tags = c("train", "bj")), nu = p_dbl(default = 0.1, lower = 0, tags = c("train", "bj")) ) ps$add_dep("alpha", "method", CondEqual$new("ipcw")) @@ -164,15 +164,15 @@ PipeOpTaskSurvRegr = R6Class("PipeOpTaskSurvRegr", pv = self$param_set$values target = pv$target if (is.null(target)) { - target = inputs[[1]]$target_names[1L] + target = inputs[[1L]]$target_names[1L] } - backend = private$.reorder(copy(inputs[[1]]$data()), pv$features, target, inputs[[2]]) - return(list(TaskRegr$new(id = inputs[[1]]$id, backend = backend, target = target))) + backend = private$.reorder(copy(inputs[[1L]]$data()), pv$features, target, inputs[[2L]]) + return(list(TaskRegr$new(id = inputs[[1L]]$id, backend = backend, target = target))) }, .transform = function(inputs) { - input = inputs[[1]] + input = inputs[[1L]] backend = copy(input$data()) time = input$target_names[1L] status = input$target_names[2L] @@ -201,7 +201,7 @@ PipeOpTaskSurvRegr = R6Class("PipeOpTaskSurvRegr", reorder = private$.reorder(backend, pv$features, pv$target, inputs[[2]]) ) - target = ifelse(method == "reorder", pv$target, time) + target = if (method == "reorder") pv$target else time new_task = TaskRegr$new(id = input$id, backend = backend, target = target) @@ -229,9 +229,9 @@ PipeOpTaskSurvRegr = R6Class("PipeOpTaskSurvRegr", est = est$train(task)$predict(task)$distr if (inherits(est, c("Matdist", "Arrdist"))) { - weights = diag(est$survival(task$truth()[, 1])) + weights = diag(est$survival(task$truth()[, 1L])) } else { - weights = as.numeric(est$survival(data = matrix(task$truth()[, 1], nrow = 1))) + weights = as.numeric(est$survival(data = matrix(task$truth()[, 1L], nrow = 1L))) } weights[weights == 0] = eps weights = 1 / weights @@ -257,7 +257,7 @@ PipeOpTaskSurvRegr = R6Class("PipeOpTaskSurvRegr", unique_times = sort(unique(backend[[time]])) if (estimator == "kaplan") { - est = LearnerSurvKaplan$new()$train(input)$predict(input, row_ids = 1)$distr[1] + est = LearnerSurvKaplan$new()$train(input)$predict(input, row_ids = 1)$distr[1L] den = est$survival(backend[[time]][cens]) num = sapply(backend[[time]][cens], function(x) { est$survivalAntiDeriv(x) @@ -271,7 +271,7 @@ PipeOpTaskSurvRegr = R6Class("PipeOpTaskSurvRegr", est$param_set$values$lambda = self$param_set$values$lambda est = est$train(input)$predict(input)$distr } - den = as.numeric(est$survival(data = matrix(backend[[time]], nrow = 1)))[cens] + den = as.numeric(est$survival(data = matrix(backend[[time]], nrow = 1L)))[cens] mrl = numeric(sum(cens)) for (i in seq_along(mrl)) { x = backend[cens, ][[time]][i] @@ -286,11 +286,11 @@ PipeOpTaskSurvRegr = R6Class("PipeOpTaskSurvRegr", }, .bj = function(backend, status, time) { - mlr3misc::require_namespaces("bujar") + require_namespaces("bujar") x = data.frame(backend)[, colnames(backend) %nin% c(time, status), drop = FALSE] - x = model.matrix(~., x)[, -1] - bj = mlr3misc::invoke(bujar::bujar, + x = model.matrix(~., x)[, -1L] + bj = invoke(bujar::bujar, y = backend[[time]], cens = backend[[status]], x = x, diff --git a/R/PredictionDataSurv.R b/R/PredictionDataSurv.R index a089b901c..b9234125e 100644 --- a/R/PredictionDataSurv.R +++ b/R/PredictionDataSurv.R @@ -16,7 +16,7 @@ check_prediction_data.PredictionDataSurv = function(pdata, ...) { # nolint assert(nrow(pdata$distr$modelTable) == n) } else if (inherits(pdata$distr, c("Matdist", "Arrdist"))) { assert(nrow(gprm(pdata$distr, "pdf")) == n) - } else if (class(pdata$distr)[1] == "array") { # from Arrdist + } else if (class(pdata$distr)[1L] == "array") { # from Arrdist assert_array(pdata$distr, d = 3, any.missing = FALSE, null.ok = TRUE) } else { assert_matrix(pdata$distr, nrows = n, any.missing = FALSE, null.ok = TRUE) @@ -73,15 +73,15 @@ c.PredictionDataSurv = function(..., keep_duplicates = TRUE) { if ("distr" %in% predict_types) { distr_list = map(dots, "distr") - test_dist = unique(vapply(distr_list, testDistribution, logical(1))) + test_dist = unique(map_lgl(distr_list, testDistribution)) # Mix of distributions and arrays? Convert arrays to distributions! - if (length(test_dist) == 2) { + if (length(test_dist) == 2L) { distr_list = map(distr_list, function(.x) { if (testDistribution(.x)) { .x } else { - as.Distribution(1 - .x, fun = "cdf", + as.Distribution(1 - .x, fun = "cdf", decorators = c("CoreStatistics", "ExoticStatistics")) } }) @@ -93,13 +93,15 @@ c.PredictionDataSurv = function(..., keep_duplicates = TRUE) { result$distr = do.call(c, c(distr_list, list(decorators = c("CoreStatistics", "ExoticStatistics")))) } else { - dims = vapply(distr_list, function(.x) length(dim(.x)), integer(1)) + dims = map_int(distr_list, function(.x) length(dim(.x))) # If mix of arrays and matrices, convert arrays to median survival matrices - if (length(unique(dims)) > 1) { + if (length(unique(dims)) > 1L) { distr_list = lapply(distr_list, function(.x) { - if (length(dim(.x)) == 3) { + if (length(dim(.x)) == 3L) { .ext_surv_mat(.x, which.curve = 0.5) - } else .x + } else { + .x + } }) } # All objects are now either 3d arrays or 2d matrices @@ -134,14 +136,14 @@ filter_prediction_data.PredictionDataSurv = function(pdata, row_ids, ...) { if (testDistribution(distr)) { # distribution ok = inherits(distr, c("VectorDistribution", "Matdist", "Arrdist")) && - length(keep) > 1 # e.g.: Arrdist(1xYxZ) and keep = FALSE + length(keep) > 1L # e.g.: Arrdist(1xYxZ) and keep = FALSE if (ok) { pdata$distr = distr[keep] # we can subset row/samples like this } else { pdata$distr = base::switch(keep, distr) # one distribution only } } else { - if (length(dim(distr)) == 2) { # 2d matrix + if (length(dim(distr)) == 2L) { # 2d matrix pdata$distr = distr[keep, , drop = FALSE] } else { # 3d array pdata$distr = distr[keep, , , drop = FALSE] diff --git a/R/PredictionSurv.R b/R/PredictionSurv.R index 18f26bc9d..9732ccc5f 100644 --- a/R/PredictionSurv.R +++ b/R/PredictionSurv.R @@ -164,10 +164,10 @@ PredictionSurv = R6Class("PredictionSurv", } times = x$getParameterValue("x") - time1 = times[[1]] + time1 = times[[1L]] ## check all times equal - return x if not - if (!all(vapply(times, identical, logical(1), y = time1))) { + if (!all(map_lgl(times, identical, y = time1))) { return(x) } @@ -178,7 +178,7 @@ PredictionSurv = R6Class("PredictionSurv", } }, .distrify_survarray = function(x) { - if (inherits(x, "array") && nrow(x) > 0) { # can be matrix as well + if (inherits(x, "array") && nrow(x) > 0L) { # can be matrix as well # create Matdist or Arrdist (default => median curve) distr6::as.Distribution(1 - x, fun = "cdf", decorators = c("CoreStatistics", "ExoticStatistics")) diff --git a/R/RcppExports.R b/R/RcppExports.R index a9a0a21ad..18607e064 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -1,27 +1,26 @@ # Generated by using Rcpp::compileAttributes() -> do not edit by hand # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 -.c_get_unique_times <- function(true_times, req_times) { - .Call(`_mlr3proba_c_get_unique_times`, true_times, req_times) +.c_get_unique_times = function(true_times, req_times) { + .Call(`_mlr3proba_c_get_unique_times`, true_times, req_times) } -c_score_intslogloss <- function(truth, unique_times, cdf, eps) { - .Call(`_mlr3proba_c_score_intslogloss`, truth, unique_times, cdf, eps) +c_score_intslogloss = function(truth, unique_times, cdf, eps) { + .Call(`_mlr3proba_c_score_intslogloss`, truth, unique_times, cdf, eps) } -c_score_graf_schmid <- function(truth, unique_times, cdf, power = 2L) { - .Call(`_mlr3proba_c_score_graf_schmid`, truth, unique_times, cdf, power) +c_score_graf_schmid = function(truth, unique_times, cdf, power = 2L) { + .Call(`_mlr3proba_c_score_graf_schmid`, truth, unique_times, cdf, power) } -.c_weight_survival_score <- function(score, truth, unique_times, cens, proper, eps) { - .Call(`_mlr3proba_c_weight_survival_score`, score, truth, unique_times, cens, proper, eps) +.c_weight_survival_score = function(score, truth, unique_times, cens, proper, eps) { + .Call(`_mlr3proba_c_weight_survival_score`, score, truth, unique_times, cens, proper, eps) } -c_concordance <- function(time, status, crank, t_max, weight_meth, cens, surv, tiex) { - .Call(`_mlr3proba_c_concordance`, time, status, crank, t_max, weight_meth, cens, surv, tiex) +c_concordance = function(time, status, crank, t_max, weight_meth, cens, surv, tiex) { + .Call(`_mlr3proba_c_concordance`, time, status, crank, t_max, weight_meth, cens, surv, tiex) } -c_gonen <- function(crank, tiex) { - .Call(`_mlr3proba_c_gonen`, crank, tiex) +c_gonen = function(crank, tiex) { + .Call(`_mlr3proba_c_gonen`, crank, tiex) } - diff --git a/R/TaskDens_zzz.R b/R/TaskDens_zzz.R index cb10ffb6b..844c2216e 100644 --- a/R/TaskDens_zzz.R +++ b/R/TaskDens_zzz.R @@ -15,7 +15,7 @@ NULL load_precip = function(id = "precip") { - b = as_data_backend(data.table::data.table(precip = load_dataset("precip", "datasets", + b = as_data_backend(data.table(precip = load_dataset("precip", "datasets", keep_rownames = TRUE))) task = TaskDens$new(id, b, label = "Annual Precipitation") b$hash = task$man = "mlr3proba::mlr_tasks_precip" @@ -39,7 +39,7 @@ load_precip = function(id = "precip") { NULL load_faithful = function(id = "faithful") { - b = as_data_backend(data.table::data.table(eruptions = load_dataset("faithful", "datasets", + b = as_data_backend(data.table(eruptions = load_dataset("faithful", "datasets", keep_rownames = TRUE)$eruptions)) task = TaskDens$new(id, b, label = "Old Faithful Eruptions") b$hash = task$man = "mlr3proba::mlr_tasks_faithful" diff --git a/R/TaskGeneratorCoxed.R b/R/TaskGeneratorCoxed.R index ccf3803df..48eec92fb 100644 --- a/R/TaskGeneratorCoxed.R +++ b/R/TaskGeneratorCoxed.R @@ -40,9 +40,9 @@ TaskGeneratorCoxed = R6::R6Class("TaskGeneratorCoxed", param_set = ps( T = p_dbl(lower = 1, default = 100), # time-horizon type = p_fct(default = "none", levels = c("none", "tvc", "tvbeta")), # time-varying effects - knots = p_int(lower = 1, default = 8), # for flexible-hazard method + knots = p_int(lower = 1L, default = 8L), # for flexible-hazard method spline = p_lgl(default = TRUE), # for flexible-hazard method - xvars = p_int(lower = 1, default = 3), # number of covariates to generate + xvars = p_int(lower = 1L, default = 3L), # number of covariates to generate mu = p_uty(default = 0), # mean for `xvars` sd = p_uty(default = 0.5), # sd for `xvars` censor = p_dbl(lower = 0, upper = 1, default = 0.1), # censoring proportion diff --git a/R/TaskSurv.R b/R/TaskSurv.R index 24435772e..bdc076682 100644 --- a/R/TaskSurv.R +++ b/R/TaskSurv.R @@ -78,13 +78,13 @@ TaskSurv = R6::R6Class("TaskSurv", backend = as_data_backend(backend) if (type != "interval2") { - c_ev = r6_private(backend)$.data[, event, with = FALSE][[1]] + c_ev = r6_private(backend)$.data[, event, with = FALSE][[1L]] if (type == "mstate") { assert_factor(c_ev) } else if (type == "interval") { - assert_integerish(c_ev, lower = 0, upper = 3) + assert_integerish(c_ev, lower = 0L, upper = 3L) } else if (!is.logical(c_ev)) { - assert_integerish(c_ev, lower = 0, upper = 2) + assert_integerish(c_ev, lower = 0L, upper = 2L) } } @@ -149,7 +149,7 @@ TaskSurv = R6::R6Class("TaskSurv", formula = function(rhs = NULL, reverse = FALSE) { # formula appends the rhs argument to Surv(time, event)~ tn = self$target_names - if (length(tn) == 2) { + if (length(tn) == 2L) { if (reverse) { lhs = sprintf("Surv(%s, 1 - %s, type = '%s')", tn[1L], tn[2L], self$censtype) } else { @@ -314,12 +314,12 @@ TaskSurv = R6::R6Class("TaskSurv", assert_number(quantile_prob, lower = 0.8, upper = 1, null.ok = FALSE) assert_number(admin_time, lower = 0, null.ok = TRUE) - times = self$times(rows) + times = self$times(rows) status = self$status(rows) # Get administrative time if (is.null(admin_time)) { - t_max = unname(round(quantile(times, probs = quantile_prob))) + t_max = unname(round(stats::quantile(times, probs = quantile_prob))) } else { t_max = min(admin_time, max(times)) } @@ -360,15 +360,17 @@ TaskSurv = R6::R6Class("TaskSurv", dep_cens_prop = function(rows = NULL, method = "holm", sign_level = 0.05) { assert_choice(self$censtype, choices = c("right", "left")) - status_var = self$target_names[[2]] - glm_summary = glm(formula = mlr3misc::formulate(lhs = status_var, rhs = "."), - data = self$data(cols = c(self$feature_names, status_var)), - family = binomial(link = "logit")) |> summary() + status_var = self$target_names[[2L]] + glm_summary = summary(stats::glm( + formula = formulate(lhs = status_var, rhs = "."), + data = self$data(cols = c(self$feature_names, status_var)), + family = stats::binomial(link = "logit") + )) # extract the p-values p_values = glm_summary$coefficients[, "Pr(>|z|)"] - p_values_adj = p.adjust(p_values, method = method) + p_values_adj = stats::p.adjust(p_values, method = method) n_coefs = length(p_values_adj) - 1 # exclude the intercept, include dummy-encoded variables - n_signif = sum(p_values_adj[-1] <= sign_level) + n_signif = sum(p_values_adj[-1L] <= sign_level) n_signif / n_coefs }, @@ -392,7 +394,7 @@ TaskSurv = R6::R6Class("TaskSurv", cox = lrn("surv.coxph") cox$encapsulate = c(train = "evaluate", predict = "evaluate") cox$train(self) - ok = (length(cox$errors) == 0) & (length(cox$warnings) == 0) + ok = (length(cox$errors) == 0L) & (length(cox$warnings) == 0L) # cox model didn't converge, train didn't succeed, etc if (!ok) stop("Error/warning during cox model fitting") diff --git a/R/TaskSurv_zzz.R b/R/TaskSurv_zzz.R index 3093f4e68..07d31ef1b 100644 --- a/R/TaskSurv_zzz.R +++ b/R/TaskSurv_zzz.R @@ -248,8 +248,8 @@ NULL load_actg = function() { data = load_dataset("actg", "mlr3proba") data[, c("id", "time_d", "censor_d")] = NULL - colnames(data)[6] = "sexF" - colnames(data)[2] = "status" + colnames(data)[6L] = "sexF" + colnames(data)[2L] = "status" b = as_data_backend(data) task = TaskSurv$new("actg", b, time = "time", event = "status", label = "ACTG 320") @@ -305,7 +305,7 @@ NULL load_grace = function() { data = load_dataset("grace", "mlr3proba") - data[, c("id")] = NULL + data[, "id"] = NULL colnames(data)[1:2] = c("time", "status") b = as_data_backend(data) @@ -335,7 +335,7 @@ NULL load_whas = function() { data = load_dataset("whas", "mlr3proba") data[, c("id", "yrgrp", "dstat")] = NULL - colnames(data)[2] = "sexF" + colnames(data)[2L] = "sexF" colnames(data)[10:11] = c("time", "status") b = as_data_backend(data) diff --git a/R/as_prediction_dens.R b/R/as_prediction_dens.R index e9b696911..d762df1cf 100644 --- a/R/as_prediction_dens.R +++ b/R/as_prediction_dens.R @@ -41,7 +41,7 @@ as_prediction_dens.data.frame = function(x, ...) { # nolint assert_names(names(x), subset.of = c(mandatory, optional)) if ("distr" %in% names(x)) { - distr = x$distr[[1]] + distr = x$distr[[1L]] } else { distr = NULL } diff --git a/R/as_prediction_surv.R b/R/as_prediction_surv.R index 2e390aed9..4dc5c2e4a 100644 --- a/R/as_prediction_surv.R +++ b/R/as_prediction_surv.R @@ -40,7 +40,7 @@ as_prediction_surv.data.frame = function(x, ...) { # nolint assert_names(names(x), subset.of = c(mandatory, optional)) if ("distr" %in% names(x)) { - distr = x$distr[[1]][[1]] + distr = x$distr[[1L]][[1L]] } else { distr = NULL } @@ -51,7 +51,7 @@ as_prediction_surv.data.frame = function(x, ...) { # nolint } else if ("response" %in% names(x)) { x$crank = -x$response } else { - x$crank = -apply(1 - distr, 1, function(.x) sum(c(.x[1], diff(.x)) * x$time)) + x$crank = -apply(1 - distr, 1L, function(.x) sum(c(.x[1L], diff(.x)) * x$time)) } } diff --git a/R/autoplot.R b/R/autoplot.R index 37cd332b2..fe94e5cc0 100644 --- a/R/autoplot.R +++ b/R/autoplot.R @@ -211,7 +211,7 @@ autoplot.PredictionSurv = function(object, type = "calib", "calib" = { assert_task(task) if (is.null(times)) { - times = sort(unique(task$truth()[, 1])) + times = sort(unique(task$truth()[, 1L])) } if (inherits(object$distr, "VectorDistribution")) { @@ -220,7 +220,7 @@ autoplot.PredictionSurv = function(object, type = "calib", pred_surv = rowMeans(1 - object$distr$cdf(times)) } - km = mlr3::lrn("surv.kaplan") + km = lrn("surv.kaplan") km_pred = km$train(task, row_ids = row_ids)$predict(task, row_ids = row_ids) km_surv = rowMeans(1 - km_pred$distr$cdf(times)) diff --git a/R/breslow.R b/R/breslow.R index bb4807f30..c24b89b9a 100644 --- a/R/breslow.R +++ b/R/breslow.R @@ -83,7 +83,7 @@ #' learner = lrn("surv.coxph") #' learner$train(task, part$train) #' p_train = learner$predict(task, part$train) -#' p_test = learner$predict(task, part$test) +#' p_test = learner$predict(task, part$test) #' #' surv = breslow(times = task$times(part$train), status = task$status(part$train), #' lp_train = p_train$lp, lp_test = p_test$lp) @@ -142,13 +142,13 @@ breslow = function(times, status, lp_train, lp_test, eval_times = NULL, type = " event_times = sort(unique(times[status == 1])) # baseline (non-cumulative) hazards are first evaluated on the specific `event_times` - bhaz = vapply(event_times, function(et) { + bhaz = map_dbl(event_times, function(et) { sum(times[status == 1] == et) / sum(exp(lp[times >= et])) - }, numeric(1)) + }) # `eval_times` will be the sorted unique times (not just events) - eval_times = sort(unique(eval_times %||% times)) - if (length(event_times) == 0) { + eval_times = sort(unique(eval_times %??% times)) + if (length(event_times) == 0L) { # 0 events (training data has only censored observations!) res = numeric(length(eval_times)) } else { diff --git a/R/cindex.R b/R/cindex.R index a5ef00b98..ebb3ac47e 100644 --- a/R/cindex.R +++ b/R/cindex.R @@ -2,7 +2,7 @@ cindex = function(truth, crank, t_max = NULL, weight_meth = c("I", "G", "G2", "SG", "S"), tiex = 0.5, train = NULL, eps = 1e-3) { - if (length(unique(crank)) == 1) { + if (length(unique(crank)) == 1L) { return(0.5) } @@ -19,25 +19,25 @@ cindex = function(truth, crank, t_max = NULL, weight_meth = match.arg(weight_meth) if (weight_meth %in% c("I", "S")) { - cens = matrix(ncol = 2) + cens = matrix(ncol = 2L) } else { cens = survival::survfit(Surv(train[, "time"], 1 - train[, "status"]) ~ 1) - cens = matrix(c(cens$time, cens$surv), ncol = 2) + cens = matrix(c(cens$time, cens$surv), ncol = 2L) } - if (weight_meth == "SG" | weight_meth == "S") { + if (weight_meth == "SG" || weight_meth == "S") { surv = survival::survfit(train ~ 1) - surv = matrix(c(surv$time, surv$surv), ncol = 2) + surv = matrix(c(surv$time, surv$surv), ncol = 2L) } else { - surv = matrix(ncol = 2) + surv = matrix(ncol = 2L) } if (is.null(t_max)) { t_max = max(time) + 1 } - cens[cens[, 2] == 0, 2] = eps - surv[surv[, 2] == 0, 2] = eps + cens[cens[, 2L] == 0, 2L] = eps + surv[surv[, 2L] == 0, 2L] = eps c_concordance(time, status, crank[ord], t_max, weight_meth, cens, surv, tiex) } diff --git a/R/helpers.R b/R/helpers.R index 52bfa8122..a6d9b88a9 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -14,17 +14,17 @@ format_range = function(range) { # used in roxygen templates format_types = function(types) { - if (length(types) == 0) { + if (length(types) == 0L) { return("-") } else { - return(paste0(types, collapse = ", ")) + return(toString(types)) } } toproper = function(str, split = " ", fixed = TRUE) { str = strsplit(str, split, fixed) str = lapply(str, function(x) { - paste0(toupper(substr(x, 1, 1)), tolower(substr(x, 2, 1000)), collapse = split) + paste0(toupper(substr(x, 1L, 1L)), tolower(substr(x, 2L, 1000)), collapse = split) }) return(unlist(str)) } @@ -35,8 +35,8 @@ check_subsetpattern = function(x, choices, empty.ok = TRUE) { # nolint } else { return(sprintf( "Must be a subset of %s, but is %s", - paste0("{", paste0(choices, collapse = ", "), "}"), - paste0("{", paste0(x, collapse = ", "), "}"))) + paste0("{", toString(choices), "}"), + paste0("{", toString(x), "}"))) } } @@ -50,14 +50,6 @@ r6_private = function(x) { x$.__enclos_env__$private } -`%||%` = function(x, y) { - if (is.null(x)) { - y - } else { - x - } -} - ## used for plotting apply_theme = function(theme_object, default_object = NULL) { if (getOption("mlr3.theme", TRUE)) theme_object else default_object %??% geom_blank() diff --git a/R/integrated_scores.R b/R/integrated_scores.R index 53cc31c63..085d5c08c 100644 --- a/R/integrated_scores.R +++ b/R/integrated_scores.R @@ -23,13 +23,13 @@ weighted_survival_score = function(loss, truth, distribution, times = NULL, if (!is.null(p_max)) { surv = survival::survfit(truth ~ 1) indx = which(1 - (surv$n.risk / surv$n) > p_max) - if (length(indx) == 0) { + if (length(indx) == 0L) { # no indexes found, get last time point - t_max = tail(surv$time, n = 1) + t_max = tail(surv$time, n = 1L) } else { # first time point that surpasses the specified # `p_max` proportion of censoring - t_max = surv$time[indx[1]] + t_max = surv$time[indx[1L]] } } else if (is.null(t_max)) { t_max = max(unique_times) @@ -43,7 +43,7 @@ weighted_survival_score = function(loss, truth, distribution, times = NULL, unique_times = unique_times[unique_times <= t_max] # keep all the test set time points for the censoring distr via KM if no train data - all_times = truth[, "time"] + all_times = truth[, "time"] all_status = truth[, "status"] # get the cdf matrix (rows => times, cols => obs) @@ -51,7 +51,7 @@ weighted_survival_score = function(loss, truth, distribution, times = NULL, cdf = as.matrix(distribution$cdf(unique_times)) } else if (inherits(distribution, "array")) { - if (length(dim(distribution)) == 3) { + if (length(dim(distribution)) == 3L) { # survival 3d array, extract median surv_mat = .ext_surv_mat(arr = distribution, which.curve = 0.5) } else { # survival 2d array @@ -70,14 +70,14 @@ weighted_survival_score = function(loss, truth, distribution, times = NULL, } # apply `t_max` cutoff to the test set's (time, status) - true_times = all_times [all_times <= t_max] + true_times = all_times[all_times <= t_max] true_status = all_status[all_times <= t_max] - true_truth = Surv(true_times, true_status) + true_truth = Surv(true_times, true_status) assert_numeric(true_times, any.missing = FALSE) assert_numeric(unique_times, any.missing = FALSE) assert_matrix(cdf, nrows = length(unique_times), ncols = length(true_times), - any.missing = FALSE) + any.missing = FALSE) # Note that whilst we calculate the score for censored here, they are then # corrected in the weighting function `.c_weight_survival_score()` @@ -93,16 +93,16 @@ weighted_survival_score = function(loss, truth, distribution, times = NULL, if (is.null(train)) { cens = survival::survfit(Surv(all_times, 1 - all_status) ~ 1) } else { - train_times = train[, "time"] + train_times = train[, "time"] train_status = train[, "status"] cens = survival::survfit(Surv(train_times, 1 - train_status) ~ 1) } # G(t): KM estimate of the censoring distr - cens = matrix(c(cens$time, cens$surv), ncol = 2) + cens = matrix(c(cens$time, cens$surv), ncol = 2L) # filter time points based on `t_max` cutoff if (tmax_apply) { - cens = cens[cens[,1] <= t_max, , drop = FALSE] + cens = cens[cens[, 1L] <= t_max, , drop = FALSE] } score = .c_weight_survival_score(score, true_truth, unique_times, cens, proper, eps) @@ -114,16 +114,16 @@ weighted_survival_score = function(loss, truth, distribution, times = NULL, integrated_score = function(score, integrated, method = NULL) { # score is a matrix of BS(i,t) scores # rows => observations, cols => time points - if (ncol(score) == 1) { + if (ncol(score) == 1L) { integrated = FALSE } if (integrated) { # summary score (integrated across all time points) - if (method == 1) { + if (method == 1L) { score = as.numeric(score) return(mean(score[is.finite(score)], na.rm = TRUE)) # remove NAs and Infs - } else if (method == 2) { + } else if (method == 2L) { times = as.numeric(colnames(score)) lt = ncol(score) score = col_sums(score) # score(t) @@ -138,13 +138,13 @@ integrated_se = function(score, integrated) { if (integrated) { sqrt(sum(stats::cov(score), na.rm = TRUE) / (nrow(score) * ncol(score)^2)) } else { - apply(score, 2, function(x) stats::sd(x) / sqrt(nrow(score))) + apply(score, 2L, function(x) stats::sd(x) / sqrt(nrow(score))) } } # like colMeans(), but removing Infs, NAs and NaNs col_sums = function(mat) { - apply(mat, 2, function(x) { + apply(mat, 2L, function(x) { x = x[is.finite(x)] mean(x, na.rm = TRUE) }) diff --git a/R/pecs.R b/R/pecs.R index 250be4b6e..127405005 100644 --- a/R/pecs.R +++ b/R/pecs.R @@ -59,10 +59,10 @@ #' #' @export pecs = function(x, measure = c("graf", "logloss"), times, n, eps = NULL, ...) { - mlr3misc::require_namespaces("ggplot2") + require_namespaces("ggplot2") - if (!missing(times)) assertNumeric(times, min.len = 1) - if (!missing(n)) assertIntegerish(n, len = 1) + if (!missing(times)) assertNumeric(times, min.len = 1L) + if (!missing(n)) assertIntegerish(n, len = 1L) UseMethod("pecs", x) } @@ -76,7 +76,7 @@ pecs.list = function(x, measure = c("graf", "logloss"), times, n, eps = NULL, ta measure = match.arg(measure) if (is.null(eps)) { - eps = ifelse(measure == "graf", 1e-3, 1e-15) + eps = if (measure == "graf") 1e-3 else 1e-15 } else { assertNumeric(eps, lower = -1, upper = 1) } @@ -94,14 +94,14 @@ pecs.list = function(x, measure = c("graf", "logloss"), times, n, eps = NULL, ta true_times = sort(unique(task$truth()[, "time"])) times = .pec_times(true_times = true_times, times = times, n = n) - if (length(times) <= 1) { + if (length(times) <= 1L) { stop(sprintf( "Not enough `times` in the true observed times range: %s", - paste0("[", paste0(round(range(true_times), 3), collapse = ", "), "]"))) + paste0("[", toString(round(range(true_times), 3)), "]"))) } n = as.integer(!is.null(train_task)) + as.integer(!is.null(train_set)) - if (n == 1) { + if (n == 1L) { stop("Either 'train_task' and 'train_set' should be passed to measure or neither.") } else if (n) { train = train_task$truth(train_set) @@ -130,7 +130,7 @@ pecs.list = function(x, measure = c("graf", "logloss"), times, n, eps = NULL, ta }) } - times = as.numeric(names(scores[[1]])) + times = as.numeric(names(scores[[1L]])) scores = round(rbindlist(list(scores)), 4) colnames(scores) = sapply(x, function(y) gsub("surv.", "", y$id, fixed = TRUE)) scores$time = times @@ -148,16 +148,16 @@ pecs.PredictionSurv = function(x, measure = c("graf", "logloss"), times, n, eps measure = match.arg(measure) if (is.null(eps)) { - eps = ifelse(measure == "graf", 1e-3, 1e-15) + eps = if (measure == "graf") 1e-3 else 1e-15 } else { assertNumeric(eps, lower = -1, upper = 1) } - true_times = sort(unique(x$truth[, 1])) + true_times = sort(unique(x$truth[, 1L])) times = .pec_times(true_times = true_times, times = times, n = n) n = as.integer(!is.null(train_task)) + as.integer(!is.null(train_set)) - if (n == 1) { + if (n == 1L) { stop("Either 'train_task' and 'train_set' should be passed to measure or neither.") } else if (n) { train = train_task$truth(train_set) @@ -201,11 +201,11 @@ pecs.PredictionSurv = function(x, measure = c("graf", "logloss"), times, n, eps times[times > max(true_times)] = max(true_times) times[times < min(true_times)] = min(true_times) times = sort(unique(times)) - if (length(times) == 2) { + if (length(times) == 2L) { if (missing(n)) { - return(true_times[true_times >= times[1] & true_times <= times[2]]) + return(true_times[true_times >= times[1L] & true_times <= times[2L]]) } else { - return(seq(times[1], times[2], length.out = n)) + return(seq(times[1L], times[2L], length.out = n)) } } else { return(times) diff --git a/R/pipelines.R b/R/pipelines.R index c51bde81c..7f897b5fa 100644 --- a/R/pipelines.R +++ b/R/pipelines.R @@ -24,7 +24,7 @@ #' } #' } pipeline_survaverager = function(learners, param_vals = list(), graph_learner = FALSE) { - learners = mlr3pipelines::gunion(mlr3misc::map(learners, mlr3pipelines::as_graph)) + learners = mlr3pipelines::gunion(map(learners, mlr3pipelines::as_graph)) po = mlr3pipelines::po("survavg", param_vals = param_vals) gr = mlr3pipelines::`%>>%`(learners, po) diff --git a/R/plot_probregr.R b/R/plot_probregr.R index d79cb92c3..3cd265320 100644 --- a/R/plot_probregr.R +++ b/R/plot_probregr.R @@ -57,7 +57,7 @@ plot_probregr = function(p, n, type = c("point", "line", "both", "none"), xmax = ceiling(max(truth) + 3 * max(d$stdev())) x = seq(xmin, xmax, length.out = 100) - data_pred = suppressWarnings(cbind(x, data.table::melt(d$pdf(x)))) + data_pred = suppressWarnings(cbind(x, melt(d$pdf(x)))) if (rm_zero) data_pred[round(data_pred$value, 6) == 0, ] = NA variable = factor(d$strprint(), levels = d$strprint()) data_truth = data.frame(x = truth, variable = variable) diff --git a/R/surv_measures.R b/R/surv_measures.R index 1367d29a3..c30bb8b64 100644 --- a/R/surv_measures.R +++ b/R/surv_measures.R @@ -1,12 +1,12 @@ surv_logloss = function(truth, distr, eps = 1e-15, IPCW = TRUE, train = NULL, ...) { - event = truth[, 2] == 1 - all_times = truth[, 1] - event_times = truth[event, 1] + event = truth[, 2L] == 1 + all_times = truth[, 1L] + event_times = truth[event, 1L] # Bypass distr6 construction if underlying distr represented by array if (inherits(distr, "array")) { surv = distr - if (length(dim(surv)) == 3) { + if (length(dim(surv)) == 3L) { # survival 3d array, extract median surv = .ext_surv_mat(arr = surv, which.curve = 0.5) } @@ -22,9 +22,9 @@ surv_logloss = function(truth, distr, eps = 1e-15, IPCW = TRUE, train = NULL, .. ) } else { if (inherits(distr, c("Matdist", "Arrdist"))) { - pred = diag(distr$pdf(truth[, 1])) + pred = diag(distr$pdf(truth[, 1L])) } else { - pred = as.numeric(distr$pdf(data = matrix(truth[, 1], nrow = 1))) + pred = as.numeric(distr$pdf(data = matrix(truth[, 1L], nrow = 1L))) } } @@ -50,7 +50,7 @@ surv_logloss = function(truth, distr, eps = 1e-15, IPCW = TRUE, train = NULL, .. nrow = length(truth), byrow = TRUE) # Remove all censored observations - surv_km = surv_km[event,] + surv_km = surv_km[event, ] # calculate KM survival at event times extend_times_cdf = getFromNamespace("C_Vec_WeightedDiscreteCdf", ns = "distr6") @@ -74,8 +74,8 @@ surv_logloss = function(truth, distr, eps = 1e-15, IPCW = TRUE, train = NULL, .. surv_mse = function(truth, response) { assert_surv(truth) - uncensored = truth[, 2] == 1 - mse = (truth[uncensored, 1] - response[uncensored])^2 + uncensored = truth[, 2L] == 1 + mse = (truth[uncensored, 1L] - response[uncensored])^2 list( mse = mse, @@ -86,8 +86,8 @@ surv_mse = function(truth, response) { surv_mae = function(truth, response) { assert_surv(truth) - uncensored = truth[, 2] == 1 - mae = abs(truth[uncensored, 1] - response[uncensored]) + uncensored = truth[, 2L] == 1 + mae = abs(truth[uncensored, 1L] - response[uncensored]) list( mae = mae, diff --git a/R/surv_return.R b/R/surv_return.R index 4c805d173..7871edbb9 100644 --- a/R/surv_return.R +++ b/R/surv_return.R @@ -40,16 +40,14 @@ response = NULL, which.curve = NULL) { if (!is.null(surv)) { - if (class(surv)[1] == "numeric") { + if (class(surv)[1L] == "numeric") { # in case of a vector (one observation) convert to matrix - surv = matrix(surv, nrow = 1, dimnames = list(NULL, names(surv))) + surv = matrix(surv, nrow = 1L, dimnames = list(NULL, names(surv))) } - if (class(surv)[1] == "array") { - if (length(dim(surv)) != 3) { - stop("3D survival arrays supported only") - } + if (class(surv)[1L] == "array" && length(dim(surv)) != 3L) { + stop("3D survival arrays supported only") } - times = times %||% colnames(surv) + times = times %??% colnames(surv) if (length(times) != ncol(surv)) { stop("'times' must have the same length as the 2nd dimension (columns of 'surv')") } @@ -87,12 +85,12 @@ .ext_surv_mat = function(arr, which.curve) { # if NULL return the 'median' curve (default) if (is.null(which.curve)) { - return(array(apply(arr, c(1, 2), stats::quantile, 0.5), c(nrow(arr), ncol(arr)), - dimnames(arr)[c(1, 2)])) + return(array(apply(arr, 1:2, stats::quantile, 0.5), c(nrow(arr), ncol(arr)), + dimnames(arr)[1:2])) } # which.curve must be length 1 and either 'mean' or >0 - ok = (length(which.curve) == 1) && + ok = (length(which.curve) == 1L) && ((is.character(which.curve) && which.curve == "mean") || (is.numeric(which.curve) && which.curve > 0)) if (!ok) { @@ -107,13 +105,13 @@ # mean if (which.curve == "mean") { - apply(arr, c(1, 2), mean) - # curve chosen based on quantile + apply(arr, 1:2, mean) + # curve chosen based on quantile } else if (which.curve < 1) { - array(apply(arr, c(1, 2), stats::quantile, which.curve), c(nrow(arr), ncol(arr)), - dimnames(arr)[c(1, 2)]) - # curve chosen based on index + array(apply(arr, 1:2, stats::quantile, which.curve), c(nrow(arr), ncol(arr)), + dimnames(arr)[1:2]) + # curve chosen based on index } else { - array(arr[, , which.curve], c(nrow(arr), ncol(arr)), dimnames(arr)[c(1, 2)]) + array(arr[, , which.curve], c(nrow(arr), ncol(arr)), dimnames(arr)[1:2]) } } diff --git a/R/zzz.R b/R/zzz.R index 6a38bb1f3..d77a3d2ff 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -54,12 +54,12 @@ utils::globalVariables(c( .onUnload = function(libpath) { event = packageEvent("mlr3", "onLoad") hooks = getHook(event) - pkgname = vapply(hooks[-1], function(x) environment(x)$pkgname, NA_character_) + pkgname = map_chr(hooks[-1L], function(x) environment(x)$pkgname) setHook(event, hooks[pkgname != "mlr3proba"], action = "replace") event = packageEvent("mlr3pipelines", "onLoad") hooks = getHook(event) - pkgname = vapply(hooks[-1], function(x) environment(x)$pkgname, NA_character_) + pkgname = map_chr(hooks[-1L], function(x) environment(x)$pkgname) setHook(event, hooks[pkgname != "mlr3proba"], action = "replace") # unregister diff --git a/man/TaskDens.Rd b/man/TaskDens.Rd index 564e3bde7..84f4554f8 100644 --- a/man/TaskDens.Rd +++ b/man/TaskDens.Rd @@ -48,6 +48,7 @@ Other Task:
mlr3::Task$add_strata()
mlr3::Task$cbind()
mlr3::Task$data()
mlr3::Task$divide()
mlr3::Task$droplevels()
mlr3::Task$filter()
mlr3::Task$format()
mlr3::Task$add_strata()
mlr3::Task$cbind()
mlr3::Task$data()
mlr3::Task$divide()
mlr3::Task$droplevels()
mlr3::Task$filter()
mlr3::Task$format()