mlr-org
diff --git a/‎NEWS.md
Lines changed: 6 additions & 2 deletions b/‎NEWS.md
Lines changed: 6 additions & 2 deletions
diff --git a/‎R/MeasureSurvChamblessAUC.R
Lines changed: 1 addition & 0 deletions b/‎R/MeasureSurvChamblessAUC.R
Lines changed: 1 addition & 0 deletions
diff --git a/‎R/MeasureSurvCindex.R
Lines changed: 1 addition & 0 deletions b/‎R/MeasureSurvCindex.R
Lines changed: 1 addition & 0 deletions
diff --git a/‎R/MeasureSurvDCalibration.R
Lines changed: 11 additions & 8 deletions b/‎R/MeasureSurvDCalibration.R
Lines changed: 11 additions & 8 deletions
diff --git a/‎R/MeasureSurvGraf.R
Lines changed: 9 additions & 6 deletions b/‎R/MeasureSurvGraf.R
Lines changed: 9 additions & 6 deletions
diff --git a/‎R/MeasureSurvHungAUC.R
Lines changed: 1 addition & 0 deletions b/‎R/MeasureSurvHungAUC.R
Lines changed: 1 addition & 0 deletions
diff --git a/‎R/MeasureSurvIntLogloss.R
Lines changed: 9 additions & 6 deletions b/‎R/MeasureSurvIntLogloss.R
Lines changed: 9 additions & 6 deletions
diff --git a/‎R/MeasureSurvLogloss.R
Lines changed: 4 additions & 1 deletion b/‎R/MeasureSurvLogloss.R
Lines changed: 4 additions & 1 deletion
diff --git a/‎R/MeasureSurvSchmid.R
Lines changed: 9 additions & 13 deletions b/‎R/MeasureSurvSchmid.R
Lines changed: 9 additions & 13 deletions
diff --git a/‎R/MeasureSurvSongAUC.R
Lines changed: 1 addition & 0 deletions b/‎R/MeasureSurvSongAUC.R
Lines changed: 1 addition & 0 deletions
@@ -1,7 +1,11 @@
 # mlr3proba 0.7.1
 
-* Removed all `PipeOp`s and pipelines related to survival => regression reduction techniques (see #414)
-* Bug fix: `$predict_type` of `survtoclassif_disctime` and `survtoclassif_IPCW` was `prob` (classification type) and not `crank` (survival type)
+* cleanup: removed all `PipeOp`s and pipelines related to survival => regression reduction techniques (see #414)
+* fix: `$predict_type` of `survtoclassif_disctime` and `survtoclassif_IPCW` was `prob` (classification type) and not `crank` (survival type)
+* fix: G(t) is not filtered when `t_max|p_max` is specified in scoring rules (didn't influence evaluation at all)
+* docs: Clarified the use and impact of using `t_max` in scoring rules, added examples in scoring rules and AUC scores
+* feat: Added new argument `remove_obs` in scoring rules to remove observations with observed time `t > t_max` as a processing step to alleviate IPCW issues.
+This was before 'hard-coded' which made the Integrated Brier Score (`msr("surv.graf")`) differ minimally from other implementations and the original definition.
 
 # mlr3proba 0.7.0
 
 
@@ -15,6 +15,7 @@
 #'
 #' @family AUC survival measures
 #' @family lp survival measures
+#' @template example_auc_measures
 #' @export
 MeasureSurvChamblessAUC = R6Class("MeasureSurvChamblessAUC",
   inherit = MeasureSurvAUC,
 
@@ -69,6 +69,7 @@
 #'
 #' # Harrell's C-index evaluated up to a specific time horizon
 #' p$score(msr("surv.cindex", t_max = 97))
+#'
 #' # Harrell's C-index evaluated up to the time corresponding to 30% of censoring
 #' p$score(msr("surv.cindex", p_max = 0.3))
 #'
 
@@ -3,6 +3,8 @@
 #' @templateVar fullname MeasureSurvDCalibration
 #'
 #' @description
+#' `r lifecycle::badge("experimental")`
+#'
 #' This calibration method is defined by calculating the following statistic:
 #' \deqn{s = B/n \sum_i (P_i - n/B)^2}
 #' where \eqn{B} is number of 'buckets' (that equally divide \eqn{[0,1]} into intervals),
@@ -12,8 +14,8 @@
 #' falls within the corresponding interval.
 #' This statistic assumes that censoring time is independent of death time.
 #'
-#' A model is well-calibrated if \eqn{s \sim Unif(B)}, tested with `chisq.test`
-#'  (\eqn{p > 0.05} if well-calibrated).
+#' A model is well D-calibrated if \eqn{s \sim Unif(B)}, tested with `chisq.test`
+#'  (\eqn{p > 0.05} if well-calibrated, i.e. higher p-values are preferred).
 #' Model \eqn{i} is better calibrated than model \eqn{j} if \eqn{s(i) < s(j)},
 #' meaning that *lower values* of this measure are preferred.
 #'
@@ -23,7 +25,7 @@
 #' is well-calibrated. If `chisq = FALSE` and `s` is the predicted value then you can manually
 #' compute the p.value with `pchisq(s, B - 1, lower.tail = FALSE)`.
 #'
-#' NOTE: This measure is still experimental both theoretically and in implementation. Results
+#' **NOTE**: This measure is still experimental both theoretically and in implementation. Results
 #' should therefore only be taken as an indicator of performance and not for
 #' conclusive judgements about model calibration.
 #'
@@ -38,11 +40,12 @@
 #' You can manually get the p-value by executing `pchisq(s, B - 1, lower.tail = FALSE)`.
 #' The null hypothesis is that the model is D-calibrated.
 #' - `truncate` (`double(1)`) \cr
-#' This parameter controls the upper bound of the output statistic,
-#' when `chisq` is `FALSE`. We use `truncate = Inf` by default but \eqn{10} may be sufficient
-#' for most purposes, which corresponds to a p-value of 0.35 for the chisq.test using
-#' \eqn{B = 10} buckets. Values \eqn{>10} translate to even lower p-values and thus
-#' less calibrated models. If the number of buckets \eqn{B} changes, you probably will want to
+#' This parameter controls the upper bound of the output statistic, when `chisq` is `FALSE`.
+#' We use `truncate = Inf` by default but values between \eqn{10-16} are sufficient
+#' for most purposes, which correspond to p-values of \eqn{0.35-0.06} for the `chisq.test` using
+#' the default \eqn{B = 10} buckets.
+#' Values \eqn{B > 10} translate to even lower p-values and thus less D-calibrated models.
+#' If the number of buckets \eqn{B} changes, you probably will want to
 #' change the `truncate` value as well to correspond to the same p-value significance.
 #' Note that truncation may severely limit automated tuning with this measure.
 #'
 
@@ -11,6 +11,7 @@
 #' @templateVar eps 1e-3
 #' @template param_eps
 #' @template param_erv
+#' @template param_remove_obs
 #'
 #' @aliases MeasureSurvBrier mlr_measures_surv.brier
 #'
@@ -25,13 +26,13 @@
 #' survival function \eqn{S_i(t)}, the *observation-wise* loss integrated across
 #' the time dimension up to the time cutoff \eqn{\tau^*}, is:
 #'
-#' \deqn{L_{ISBS}(S_i, t_i, \delta_i) = \text{I}(t_i \leq \tau^*) \int^{\tau^*}_0  \frac{S_i^2(\tau) \text{I}(t_i \leq \tau, \delta=1)}{G(t_i)} + \frac{(1-S_i(\tau))^2 \text{I}(t_i > \tau)}{G(\tau)} \ d\tau}
+#' \deqn{L_{ISBS}(S_i, t_i, \delta_i) = \int^{\tau^*}_0  \frac{S_i^2(\tau) \text{I}(t_i \leq \tau, \delta_i=1)}{G(t_i)} + \frac{(1-S_i(\tau))^2 \text{I}(t_i > \tau)}{G(\tau)} \ d\tau}
 #'
 #' where \eqn{G} is the Kaplan-Meier estimate of the censoring distribution.
 #'
 #' The **re-weighted ISBS** (RISBS) is:
 #'
-#' \deqn{L_{RISBS}(S_i, t_i, \delta_i) = \delta_i \text{I}(t_i \leq \tau^*) \int^{\tau^*}_0  \frac{S_i^2(\tau) \text{I}(t_i \leq \tau) + (1-S_i(\tau))^2 \text{I}(t_i > \tau)}{G(t_i)} \ d\tau}
+#' \deqn{L_{RISBS}(S_i, t_i, \delta_i) = \delta_i \frac{\int^{\tau^*}_0  S_i^2(\tau) \text{I}(t_i \leq \tau) + (1-S_i(\tau))^2 \text{I}(t_i > \tau) \ d\tau}{G(t_i)}}
 #'
 #' which is always weighted by \eqn{G(t_i)} and is equal to zero for a censored subject.
 #'
@@ -48,10 +49,11 @@
 #' @template details_tmax
 #'
 #' @references
-#' `r format_bib("graf_1999")`
+#' `r format_bib("graf_1999", "sonabend2024", "kvamme2023")`
 #'
 #' @family Probabilistic survival measures
 #' @family distr survival measures
+#' @template example_scoring_rules
 #' @export
 MeasureSurvGraf = R6Class("MeasureSurvGraf",
   inherit = MeasureSurv,
@@ -73,11 +75,12 @@ MeasureSurvGraf = R6Class("MeasureSurvGraf",
         se = p_lgl(default = FALSE),
         proper = p_lgl(default = FALSE),
         eps = p_dbl(0, 1, default = 1e-3),
-        ERV = p_lgl(default = FALSE)
+        ERV = p_lgl(default = FALSE),
+        remove_obs = p_lgl(default = FALSE)
       )
       ps$set_values(
         integrated = TRUE, method = 2L, se = FALSE,
-        proper = FALSE, eps = 1e-3, ERV = ERV
+        proper = FALSE, eps = 1e-3, ERV = ERV, remove_obs = FALSE
       )
 
       range = if (ERV) c(-Inf, 1) else c(0, Inf)
@@ -132,7 +135,7 @@ MeasureSurvGraf = R6Class("MeasureSurvGraf",
         truth = prediction$truth,
         distribution = prediction$data$distr, times = times,
         t_max = ps$t_max, p_max = ps$p_max, proper = ps$proper, train = train,
-        eps = ps$eps
+        eps = ps$eps, remove_obs = ps$remove_obs
       )
 
       if (ps$se) {
 
@@ -15,6 +15,7 @@
 #'
 #' @family AUC survival measures
 #' @family lp survival measures
+#' @template example_auc_measures
 #' @export
 MeasureSurvHungAUC = R6Class("MeasureSurvHungAUC",
   inherit = MeasureSurvAUC,
 
@@ -11,6 +11,7 @@
 #' @templateVar eps 1e-3
 #' @template param_eps
 #' @template param_erv
+#' @template param_remove_obs
 #'
 #' @description
 #' Calculates the **Integrated Survival Log-Likelihood** (ISLL) or Integrated
@@ -23,13 +24,13 @@
 #' survival function \eqn{S_i(t)}, the *observation-wise* loss integrated across
 #' the time dimension up to the time cutoff \eqn{\tau^*}, is:
 #'
-#' \deqn{L_{ISLL}(S_i, t_i, \delta_i) = -\text{I}(t_i \leq \tau^*) \int^{\tau^*}_0  \frac{log[1-S_i(\tau)] \text{I}(t_i \leq \tau, \delta=1)}{G(t_i)} + \frac{\log[S_i(\tau)] \text{I}(t_i > \tau)}{G(\tau)} \ d\tau}
+#' \deqn{L_{ISLL}(S_i, t_i, \delta_i) = - \int^{\tau^*}_0  \frac{log[1-S_i(\tau)] \text{I}(t_i \leq \tau, \delta_i=1)}{G(t_i)} + \frac{\log[S_i(\tau)] \text{I}(t_i > \tau)}{G(\tau)} \ d\tau}
 #'
 #' where \eqn{G} is the Kaplan-Meier estimate of the censoring distribution.
 #'
 #' The **re-weighted ISLL** (RISLL) is:
 #'
-#' \deqn{L_{RISLL}(S_i, t_i, \delta_i) = -\delta_i \text{I}(t_i \leq \tau^*) \int^{\tau^*}_0  \frac{\log[1-S_i(\tau)]) \text{I}(t_i \leq \tau) + \log[S_i(\tau)] \text{I}(t_i > \tau)}{G(t_i)} \ d\tau}
+#' \deqn{L_{RISLL}(S_i, t_i, \delta_i) = -\delta_i \frac{\int^{\tau^*}_0  \log[1-S_i(\tau)]) \text{I}(t_i \leq \tau) + \log[S_i(\tau)] \text{I}(t_i > \tau) \ d\tau}{G(t_i)}}
 #'
 #' which is always weighted by \eqn{G(t_i)} and is equal to zero for a censored subject.
 #'
@@ -46,10 +47,11 @@
 #' @template details_tmax
 #'
 #' @references
-#' `r format_bib("graf_1999")`
+#' `r format_bib("graf_1999", "sonabend2024", "kvamme2023")`
 #'
 #' @family Probabilistic survival measures
 #' @family distr survival measures
+#' @template example_scoring_rules
 #' @export
 MeasureSurvIntLogloss = R6Class("MeasureSurvIntLogloss",
   inherit = MeasureSurv,
@@ -71,11 +73,12 @@ MeasureSurvIntLogloss = R6Class("MeasureSurvIntLogloss",
         se = p_lgl(default = FALSE),
         proper = p_lgl(default = FALSE),
         eps = p_dbl(0, 1, default = 1e-3),
-        ERV = p_lgl(default = FALSE)
+        ERV = p_lgl(default = FALSE),
+        remove_obs = p_lgl(default = FALSE)
       )
       ps$set_values(
         integrated = TRUE, method = 2L, se = FALSE,
-        proper = FALSE, eps = 1e-3, ERV = ERV
+        proper = FALSE, eps = 1e-3, ERV = ERV, remove_obs = FALSE
       )
 
       range = if (ERV) c(-Inf, 1) else c(0, Inf)
@@ -130,7 +133,7 @@ MeasureSurvIntLogloss = R6Class("MeasureSurvIntLogloss",
         truth = prediction$truth,
         distribution = prediction$data$distr, times = times,
         t_max = ps$t_max, p_max = ps$p_max, proper = ps$proper, train = train,
-        eps = ps$eps
+        eps = ps$eps, remove_obs = ps$remove_obs
       )
 
       if (ps$se) {
 
@@ -10,7 +10,7 @@
 #' Calculates the cross-entropy, or negative log-likelihood (NLL) or logarithmic (log), loss.
 #' @section Parameter details:
 #' - `IPCW` (`logical(1)`)\cr
-#' If `TRUE` (default) then returns the \eqn{L_{RNLL}} score (which is proper), otherwise the \eqn{L_{NLL}} score (improper).
+#' If `TRUE` (default) then returns the \eqn{L_{RNLL}} score (which is proper), otherwise the \eqn{L_{NLL}} score (improper). See Sonabend et al. (2024) for more details.
 #'
 #' @details
 #' The Log Loss, in the context of probabilistic predictions, is defined as the
@@ -33,6 +33,9 @@
 #'
 #' @template details_trainG
 #'
+#' @references
+#' `r format_bib("sonabend2024")`
+#'
 #' @family Probabilistic survival measures
 #' @family distr survival measures
 #' @export
 
@@ -11,6 +11,7 @@
 #' @templateVar eps 1e-3
 #' @template param_eps
 #' @template param_erv
+#' @template param_remove_obs
 #'
 #' @description
 #' Calculates the **Integrated Schmid Score** (ISS), aka integrated absolute loss.
@@ -22,27 +23,20 @@
 #' survival function \eqn{S_i(t)}, the *observation-wise* loss integrated across
 #' the time dimension up to the time cutoff \eqn{\tau^*}, is:
 #'
-#' \deqn{L_{ISS}(S_i, t_i, \delta_i) = \text{I}(t_i \leq \tau^*) \int^{\tau^*}_0  \frac{S_i(\tau) \text{I}(t_i \leq \tau, \delta=1)}{G(t_i)} + \frac{(1-S_i(\tau)) \text{I}(t_i > \tau)}{G(\tau)} \ d\tau}
+#' \deqn{L_{ISS}(S_i, t_i, \delta_i) = \int^{\tau^*}_0  \frac{S_i(\tau) \text{I}(t_i \leq \tau, \delta=1)}{G(t_i)} + \frac{(1-S_i(\tau)) \text{I}(t_i > \tau)}{G(\tau)} \ d\tau}
 #'
 #' where \eqn{G} is the Kaplan-Meier estimate of the censoring distribution.
 #'
 #' The **re-weighted ISS** (RISS) is:
 #'
-#' \deqn{L_{RISS}(S_i, t_i, \delta_i) = \delta_i \text{I}(t_i \leq \tau^*) \int^{\tau^*}_0  \frac{S_i(\tau) \text{I}(t_i \leq \tau) + (1-S_i(\tau)) \text{I}(t_i > \tau)}{G(t_i)} \ d\tau}
+#' \deqn{L_{RISS}(S_i, t_i, \delta_i) = \delta_i \frac{\int^{\tau^*}_0  S_i(\tau) \text{I}(t_i \leq \tau) + (1-S_i(\tau)) \text{I}(t_i > \tau) \ d\tau}{G(t_i)}}
 #'
 #' which is always weighted by \eqn{G(t_i)} and is equal to zero for a censored subject.
 #'
 #' To get a single score across all \eqn{N} observations of the test set, we
 #' return the average of the time-integrated observation-wise scores:
 #' \deqn{\sum_{i=1}^N L(S_i, t_i, \delta_i) / N}
 #'
-#'
-#' \deqn{L_{ISS}(S,t|t^*) = [(S(t^*))I(t \le t^*, \delta = 1)(1/G(t))] + [((1 - S(t^*)))I(t > t^*)(1/G(t^*))]}
-#' where \eqn{G} is the Kaplan-Meier estimate of the censoring distribution.
-#'
-#' The re-weighted ISS, RISS is given by
-#' \deqn{L_{RISS}(S,t|t^*) = [(S(t^*))I(t \le t^*, \delta = 1)(1/G(t))] + [((1 - S(t^*)))I(t > t^*)(1/G(t))]}
-#'
 #' @template properness
 #' @templateVar improper_id ISS
 #' @templateVar proper_id RISS
@@ -52,10 +46,11 @@
 #' @template details_tmax
 #'
 #' @references
-#' `r format_bib("schemper_2000", "schmid_2011")`
+#' `r format_bib("schemper_2000", "schmid_2011", "sonabend2024", "kvamme2023")`
 #'
 #' @family Probabilistic survival measures
 #' @family distr survival measures
+#' @template example_scoring_rules
 #' @export
 MeasureSurvSchmid = R6Class("MeasureSurvSchmid",
   inherit = MeasureSurv,
@@ -77,11 +72,12 @@ MeasureSurvSchmid = R6Class("MeasureSurvSchmid",
         se = p_lgl(default = FALSE),
         proper = p_lgl(default = FALSE),
         eps = p_dbl(0, 1, default = 1e-3),
-        ERV = p_lgl(default = FALSE)
+        ERV = p_lgl(default = FALSE),
+        remove_obs = p_lgl(default = FALSE)
       )
       ps$set_values(
         integrated = TRUE, method = 2L, se = FALSE,
-        proper = FALSE, eps = 1e-3, ERV = ERV
+        proper = FALSE, eps = 1e-3, ERV = ERV, remove_obs = FALSE
       )
 
       range = if (ERV) c(-Inf, 1) else c(0, Inf)
@@ -135,7 +131,7 @@ MeasureSurvSchmid = R6Class("MeasureSurvSchmid",
         truth = prediction$truth,
         distribution = prediction$data$distr, times = times,
         t_max = ps$t_max, p_max = ps$p_max, proper = ps$proper, train = train,
-        eps = ps$eps
+        eps = ps$eps, remove_obs = ps$remove_obs
       )
 
       if (ps$se) {
 
@@ -16,6 +16,7 @@
 #'
 #' @family AUC survival measures
 #' @family lp survival measures
+#' @template example_auc_measures
 #' @export
 MeasureSurvSongAUC = R6Class("MeasureSurvSongAUC",
   inherit = MeasureSurvAUC,