mlr-org
diff --git a/‎DESCRIPTION
Lines changed: 2 additions & 1 deletion b/‎DESCRIPTION
Lines changed: 2 additions & 1 deletion
diff --git a/‎NAMESPACE
Lines changed: 1 addition & 0 deletions b/‎NAMESPACE
Lines changed: 1 addition & 0 deletions
diff --git a/‎NEWS.md
Lines changed: 1 addition & 4 deletions b/‎NEWS.md
Lines changed: 1 addition & 4 deletions
diff --git a/‎R/MeasureSurvCindex.R
Lines changed: 6 additions & 1 deletion b/‎R/MeasureSurvCindex.R
Lines changed: 6 additions & 1 deletion
diff --git a/‎R/MeasureSurvDCalibration.R
Lines changed: 119 additions & 0 deletions b/‎R/MeasureSurvDCalibration.R
Lines changed: 119 additions & 0 deletions
diff --git a/‎R/bibentries.R
Lines changed: 11 additions & 0 deletions b/‎R/bibentries.R
Lines changed: 11 additions & 0 deletions
diff --git a/‎R/zzz.R
Lines changed: 6 additions & 5 deletions b/‎R/zzz.R
Lines changed: 6 additions & 5 deletions
diff --git a/‎man/mlr_measures_surv.calib_alpha.Rd
Lines changed: 4 additions & 1 deletion b/‎man/mlr_measures_surv.calib_alpha.Rd
Lines changed: 4 additions & 1 deletion
diff --git a/‎man/mlr_measures_surv.calib_beta.Rd
Lines changed: 3 additions & 1 deletion b/‎man/mlr_measures_surv.calib_beta.Rd
Lines changed: 3 additions & 1 deletion
diff --git a/‎man/mlr_measures_surv.chambless_auc.Rd
Lines changed: 1 addition & 0 deletions b/‎man/mlr_measures_surv.chambless_auc.Rd
Lines changed: 1 addition & 0 deletions
@@ -1,6 +1,6 @@
 Package: mlr3proba
 Title: Probabilistic Supervised Learning for 'mlr3'
-Version: 0.4.0.9000
+Version: 0.4.0
 Authors@R:
     c(person(given = "Raphael",
              family = "Sonabend",
@@ -87,6 +87,7 @@ Collate:
     'MeasureSurvCalibrationBeta.R'
     'MeasureSurvChamblessAUC.R'
     'MeasureSurvCindex.R'
+    'MeasureSurvDCalibration.R'
     'MeasureSurvGraf.R'
     'MeasureSurvHungAUC.R'
     'MeasureSurvIntLogloss.R'
 
@@ -34,6 +34,7 @@ export(MeasureSurvCalibrationAlpha)
 export(MeasureSurvCalibrationBeta)
 export(MeasureSurvChamblessAUC)
 export(MeasureSurvCindex)
+export(MeasureSurvDCalibration)
 export(MeasureSurvGraf)
 export(MeasureSurvHungAUC)
 export(MeasureSurvIntLogloss)
 
@@ -1,14 +1,11 @@
-# mlr3proba 0.4.0.9000
-
-- Internal changes only.
-
 
 # mlr3proba 0.4.0
 
 * Deprecated measures from 0.2.0 have now been deleted.
 * IPCW measures such as `surv.graf`, `surv.schmid`, and `surv.intlogloss` now allow training data to be passed to the score function with `task` and `train_set` to allow the censoring distribution to be estimated on the training data. This is automatically applied for resample and benchmark results.
 * IPCW measures such as `surv.graf`, `surv.schmid`, and `surv.intlogloss` now include a parameter `proper` to determine what weighting scheme should be applied by the estimated censoring distribution, The current method (Graf, 1999) `proper = FALSE`, weights observations either by their event time or 'current' time depending if they're dead or not, the new method `proper = TRUE` weights observations by event time. The `proper = TRUE` method is strictly proper when censoring and survival times are independent and G is estimated on large enough data. The `proper = FALSE` method is never proper. The default is currently `proper = FALSE` to enable backward compatibility, this will be changed to `proper = TRUE` in v0.6.0.
 * The `rm_cens` parameter in `surv.logloss` has been deprecated in favour of `IPCW`. `rm_cens` will be removed in v0.6.0. If `rm_cens` or `IPCW` are `TRUE` then censored observations are removed and the score is weighted by an estimate of the censoring distribution at individual event times. Otherwise if `rm_cens` and `IPCW` are `FALSE` then no deletion or weighting takes place. The `IPCW = TRUE` method is strictly proper when censoring and survival times are independent and G is estimated on large enough data. The `ipcw = FALSE` method is never proper. 
+* Add `surv.dcalib` for the D-Calibration measure from Haider et al. (2020).
 
 # mlr3proba 0.3.2
 
 
@@ -18,7 +18,12 @@
 #'  * `"SG"` = Weights concordance by S/G (Shemper et al.)
 #'  * `"S"` = Weights concordance by S (Peto and Peto)
 #'
-#'  The last three require training data.
+#'  The last three require training data. `"GH"` is only applicable to [LearnerSurvCoxPH].
+#'
+#'  @details
+#'  The implementation is slightly different from [survival::concordance]. Firstly this
+#'  implementation is faster, and secondly the weights are computed on the training dataset whereas
+#'  in [survival::concordance] the weights are computed on the same testing data.
 #'
 #' @references
 #' `r format_bib("peto_1972", "harrell_1982", "goenen_2005", "schemper_2009", "uno_2011")`
 
@@ -0,0 +1,119 @@
+#' @template surv_measure
+#' @templateVar title D-Calibration
+#' @templateVar fullname MeasureSurvDCalibration
+#'
+#' @description
+#' This calibration method is defined by calculating
+#' \deqn{s = B/n \sum_i (P_i - n/B)^2}
+#' where \eqn{B} is number of 'buckets', \eqn{n} is the number of predictions,
+#' and \eqn{P_i} is the predicted number of deaths in the \eqn{i}th interval
+#' \eqn{(0, 1/B], (1/B, 2/B], \ldots, ((B - 1)/B, 1]} of predicted survival probability.
+#'
+#' A model is well-calibrated if the \eqn{P_i} are uniform over the \eqn{B} buckets, tested with
+#' `chisq.test` (`p > 0.05` if well-calibrated).
+#' Model `i` is better calibrated than model `j` if `s_i < s_j`.
+#'
+#' @details
+#' This measure can either return the test statistic or the p-value from the `chisq.test`.
+#' The former is useful for model comparison whereas the latter is useful for determining if a model
+#' is well-calibrated. If `chisq = FALSE` and `m` is the predicted value then you can manually
+#' compute the p-value with `pchisq(m, B - 1, lower.tail = FALSE)`.
+#'
+#' NOTE: This measure is still experimental both theoretically and in implementation. Results
+#' should therefore only be taken as an indicator of performance and not for
+#' conclusive judgements about model calibration.
+#'
+#' @references
+#' `r format_bib("haider_2020")`
+#'
+#' @family calibration survival measures
+#' @family distr survival measures
+#' @export
+MeasureSurvDCalibration = R6Class("MeasureSurvDCalibration",
+  inherit = MeasureSurv,
+  public = list(
+    #' @description Creates a new instance of this [R6][R6::R6Class] class.
+    #' @param B (`integer(1)`) \cr
+    #' Number of buckets to test for uniform predictions over. Default of `10` is recommended by
+    #' Haider et al. (2020). Must be a single whole number `>= 1`.
+    #' @param chisq (`logical(1)`) \cr
+    #' If `TRUE` returns the p-value of the corresponding chisq.test instead of the measure.
+    #' Otherwise this can be performed manually with `pchisq(m, B - 1, lower.tail = FALSE)`.
+    #' `p > 0.05` indicates well-calibrated.
+    initialize = function(B = 10L, chisq = FALSE) {
+      super$initialize(
+        id = "surv.dcalib",
+        range = c(0, Inf),
+        minimize = TRUE,
+        predict_type = "distr",
+        man = "mlr3proba::mlr_measures_surv.dcalib"
+      )
+      # B sizes the bucket vector in .score(), so it has to be one positive whole number
+      private$.B = assert_integerish(B, lower = 1, any.missing = FALSE, len = 1L)
+      private$.chisq = assert_flag(chisq)
+    }
+  ),
+
+  active = list(
+    #' @field B (`integer(1)`) \cr
+    #' Number of buckets to test for uniform predictions over. Default of `10` is recommended by
+    #' Haider et al. (2020). Must be a single whole number `>= 1`.
+    B = function(x) {
+      if (!missing(x)) {
+        private$.B = assert_integerish(x, lower = 1, any.missing = FALSE, len = 1L)
+      } else {
+        return(private$.B)
+      }
+    },
+
+    #' @field chisq `(logical(1))` \cr
+    #' If `TRUE` returns the p-value of the corresponding chisq.test instead of the measure.
+    #' Otherwise this can be performed manually with `pchisq(m, B - 1, lower.tail = FALSE)`.
+    #' `p > 0.05` indicates well-calibrated.
+    chisq = function(x) {
+      if (!missing(x)) {
+        private$.chisq = assert_flag(x)
+      } else {
+        return(private$.chisq)
+      }
+    }
+  ),
+
+  private = list(
+    .B = 10L,
+    .chisq = FALSE,
+    .score = function(prediction, ...) {
+      B = self$B
+      # per-bucket accumulator of (possibly fractional) observation counts
+      bj = numeric(B)
+      # predicted survival probability of each observation at its own observed time
+      si = as.numeric(prediction$distr$survival(data = matrix(prediction$truth[, 1L], nrow = 1L)))
+      # floor at 1e-5 so that ceiling() below never yields the non-existent bucket 0
+      si = pmax(si, 1e-5)
+      # bucket j collects survival probabilities in ((j - 1)/B, j/B]
+      js = ceiling(B * si)
+      dead = as.numeric(prediction$truth[, 2L]) == 1
+      # loop kept because a censored observation updates several buckets at once,
+      # whereas a dead one only touches its own bucket
+      for (i in seq_along(si)) {
+        ji = js[[i]]
+        if (dead[[i]]) {
+          # dead observations contribute 1 to their index
+          bj[ji] = bj[ji] + 1
+        } else {
+          # censored observations are spread over their own bucket and every lower one;
+          # seq_len() is empty for ji == 1, whereas seq.int(0) would wrongly give c(1, 0)
+          below = seq_len(ji - 1)
+          bj[below] = bj[below] + 1 / (B * si[[i]])
+          bj[ji] = bj[ji] + (1 - (ji - 1) / (B * si[[i]]))
+        }
+      }
+
+      if (self$chisq) {
+        return(stats::chisq.test(bj)$p.value)
+      } else {
+        return((B / length(si)) * sum((bj - length(si) / B)^2))
+      }
+    }
+  )
+)
@@ -599,5 +599,16 @@ bibentries = c( # nolint start
     url               = "https://www.jstor.org/stable/2335161",
     volume            = "66",
     year              = "1979"
+  ),
+
+  haider_2020         = bibentry("article",
+    author            = "Haider, Humza and Hoehn, Bret and Davis, Sarah and Greiner, Russell",
+    journal           = "Journal of Machine Learning Research",
+    volume            = "21",
+    number            = "85",
+    pages             = "1--63",
+    title             = "Effective Ways to Build and Evaluate Individual Survival Distributions",
+    url               = "http://jmlr.org/papers/v21/18-772.html",
+    year              = "2020"
   )
 ) # nolint end
@@ -97,6 +97,7 @@ register_mlr3 = function() {
 
    x$add("surv.cindex", MeasureSurvCindex)
 
+  x$add("surv.dcalib", MeasureSurvDCalibration)
   x$add("surv.calib_beta", MeasureSurvCalibrationBeta)
   x$add("surv.calib_alpha", MeasureSurvCalibrationAlpha)
 
@@ -166,12 +167,12 @@ register_mlr3pipelines = function() {
   pkgname = vapply(hooks[-1], function(x) environment(x)$pkgname, NA_character_)
   setHook(event, hooks[pkgname != "mlr3proba"], action = "replace")
 
-   event = packageEvent("mlr3pipelines", "onLoad")
-   hooks = getHook(event)
-   pkgname = vapply(hooks[-1], function(x) environment(x)$pkgname, NA_character_)
-   setHook(event, hooks[pkgname != "mlr3proba"], action = "replace")
+  event = packageEvent("mlr3pipelines", "onLoad")
+  hooks = getHook(event)
+  pkgname = vapply(hooks[-1], function(x) environment(x)$pkgname, NA_character_)
+  setHook(event, hooks[pkgname != "mlr3proba"], action = "replace")
 
-   library.dynam.unload("mlr3proba", libpath)
+  library.dynam.unload("mlr3proba", libpath)
 }
 
 leanify_package()