Skip to content

Commit 162a9ec

Browse files
committed
Merge branch 'dev' of github.com:iiasa/ibis.iSDM into dev
2 parents 2c09250 + 3bd181c commit 162a9ec

File tree

2 files changed

+76
-2
lines changed

2 files changed

+76
-2
lines changed

R/train.R

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -516,12 +516,14 @@ methods::setMethod(
516516

517517
# select only columns needed by equation
518518
if (model$biodiversity[[id]]$equation != "<Default>") {
519-
520519
env <- subset(env, select = c("ID", "x", "y", attr(stats::terms.formula(model$biodiversity[[id]]$equation),
521520
"term.labels")))
522-
523521
}
524522

523+
# Check for common issues and exclude variables if affected (could be outsourced)
524+
# --- Variance check ---
525+
env <- predictor_check(env)
526+
525527
# Remove missing values as several engines can't deal with those easily
526528
miss <- stats::complete.cases(env)
527529
if(sum( !miss )>0 && getOption('ibis.setupmessages', default = TRUE)) {

R/utils-predictors.R

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1036,6 +1036,78 @@ makeBin <- function(v, n, nknots, cutoffs = NULL){
10361036
return(out)
10371037
}
10381038

1039+
#### Check predictors ----
1040+
1041+
#' Helper function to check extracted predictors for issues
1042+
#' @description
1043+
#' Here we check the variables in a provided [`data.frame`] for known issues.
1044+
#' Note that this is done vertically (per column) and not horizontally (thus removing observations).
1045+
#'
1046+
#' If any of the conditions are satistified the entire predictor is removed from the model!
1047+
#' @details
1048+
#' Specifically checked are:
1049+
#' [*] Whether all values in a column are \code{NA}.
1050+
#' [*] Whether all values in a column are finite.
1051+
#' [*] Whether the variance of all variables is greater than 0.
1052+
#'
1053+
#' @param env A [`data.frame`] with all predictor variables.
1054+
#' @return A [`data.frame`] potentially with any variable names excluded. If the
1055+
#' function fails due to some reason it returns the original \code{env}.
1056+
#'
1057+
#' @keywords utils
1058+
#'
1059+
#' @examples
1060+
#' \dontrun{
1061+
#' # Remove highly correlated predictors
1062+
#' env <- predictor_check( env )
1063+
#' }
1064+
#' @author Martin Jung
1065+
#' @noRd
1066+
predictor_check <- function(env){
1067+
assertthat::assert_that(
1068+
is.data.frame(env)
1069+
)
1070+
# Dummy copy
1071+
dummy <- env
1072+
1073+
# Check NaN
1074+
check_nan <- apply(env, 2, function(z) all(is.nan(z)))
1075+
if(any(check_nan)){
1076+
if(getOption('ibis.setupmessages', default = TRUE)) {
1077+
myLog('[Setup]','yellow', 'Excluded ', paste0(names(which(check_nan)),collapse = "; "),
1078+
' variables owing to exclusively NA data!' )
1079+
}
1080+
env <- env |> dplyr::select(-dplyr::any_of(names(which(check_nan))))
1081+
}
1082+
1083+
# Check inifinites
1084+
check_infinite <- apply(env, 2, function(z) any( is.infinite(z) ) )
1085+
if(any(check_infinite)){
1086+
if(getOption('ibis.setupmessages', default = TRUE)) {
1087+
myLog('[Setup]','yellow', 'Excluded ', paste0(names(which(check_infinite)),collapse = "; "),
1088+
' variables owing to observations with infinite values!' )
1089+
}
1090+
env <- env |> dplyr::select(-dplyr::any_of(names(which(check_infinite))))
1091+
}
1092+
1093+
# Check variance
1094+
check_var <- apply(env, 2, function(z) var(z, na.rm = TRUE)) == 0
1095+
if(any(check_var)){
1096+
if(getOption('ibis.setupmessages', default = TRUE)) {
1097+
myLog('[Setup]','yellow', 'Excluded ', paste0(names(which(check_var)),collapse = "; "),
1098+
' variables owing to zero variance!' )
1099+
}
1100+
env <- env |> dplyr::select(-dplyr::any_of(names(which(check_var))))
1101+
}
1102+
1103+
# Check whether all columns have been removed, if so revert back for safety?
1104+
if(ncol(env)==0) env <- dummy
1105+
rm(dummy)
1106+
1107+
# Return
1108+
return(env)
1109+
}
1110+
10391111
#### Filter predictor functions ----
10401112

10411113
#' Filter a set of correlated predictors to fewer ones

0 commit comments

Comments
 (0)