Commit 80b4190

Merge pull request #107 from rsquaredacademy/develop
Develop
2 parents 442b1bf + 2e27512 commit 80b4190

File tree: 134 files changed, +2284 / -1594 lines


DESCRIPTION

Lines changed: 2 additions & 7 deletions
@@ -1,25 +1,22 @@
 Package: blorr
 Type: Package
 Title: Tools for Developing Binary Logistic Regression Models
-Version: 0.2.2.9000
+Version: 0.3.0
 Authors@R: person("Aravind", "Hebbali", email = "hebbali.aravind@gmail.com", role = c("aut", "cre"),
     comment = c(ORCID = "0000-0001-9220-9669"))
 Description: Tools designed to make it easier for beginner and intermediate users to build and validate
     binary logistic regression models. Includes bivariate analysis, comprehensive regression output,
     model fit statistics, variable selection procedures, model validation techniques and a 'shiny'
     app for interactive model building.
 Depends:
-    R(>= 3.3)
+    R(>= 3.5)
 Imports:
     car,
-    caret,
     data.table,
-    e1071,
     ggplot2,
     gridExtra,
     lest,
     Rcpp,
-    scales,
     stats,
     utils
 Suggests:
@@ -40,5 +37,3 @@ Encoding: UTF-8
 LazyData: true
 RoxygenNote: 7.1.0
 LinkingTo: Rcpp
-Remotes:
-    tidyverse/dplyr

NAMESPACE

Lines changed: 2 additions & 2 deletions
@@ -2,6 +2,7 @@

 S3method(blr_bivariate_analysis,default)
 S3method(blr_coll_diag,default)
+S3method(blr_confusion_matrix,default)
 S3method(blr_gains_table,default)
 S3method(blr_model_fit_stats,default)
 S3method(blr_multi_model_fit_stats,default)
@@ -30,6 +31,7 @@ S3method(plot,blr_step_p_forward)
 S3method(plot,blr_woe_iv)
 S3method(print,blr_bivariate_analysis)
 S3method(print,blr_coll_diag)
+S3method(print,blr_confusion_matrix)
 S3method(print,blr_gains_table)
 S3method(print,blr_model_fit_stats)
 S3method(print,blr_multi_model_fit_stats)
@@ -119,14 +121,12 @@ export(blr_woe_iv)
 export(blr_woe_iv_stats)
 importFrom(Rcpp,sourceCpp)
 importFrom(car,Anova)
-importFrom(caret,confusionMatrix)
 importFrom(data.table,":=")
 importFrom(data.table,.N)
 importFrom(data.table,data.table)
 importFrom(data.table,rbindlist)
 importFrom(data.table,setDF)
 importFrom(data.table,setorder)
-importFrom(e1071,classAgreement)
 importFrom(ggplot2,aes)
 importFrom(ggplot2,annotate)
 importFrom(ggplot2,element_blank)

NEWS.md

Lines changed: 4 additions & 0 deletions
@@ -1,3 +1,7 @@
+# blorr 0.3.0
+
+This is a minor release to reduce package dependencies and fix bugs.
+
 # blorr 0.2.2

 This is a patch release to fix CRAN errors.

R/blr-bivariate-analysis.R

Lines changed: 10 additions & 5 deletions
@@ -331,13 +331,18 @@ plot.blr_segment_dist <- function(x, title = NA, xaxis_title = "Levels",
     geom_col(aes(y = `n%`), fill = bar_color) +
     geom_line(aes(y = `1s%`, group = 1), color = line_color) +
     xlab(xaxis_title) + ggtitle(plot_title) + ylab(yaxis_title) +
-    scale_y_continuous(labels = scales::percent,
-      sec.axis = sec_axis(~. / sec_axis_scale, name = sec_yaxis_title,
-        labels = scales::percent))
+    scale_y_continuous(
+      breaks = seq(0, 1, by = 0.1),
+      labels = paste0(seq(0, 1, by = 0.1) * 100, '%'),
+      sec.axis = sec_axis(
+        trans = ~.,
+        breaks = seq(0, 1, by = 0.1),
+        labels = paste0(seq(0, 1, by = 0.1) * 100, '%'),
+        name = sec_yaxis_title))

   if (print_plot) {
     print(p)
-  }
+  }

   invisible(p)
 }
@@ -347,6 +352,6 @@ secondary_axis_scale_comp <- function(x) {

   d <- x$dist_table
   d$sec <- d$`n%` / d$`1s%`
-  min(d$sec)
+  max(d$sec)

 }
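Side note on the hunk above: the percent axis labels are now built with base R (seq() plus paste0()) instead of scales::percent, which is what lets the scales dependency be dropped from DESCRIPTION, and secondary_axis_scale_comp() now returns max(d$sec) rather than min(d$sec). A minimal standalone sketch of the same labelling idea, using made-up data rather than anything from the package:

library(ggplot2)

# toy data; only the axis-label construction mirrors the commit
df <- data.frame(level = c("A", "B", "C"), prop = c(0.2, 0.5, 0.3))

ggplot(df, aes(x = level, y = prop)) +
  geom_col() +
  scale_y_continuous(
    breaks = seq(0, 1, by = 0.1),
    labels = paste0(seq(0, 1, by = 0.1) * 100, "%")
  )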

R/blr-gains-table.R

Lines changed: 4 additions & 4 deletions
@@ -100,8 +100,8 @@ plot.blr_gains_table <- function(x, title = "Lift Chart", xaxis_title = "% Popul
     geom_line(aes(x = cum_total_per, y = cum_1s_per), color = lift_curve_col) +
     geom_line(aes(x = cum_total_per, y = cum_total_y), color = diag_line_col) +
     ggtitle(title) + xlab(xaxis_title) + ylab(yaxis_title) +
-    scale_x_continuous(labels = scales::percent) +
-    scale_y_continuous(labels = scales::percent) +
+    scale_x_continuous(labels = c('0%', '25%', '50%', '75%', '100%')) +
+    scale_y_continuous(labels = c('0%', '25%', '50%', '75%', '100%')) +
     theme(plot.title = element_text(hjust = plot_title_justify))

   if (print_plot) {
@@ -168,8 +168,8 @@ blr_ks_chart <- function(gains_table, title = "KS Chart", yaxis_title = " ",
     annotate("text", x = annotate_x, y = annotate_y,
       label = paste0("KS: ", ks_stat, "%")) +
     ggtitle(title) + xlab(xaxis_title) + ylab(yaxis_title) +
-    scale_x_continuous(labels = scales::percent) +
-    scale_y_continuous(labels = scales::percent) +
+    scale_x_continuous(labels = c('0%', '25%', '50%', '75%', '100%')) +
+    scale_y_continuous(labels = c('0%', '25%', '50%', '75%', '100%')) +
     theme(plot.title = element_text(hjust = 0.5),
       legend.title = element_blank())
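Here the scales::percent labels are replaced with a fixed character vector. One caveat, not stated in the commit: labels = c('0%', '25%', '50%', '75%', '100%') assumes ggplot2 keeps its default breaks of 0, 0.25, 0.5, 0.75 and 1 on a 0-1 axis. A more defensive variant would pin the breaks explicitly; a rough sketch with toy data:

library(ggplot2)

# toy cumulative-gains style data, not package output
d <- data.frame(x = c(0, 0.25, 0.5, 0.75, 1), y = c(0, 0.45, 0.7, 0.9, 1))

ggplot(d, aes(x, y)) +
  geom_line() +
  scale_x_continuous(breaks = c(0, 0.25, 0.5, 0.75, 1),
                     labels = c("0%", "25%", "50%", "75%", "100%")) +
  scale_y_continuous(breaks = c(0, 0.25, 0.5, 0.75, 1),
                     labels = c("0%", "25%", "50%", "75%", "100%"))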

R/blr-model-validation.R

Lines changed: 84 additions & 7 deletions
@@ -1,10 +1,11 @@
 #' Confusion matrix
 #'
-#' Wrapper for \code{confMatrix} from the caret package.
+#' Confusion matrix and statistics.
 #'
 #' @param model An object of class \code{glm}.
 #' @param data A \code{tibble} or a \code{data.frame}.
 #' @param cutoff Cutoff for classification.
+#' @param ... Other arguments.
 #'
 #' @return Confusion matix.
 #'
@@ -14,20 +15,22 @@
 #'
 #' blr_confusion_matrix(model, cutoff = 0.4)
 #'
-#' @importFrom caret confusionMatrix
-#' @importFrom e1071 classAgreement
-#'
 #' @family model validation techniques
 #'
 #' @export
 #'
-blr_confusion_matrix <- function(model, cutoff = 0.5, data = NULL) {
+blr_confusion_matrix <- function(model, cutoff = 0.5, data = NULL, ...) UseMethod("blr_confusion_matrix")
+
+#' @rdname blr_confusion_matrix
+#' @export
+#'
+blr_confusion_matrix.default <- function(model, cutoff = 0.5, data = NULL, ...) {

   blr_check_model(model)
   blr_check_values(cutoff, 0, 1)

   namu <- formula(model)[[2]]
-
+
   if (is.null(data)) {
     data <- model$model
     response <- data[[1]]
@@ -38,7 +41,81 @@ blr_confusion_matrix <- function(model, cutoff = 0.5, data = NULL) {

   p_data <- predict(model, newdata = data, type = "response")
   c_data <- as.factor(as.numeric(p_data > cutoff))
+  out <- table(Prediction = c_data, Reference = response)
+
+  a <- out[4]
+  b <- out[2]
+  c <- out[3]
+  d <- out[1]
+
+  accuracy <- (a + d) / (a + b + c + d)
+  no_inf_rate <- table(response)[[1]] / sum(table(response))
+  sensitivity <- a / (a + c)
+  specificity <- d / (b + d)
+  prevalence <- (a + c) / (a + b + c + d)
+  detect_rate <- a / (a + b + c + d)
+  detect_prev <- (a + b) / (a + b + c + d)
+  bal_accuracy <- (sensitivity + specificity) / 2
+  precision <- a / (a + b)
+  recall <- a / (a + c)
+  kappa <- blr_kappa(out)
+  mcnemar_p <- stats::mcnemar.test(out)$p.value
+
+  ppv <- (sensitivity * prevalence) / ((sensitivity * prevalence) +
+           ((1 - specificity) * (1 - prevalence)))
+
+  npv <- specificity * (1 - prevalence) / (((1 - sensitivity) * prevalence) +
+           (specificity * (1 - prevalence)))

-  confusionMatrix(data = c_data, reference = response, positive = '1')
+  result <- list(
+    accuracy = accuracy,
+    balanced_accuracy = bal_accuracy,
+    conf_matrix = out,
+    detection_prevalence = detect_prev,
+    detection_rate = detect_rate,
+    mcnemar_kappa = kappa,
+    mcnemar_test_p_val = mcnemar_p,
+    negative_predicted_value = npv,
+    no_information_rate = no_inf_rate,
+    positive_predicted_value = ppv,
+    precision = precision,
+    prevalence = prevalence,
+    recall = recall,
+    sensitivity = sensitivity,
+    specificity = specificity)
+
+  class(result) <- "blr_confusion_matrix"
+  return(result)
+
+}
+
+#' @export
+#'
+print.blr_confusion_matrix <- function(x, ...) {
+
+  cat('Confusion Matrix and Statistics', '\n\n')
+  print(x$conf_matrix)
+  cat('\n\n')
+  cat(' Accuracy :', format(round(x$accuracy, 4), nsmall = 4), '\n')
+  cat(' No Information Rate :', format(round(x$no_information_rate, 4), nsmall = 4), '\n\n')
+  cat(' Kappa :', format(round(x$mcnemar_kappa, 4), nsmall = 4), '\n\n')
+  cat("McNemars's Test P-Value :", format(round(x$mcnemar_test_p_val, 4), nsmall = 4), '\n\n')
+  cat(' Sensitivity :', format(round(x$sensitivity, 4), nsmall = 4), '\n')
+  cat(' Specificity :', format(round(x$specificity, 4), nsmall = 4), '\n')
+  cat(' Pos Pred Value :', format(round(x$positive_predicted_value, 4), nsmall = 4), '\n')
+  cat(' Neg Pred Value :', format(round(x$negative_predicted_value, 4), nsmall = 4), '\n')
+  cat(' Prevalence :', format(round(x$prevalence, 4), nsmall = 4), '\n')
+  cat(' Detection Rate :', format(round(x$detection_rate, 4), nsmall = 4), '\n')
+  cat(' Detection Prevalence :', format(round(x$detection_prevalence, 4), nsmall = 4), '\n')
+  cat(' Balanced Accuracy :', format(round(x$balanced_accuracy, 4), nsmall = 4), '\n')
+  cat(' Precision :', format(round(x$precision, 4), nsmall = 4), '\n')
+  cat(' Recall :', format(round(x$recall, 4), nsmall = 4), '\n\n')
+  cat(" 'Positive' Class : 1")
+
+}

+blr_kappa <- function(out) {
+  agreement <- sum(diag(out)) / sum(out)
+  expected <- sum(rowSums(out) * colSums(out)) / (sum(out) ^ 2)
+  (agreement - expected) / (1 - expected)
 }
R/blr-roc-curve.R

Lines changed: 3 additions & 2 deletions
@@ -53,8 +53,9 @@ blr_roc_curve <- function(gains_table, title = "ROC Curve",
   ggplot(plot_data, aes(x = `1 - specificity`, y = sensitivity_per)) +
     geom_point(shape = point_shape, fill = point_fill, color = point_color) +
     geom_line(color = roc_curve_col) + ggtitle(title) +
-    scale_x_continuous(labels = scales::percent) + xlab(xaxis_title) +
-    scale_y_continuous(labels = scales::percent) + ylab(yaxis_title) +
+    scale_x_continuous(labels = c('0%', '25%', '50%', '75%', '100%')) +
+    scale_y_continuous(labels = c('0%', '25%', '50%', '75%', '100%')) +
+    xlab(xaxis_title) + ylab(yaxis_title) +
     theme(plot.title = element_text(hjust = plot_title_justify)) +
     geom_line(aes(x = `1 - specificity`, y = `1 - specificity`),
       color = diag_line_col)
color = diag_line_col)

R/blr-stepwise-backward-regression.R

Lines changed: 0 additions & 4 deletions
@@ -99,10 +99,6 @@ blr_step_p_backward.default <- function(model, prem = 0.3, details = FALSE, ...)
     m <- glm(paste(response, "~", paste(preds, collapse = " + ")), l, family = binomial(link = 'logit'))
     m_sum <- Anova(m, test.statistic = "Wald")
     pvals <- m_sum$`Pr(>Chisq)`
-    # m_sum <- summary(m)
-    # pvals <- unname(m_sum$coefficients[, 4])[-1]
-    # m <- ols_regress(paste(response, "~", paste(preds, collapse = " + ")), l)
-    # pvals <- m$pvalues[-1]
     maxp <- which(pvals == max(pvals))

     suppressWarnings(
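The deleted lines above are the old summary()/ols_regress() route that had been kept as comments; the live selection step relies on Wald chi-square p-values from car::Anova(), dropping the predictor with the largest p-value while it exceeds prem. A rough sketch of a single such step on arbitrary data (mtcars here, purely for illustration):

library(car)

# arbitrary example model, not taken from the package
m <- glm(am ~ mpg + hp + wt, data = mtcars, family = binomial(link = "logit"))

# Wald chi-square test per term, as in the selection loop
m_sum <- Anova(m, test.statistic = "Wald")
pvals <- m_sum$`Pr(>Chisq)`

# candidate for removal: the term with the largest p-value
preds <- attr(terms(m), "term.labels")
maxp  <- which.max(pvals)
data.frame(candidate = preds[maxp], p_value = pvals[maxp])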

R/blr-stepwise-forward-regression.R

Lines changed: 0 additions & 9 deletions
@@ -160,18 +160,9 @@ blr_step_p_forward.default <- function(model, penter = 0.3, details = FALSE, ...
     m_sum <- Anova(m, test.statistic = "Wald")
     pvals[i] <- m_sum$`Pr(>Chisq)`[ppos]
     tvals[i] <- m_sum$Chisq[ppos]
-    # m_sum <- summary(m)
-    # pvals[i] <- unname(m_sum$coefficients[, 4])[ppos]
-    # tvals[i] <- unname(m_sum$coefficients[, 3])[ppos]
-    # m <- blr_regress(paste(response, "~",
-    #                        paste(predictors, collapse = " + ")), l)
-    # pvals[i] <- m$pval[ppos]
-    # tvals[i] <- m$zval[ppos]
   }

   minp <- which(pvals == min(pvals))
-  # tvals <- abs(tvals)
-  # maxt <- which(tvals == max(tvals))

   if (pvals[minp] <= penter) {


R/blr-stepwise-regression.R

Lines changed: 0 additions & 53 deletions
@@ -114,36 +114,16 @@ blr_step_p_both.default <- function(model, pent = 0.1, prem = 0.3, details = FAL
     m_sum <- Anova(m, test.statistic = "Wald")
     pvals[i] <- m_sum$`Pr(>Chisq)`[ppos]
     tvals[i] <- m_sum$Chisq[ppos]
-    # m_sum <- summary(m)
-    # pvals[i] <- unname(m_sum$coefficients[, 4])[ppos]
-    # tvals[i] <- unname(m_sum$coefficients[, 3])[ppos]
-    # m <- ols_regress(paste(response, "~", paste(predictors, collapse = " + ")), l)
-    # pvals[i] <- m$pvalues[ppos]
-    # tvals[i] <- m$tvalues[ppos]
   }

   minp <- which(pvals == min(pvals))
-  # tvals <- abs(tvals)
-  # maxt <- which(tvals == max(tvals))
   preds <- all_pred[minp]
   lpreds <- length(preds)
   fr <- glm(paste(response, "~", paste(preds, collapse = " + ")), l, family = binomial(link = 'logit'))
   mfs <- blr_model_fit_stats(fr)
   aic <- mfs$m_aic
   bic <- mfs$m_bic
   dev <- mfs$m_deviance
-  # fr <- ols_regress(paste(response, "~",
-  #                         paste(preds, collapse = " + ")), l)
-  # rsq <- fr$rsq
-  # adjrsq <- fr$adjr
-  # cp <- ols_mallows_cp(fr$model, model)
-  # aic <- ols_aic(fr$model)
-  # sbc <- ols_sbc(fr$model)
-  # sbic <- ols_sbic(fr$model, model)
-  # rmse <- sqrt(fr$ems)
-  # betas <- append(betas, fr$betas)
-  # lbetas <- append(lbetas, length(fr$betas))
-  # pvalues <- append(pvalues, fr$pvalues)

   if (details) {
     cat("\n")
@@ -186,18 +166,9 @@ blr_step_p_both.default <- function(model, pent = 0.1, prem = 0.3, details = FAL
     m_sum <- Anova(m, test.statistic = "Wald")
     pvals[i] <- m_sum$`Pr(>Chisq)`[ppos]
     tvals[i] <- m_sum$Chisq[ppos]
-    # m_sum <- summary(m)
-    # pvals[i] <- unname(m_sum$coefficients[, 4])[ppos]
-    # tvals[i] <- unname(m_sum$coefficients[, 3])[ppos]
-    # m <- ols_regress(paste(response, "~",
-    #                        paste(predictors, collapse = " + ")), l)
-    # pvals[i] <- m$pvalues[ppos]
-    # tvals[i] <- m$tvalues[ppos]
   }

   minp <- which(pvals == min(pvals))
-  # tvals <- abs(tvals)
-  # maxt <- which(tvals == max(tvals))

   if (pvals[minp] <= pent) {

@@ -211,18 +182,6 @@ blr_step_p_both.default <- function(model, pent = 0.1, prem = 0.3, details = FAL
       aic <- c(aic, mfs$m_aic)
       bic <- c(bic, mfs$m_bic)
       dev <- c(dev, mfs$m_deviance)
-      # fr <- ols_regress(paste(response, "~",
-      #                         paste(preds, collapse = " + ")), l)
-      # rsq <- c(rsq, fr$rsq)
-      # adjrsq <- c(adjrsq, fr$adjr)
-      # aic <- c(aic, ols_aic(fr$model))
-      # sbc <- c(sbc, ols_sbc(fr$model))
-      # sbic <- c(sbic, ols_sbic(fr$model, model))
-      # cp <- c(cp, ols_mallows_cp(fr$model, model))
-      # rmse <- c(rmse, sqrt(fr$ems))
-      # betas <- append(betas, fr$betas)
-      # lbetas <- append(lbetas, length(fr$betas))
-      # pvalues <- append(pvalues, fr$pvalues)

       if (details == TRUE) {
         cat("\n")
@@ -255,8 +214,6 @@ blr_step_p_both.default <- function(model, pent = 0.1, prem = 0.3, details = FAL
                family = binomial(link = 'logit'))
     m_sum <- Anova(m2, test.statistic = "Wald")
     pvals_r <- m_sum$`Pr(>Chisq)`
-    # tvals_r <- m_sum$Chisq[ppos]
-    # tvals_r <- abs(unname(m_sum$coefficients[, 3])[-1])
     maxp <- which(pvals_r == max(pvals_r))
     if (pvals_r[maxp] > prem) {

@@ -271,16 +228,6 @@ blr_step_p_both.default <- function(model, pent = 0.1, prem = 0.3, details = FAL
       aic <- c(aic, mfs$m_aic)
       bic <- c(bic, mfs$m_bic)
       dev <- c(dev, mfs$m_deviance)
-      # rsq <- c(rsq, fr$rsq)
-      # adjrsq <- c(adjrsq, fr$adjr)
-      # aic <- c(aic, ols_aic(fr$model))
-      # sbc <- c(sbc, ols_sbc(fr$model))
-      # sbic <- c(sbic, ols_sbic(fr$model, model))
-      # cp <- c(cp, ols_mallows_cp(fr$model, model))
-      # rmse <- c(rmse, sqrt(fr$ems))
-      # betas <- append(betas, fr$betas)
-      # lbetas <- append(lbetas, length(fr$betas))
-      # pvalues <- append(pvalues, fr$pvalues)

       if (details) {
         cat("\n")
