Skip to content

Commit 58eabcc

Browse files
author
hbaniecki
committed
update&test documentation #65
1 parent 3c8da6b commit 58eabcc

16 files changed

+485
-429
lines changed

.Rbuildignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,4 @@ modelStudio_my_test.R
1818
^LICENSE$
1919
^CONTRIBUTING.md
2020
^\.github$
21+
^pickle$

.github/workflows/R-CMD-check.yaml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,9 @@ jobs:
2727
config:
2828
- {os: windows-latest, r: '3.6'}
2929
- {os: macOS-latest, r: '3.6'}
30-
- {os: macOS-latest, r: 'devel'}
3130
- {os: ubuntu-16.04, r: '3.5', rspm: "https://demo.rstudiopm.com/all/__linux__/xenial/latest"}
3231
- {os: ubuntu-16.04, r: '3.6', rspm: "https://demo.rstudiopm.com/all/__linux__/xenial/latest"}
32+
#- {os: macOS-latest, r: 'devel'} this is bugged
3333

3434
env:
3535
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
@@ -71,8 +71,6 @@ jobs:
7171
- name: Install dependencies
7272
run: |
7373
remotes::install_deps(dependencies = TRUE)
74-
remotes::install_github("https://github.com/r-lib/xml2")
75-
remotes::install_cran("processx")
7674
remotes::install_cran("rcmdcheck")
7775
shell: Rscript {0}
7876

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,3 +45,5 @@ modelStudio.Rproj
4545
#*.html
4646

4747
docs/*
48+
49+
*.pickle

R/modelStudio.R

Lines changed: 32 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -38,15 +38,15 @@
3838
#' @references
3939
#'
4040
#' \itemize{
41-
#' \item Wrapper for the function is implemented in \href{https://modeloriented.github.io/DALEX/}{\bold{DALEX}}
41+
#' \item The input object is implemented in \href{https://modeloriented.github.io/DALEX/}{\bold{DALEX}}
4242
#' \item Feature Importance, Ceteris Paribus, Partial Dependence and Accumulated Dependence plots
4343
#' are implemented in \href{https://modeloriented.github.io/ingredients/}{\bold{ingredients}}
4444
#' \item Break Down and Shapley Values plots are implemented in \href{https://modeloriented.github.io/iBreakDown/}{\bold{iBreakDown}}
4545
#' }
4646
#'
4747
#' @seealso
48-
#' Vignettes: \href{https://modeloriented.github.io/modelStudio/articles/vignette_examples.html}{\bold{modelStudio - R & python examples}}
49-
#' and \href{https://modeloriented.github.io/modelStudio/articles/vignette_modelStudio.html}{\bold{modelStudio - perks and features}}
48+
#' Vignettes: \href{https://modeloriented.github.io/modelStudio/articles/ms-r-python-examples.html}{\bold{modelStudio - R & Python examples}}
49+
#' and \href{https://modeloriented.github.io/modelStudio/articles/ms-perks-features.html}{\bold{modelStudio - perks and features}}
5050
#'
5151
#' @examples
5252
#' library("DALEX")
@@ -55,9 +55,7 @@
5555
#' #:# ex1 classification on 'titanic_imputed' dataset
5656
#'
5757
#' # fit a model
58-
#' model_titanic <- glm(survived ~.,
59-
#' data = titanic_imputed,
60-
#' family = "binomial")
58+
#' model_titanic <- glm(survived ~., data = titanic_imputed, family = "binomial")
6159
#'
6260
#' # create an explainer for the model
6361
#' explainer_titanic <- explain(model_titanic,
@@ -70,8 +68,9 @@
7068
#' rownames(new_observations) <- c("Lucas","James")
7169
#'
7270
#' # make a studio for the model
73-
#' modelStudio(explainer_titanic, new_observations,
74-
#' N = 100, B = 10)
71+
#' modelStudio(explainer_titanic,
72+
#' new_observations,
73+
#' N = 100, B = 10) # faster example
7574
#'
7675
#' \donttest{
7776
#'
@@ -88,32 +87,46 @@
8887
#' rownames(new_apartments) <- c("ap1","ap2")
8988
#'
9089
#' # change dashboard dimensions and animation length
91-
#' modelStudio(explainer_apartments, new_apartments,
92-
#' facet_dim = c(2, 3), time = 800)
90+
#' modelStudio(explainer_apartments,
91+
#' new_apartments,
92+
#' facet_dim = c(2, 3),
93+
#' time = 800)
9394
#'
9495
#' # add information about true labels
95-
#' modelStudio(explainer_apartments, new_apartments,
96-
#' new_observation_y = apartments[1:2, 1])
96+
#' modelStudio(explainer_apartments,
97+
#' new_apartments,
98+
#' new_observation_y = new_apartments$m2.price)
9799
#'
98100
#' # don't compute EDA plots
99-
#' modelStudio(explainer_apartments, eda = FALSE)
101+
#' modelStudio(explainer_apartments,
102+
#' eda = FALSE)
100103
#'
101104
#'
102105
#' #:# ex3 xgboost model on 'HR' dataset
103106
#' library("xgboost")
104107
#'
105-
#' model_matrix <- model.matrix(status == "fired" ~ . -1, HR)
106-
#' data <- xgb.DMatrix(model_matrix, label = HR$status == "fired")
108+
#' # fit a model
109+
#' HR_matrix <- model.matrix(status == "fired" ~ . -1, HR)
110+
#'
111+
#' xgb_matrix <- xgb.DMatrix(HR_matrix, label = HR$status == "fired")
107112
#'
108113
#' params <- list(max_depth = 7, objective = "binary:logistic", eval_metric = "auc")
109114
#'
110-
#' model_HR <- xgb.train(params, data, nrounds = 300)
115+
#' model_HR <- xgb.train(params, xgb_matrix, nrounds = 300)
111116
#'
117+
#' # create an explainer for the model
112118
#' explainer_HR <- explain(model_HR,
113-
#' data = model_matrix,
114-
#' y = HR$status == "fired")
119+
#' data = HR_matrix,
120+
#' y = HR$status == "fired",
121+
#' label = "xgboost")
115122
#'
116-
#' modelStudio(explainer_HR)
123+
#' # pick observations
124+
#' new_observation <- HR_matrix[1:2, , drop=FALSE]
125+
#' rownames(new_observation) <- c("id1", "id2")
126+
#'
127+
#' # make a studio for the model
128+
#' modelStudio(explainer_HR,
129+
#' new_observation)
117130
#'
118131
#' }
119132
#'

R/modelStudioOptions.R

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -48,35 +48,35 @@
4848
#' @references
4949
#'
5050
#' \itemize{
51-
#' \item Wrapper for the function is implemented in \href{https://modeloriented.github.io/DALEX/}{\bold{DALEX}}
51+
#' \item The input object is implemented in \href{https://modeloriented.github.io/DALEX/}{\bold{DALEX}}
5252
#' \item Feature Importance, Ceteris Paribus, Partial Dependence and Accumulated Dependence plots
5353
#' are implemented in \href{https://modeloriented.github.io/ingredients/}{\bold{ingredients}}
5454
#' \item Break Down and Shapley Values plots are implemented in \href{https://modeloriented.github.io/iBreakDown/}{\bold{iBreakDown}}
5555
#' }
5656
#'
5757
#' @seealso
58-
#' Vignettes: \href{https://modeloriented.github.io/modelStudio/articles/vignette_examples.html}{\bold{modelStudio - R & python examples}}
59-
#' and \href{https://modeloriented.github.io/modelStudio/articles/vignette_modelStudio.html}{\bold{modelStudio - perks and features}}
58+
#' Vignettes: \href{https://modeloriented.github.io/modelStudio/articles/ms-r-python-examples.html}{\bold{modelStudio - R & Python examples}}
59+
#' and \href{https://modeloriented.github.io/modelStudio/articles/ms-perks-features.html}{\bold{modelStudio - perks and features}}
6060
#'
6161
#' @examples
6262
#' library("DALEX")
6363
#' library("modelStudio")
6464
#'
6565
#' # fit a model
66-
#' model_apartments <- glm(m2.price ~. ,
67-
#' data = apartments)
66+
#' model_apartments <- glm(m2.price ~. , data = apartments)
6867
#'
6968
#' # create an explainer for the model
7069
#' explainer_apartments <- DALEX::explain(model_apartments,
7170
#' data = apartments,
72-
#' y = apartments$m2.price)
71+
#' y = apartments$m2.price,
72+
#' label = "glm")
7373
#'
7474
#' # pick observations
75-
#' new_apartments <- apartments[1:2,]
76-
#' rownames(new_apartments) <- c("ap1","ap2")
75+
#' new_observation <- apartments[1:2,]
76+
#' rownames(new_observation) <- c("ap1","ap2")
7777
#'
7878
#' # modify default options
79-
#' op <- modelStudioOptions(
79+
#' new_options <- modelStudioOptions(
8080
#' show_subtitle = TRUE,
8181
#' bd_subtitle = "Hello World",
8282
#' line_size = 5,
@@ -88,8 +88,10 @@
8888
#' )
8989
#'
9090
#' # make a studio for the model
91-
#' modelStudio(explainer_apartments, new_apartments,
92-
#' N = 100, B = 10, options = op)
91+
#' modelStudio(explainer_apartments,
92+
#' new_observation,
93+
#' options = new_options,
94+
#' N = 100, B = 10) # faster example
9395
#'
9496
#' @export
9597
#' @rdname modelStudioOptions

README.md

Lines changed: 40 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,13 @@
1010

1111
The `modelStudio` package **automates the Explanatory Analysis of Machine Learning predictive models**. Generate advanced interactive and animated model explanations in the form of a **serverless HTML site** with only one line of code. This tool is model agnostic, therefore compatible with most of the black box predictive models and frameworks (e.g.&nbsp;`mlr/mlr3`, `xgboost`, `caret`, `h2o`, `scikit-learn`, `lightGBM`, `keras/tensorflow`).
1212

13-
The main `modelStudio()` function computes various (instance and dataset level) model explanations and produces an&nbsp**interactive,&nbsp;customisable dashboard made with D3.js**. It consists of multiple panels for plots with their short descriptions. Easily&nbsp;**save&nbsp;and&nbsp;share** the dashboard with others. Tools for model exploration unite with tools for EDA (Exploratory Data Analysis) to give a broad overview of the model behavior.
13+
The main `modelStudio()` function computes various (instance and dataset level) model explanations and produces an&nbsp;**interactive,&nbsp;customisable dashboard made with D3.js**. It consists of multiple panels for plots with their short descriptions. Easily&nbsp;**save&nbsp;and&nbsp;share** the dashboard with others. Tools for model exploration unite with tools for EDA (Exploratory Data Analysis) to give a broad overview of the model behavior.
1414

1515
<!--- [explain FIFA19](https://pbiecek.github.io/explainFIFA19/) &emsp; --->
1616
<!--- [explain Lung Cancer](https://github.com/hbaniecki/transparent_xai/) &emsp; --->
1717
&emsp; &emsp; &emsp; &emsp; &emsp; &emsp;
1818
[**explain FIFA20**](https://pbiecek.github.io/explainFIFA20/) &emsp;
19-
[**R & Python examples**](http://modelstudio.drwhy.ai/articles/vignette_examples.html) &emsp;
19+
[**R & Python examples**](http://modelstudio.drwhy.ai/articles/ms-r-python-examples.html) &emsp;
2020
[**More Resources**](http://modelstudio.drwhy.ai/#more-resources) &emsp;
2121
[**FAQ & Troubleshooting**](https://github.com/ModelOriented/modelStudio/issues/54)
2222

@@ -41,9 +41,7 @@ library("DALEX")
4141
library("modelStudio")
4242

4343
# fit a model
44-
model <- glm(survived ~.,
45-
data = titanic_imputed,
46-
family = "binomial")
44+
model <- glm(survived ~., data = titanic_imputed, family = "binomial")
4745

4846
# create an explainer for the model
4947
explainer <- explain(model,
@@ -59,18 +57,18 @@ modelStudio(explainer)
5957

6058
![](man/figures/long.gif)
6159

62-
## R & Python Examples [more](http://modelstudio.drwhy.ai/articles/vignette_examples.html)
60+
## R & Python Examples [more](http://modelstudio.drwhy.ai/articles/ms-r-python-examples.html)
6361

6462
The `modelStudio()` function uses `DALEX` explainers created with `DALEX::explain()` or `DALEXtra::explain_*()`.
6563

6664
```r
67-
# update main dependencies
68-
install.packages("ingredients")
69-
install.packages("iBreakDown")
70-
7165
# packages for explainer objects
7266
install.packages("DALEX")
7367
install.packages("DALEXtra")
68+
69+
# update main dependencies
70+
install.packages("ingredients")
71+
install.packages("iBreakDown")
7472
```
7573

7674
### mlr [dashboard](https://modeloriented.github.io/modelStudio/mlr.html)
@@ -87,19 +85,16 @@ data <- DALEX::titanic_imputed
8785

8886
# split the data
8987
index <- sample(1:nrow(data), 0.7*nrow(data))
90-
train <- data[index, ]
91-
test <- data[-index, ]
88+
train <- data[index,]
89+
test <- data[-index,]
9290

9391
# mlr ClassifTask takes target as factor
9492
train$survived <- as.factor(train$survived)
9593

9694
# fit a model
97-
task <- makeClassifTask(id = "titanic",
98-
data = train,
99-
target = "survived")
95+
task <- makeClassifTask(id = "titanic", data = train, target = "survived")
10096

101-
learner <- makeLearner("classif.ranger",
102-
predict.type = "prob")
97+
learner <- makeLearner("classif.ranger", predict.type = "prob")
10398

10499
model <- train(learner, task)
105100

@@ -110,7 +105,7 @@ explainer <- explain_mlr(model,
110105
label = "mlr")
111106

112107
# pick observations
113-
new_observation <- test[1:2, ]
108+
new_observation <- test[1:2,]
114109
rownames(new_observation) <- c("id1", "id2")
115110

116111
# make a studio for the model
@@ -132,17 +127,18 @@ data <- DALEX::titanic_imputed
132127

133128
# split the data
134129
index <- sample(1:nrow(data), 0.7*nrow(data))
135-
train <- data[index, ]
136-
test <- data[-index, ]
130+
train <- data[index,]
131+
test <- data[-index,]
137132

138133
train_matrix <- model.matrix(survived ~.-1, train)
139134
test_matrix <- model.matrix(survived ~.-1, test)
140135

141136
# fit a model
142137
xgb_matrix <- xgb.DMatrix(train_matrix, label = train$survived)
143-
params <- list(eta = 0.01, subsample = 0.6, max_depth = 7, min_child_weight = 3,
144-
objective = "binary:logistic", eval_metric = "auc")
145-
model <- xgb.train(params, xgb_matrix, nrounds = 1000)
138+
139+
params <- list(max_depth = 7, objective = "binary:logistic", eval_metric = "auc")
140+
141+
model <- xgb.train(params, xgb_matrix, nrounds = 500)
146142

147143
# create an explainer for the model
148144
explainer <- explain(model,
@@ -151,7 +147,7 @@ explainer <- explain(model,
151147
label = "xgboost")
152148

153149
# pick observations
154-
new_observation <- test_matrix[1:2,,drop=FALSE]
150+
new_observation <- test_matrix[1:2, , drop=FALSE]
155151
rownames(new_observation) <- c("id1", "id2")
156152

157153
# make a studio for the model
@@ -170,6 +166,11 @@ pip3 install dalex --force
170166

171167
Use the `pickle` Python module and the `reticulate` R package to easily make a studio for a model.
172168

169+
```r
170+
# package for pickle load
171+
install.packages("reticulate")
172+
```
173+
173174
In this example we will fit a `Pipeline MLPClassifier` model on `titanic` data.
174175

175176
First, use `dalex` in Python:
@@ -193,45 +194,47 @@ y = data.survived
193194
X_train, X_test, y_train, y_test = train_test_split(X, y)
194195

195196
# fit a pipeline model
196-
numeric_features = ['age', 'fare', 'sibsp', 'parch']
197-
numeric_transformer = Pipeline(
197+
numerical_features = ['age', 'fare', 'sibsp', 'parch']
198+
numerical_transformer = Pipeline(
198199
steps=[
199200
('imputer', SimpleImputer(strategy='median')),
200201
('scaler', StandardScaler())
201-
]
202+
]
202203
)
203204
categorical_features = ['gender', 'class', 'embarked']
204205
categorical_transformer = Pipeline(
205206
steps=[
206207
('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
207208
('onehot', OneHotEncoder(handle_unknown='ignore'))
208-
]
209+
]
209210
)
210211

211212
preprocessor = ColumnTransformer(
212213
transformers=[
213-
('num', numeric_transformer, numeric_features),
214+
('num', numerical_transformer, numerical_features),
214215
('cat', categorical_transformer, categorical_features)
215-
]
216+
]
216217
)
217218

219+
classifier = MLPClassifier(hidden_layer_sizes=(150,100,50), max_iter=500)
220+
218221
model = Pipeline(
219222
steps=[
220223
('preprocessor', preprocessor),
221-
('classifier', MLPClassifier(hidden_layer_sizes=(150,100,50), max_iter=500))
222-
]
224+
('classifier', classifier)
225+
]
223226
)
224227
model.fit(X_train, y_train)
225228

226229
# create an explainer for the model
227-
explainer = dx.Explainer(model, X_test, y_test, label = 'scikit-learn')
230+
explainer = dx.Explainer(model, data=X_test, y=y_test, label='scikit-learn')
228231

229232
#! remove residual_function before dump !
230233
explainer.residual_function = None
231234

232235
# pack the explainer into a pickle file
233236
import pickle
234-
pickle_out = open("explainer_scikitlearn.pickle","wb")
237+
pickle_out = open('explainer_scikitlearn.pickle', 'wb')
235238
pickle.dump(explainer, pickle_out)
236239
pickle_out.close()
237240
```
@@ -241,7 +244,7 @@ Then, use `modelStudio` in R:
241244
```r
242245
# load the explainer from the pickle file
243246
library(reticulate)
244-
explainer <- py_load_object('explainer_scikitlearn.pickle', pickle = "pickle")
247+
explainer <- py_load_object("explainer_scikitlearn.pickle", pickle = "pickle")
245248

246249
# make a studio for the model
247250
library(modelStudio)
@@ -261,9 +264,9 @@ or with [`r2d3::save_d3_html()`](https://rstudio.github.io/r2d3/articles/publish
261264

262265
- Theoretical introduction to the plots: [Explanatory Model Analysis. Explore, Explain and Examine Predictive Models.](https://pbiecek.github.io/ema)
263266

264-
- Vignette: [modelStudio - R & python examples](https://modeloriented.github.io/modelStudio/articles/vignette_examples.html)
267+
- Vignette: [modelStudio - R & Python examples](https://modeloriented.github.io/modelStudio/articles/ms-r-python-examples.html)
265268

266-
- Vignette: [modelStudio - perks and features](https://modeloriented.github.io/modelStudio/articles/vignette_modelStudio.html)
269+
- Vignette: [modelStudio - perks and features](https://modeloriented.github.io/modelStudio/articles/ms-perks-features.html)
267270

268271
- Conference poster: [MLinPL2019](misc/MLinPL2019_modelStudio_poster.pdf)
269272

0 commit comments

Comments
 (0)