[![DecisionTree](http://pkg.julialang.org/badges/DecisionTree_0.6.svg)](http://pkg.julialang.org/?pkg=DecisionTree&ver=0.6)
[![DecisionTree](http://pkg.julialang.org/badges/DecisionTree_0.7.svg)](http://pkg.julialang.org/?pkg=DecisionTree&ver=0.7)

- Julia implementation of Decision Trees & Random Forests
+ Julia implementation of Decision Tree and Random Forest algorithms

## Classification
* pre-pruning (max depth, min leaf size)
@@ -72,7 +72,7 @@ Also have a look at these [classification](https://github.com/cstjean/ScikitLear
## Native API
### Classification Example
- Pruned Tree Classifier
+ Decision Tree Classifier

```julia
# train full-tree classifier
model = build_tree(labels, features)
@@ -87,11 +87,21 @@ apply_tree_proba(model, [5.9,3.0,5.1,1.9], ["setosa", "versicolor", "virginica"]
# run n-fold cross validation for pruned tree,
# using 90% purity threshold pruning, and 3 CV folds
accuracy = nfoldCV_tree(labels, features, 0.9, 3)
+
+ # set of classification build_tree() parameters and respective default values
+ # max_depth: maximum depth of the decision tree (default: -1, no maximum)
+ # min_samples_leaf: the minimum number of samples each leaf needs to have (default: 1)
+ # min_samples_split: the minimum number of samples needed for a split (default: 2)
+ # min_purity_increase: minimum purity increase needed for a split (default: 0.0)
+ # nsubfeatures: number of features to select at random (default: 0, keep all)
+ nsubfeatures = 0; maxdepth = -1; min_samples_leaf = 1; min_samples_split = 2; min_purity_increase = 0.0;
+ model = build_tree(labels, features, nsubfeatures, maxdepth, min_samples_leaf, min_samples_split, min_purity_increase)
+
```
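The snippets above assume `labels` and `features` were loaded earlier in the README (the Iris data in the original examples). For a self-contained run, here is a minimal sketch with made-up toy data; only `build_tree`, `print_tree`, and `apply_tree` come from the package, the data itself is hypothetical:

```julia
using DecisionTree

# hypothetical toy data, standing in for the Iris features/labels used above
features = [5.9 3.0 5.1 1.9;
            5.1 3.5 1.4 0.2;
            6.7 3.1 4.4 1.4;
            4.9 3.0 1.4 0.2]
labels = ["virginica", "setosa", "versicolor", "setosa"]

model = build_tree(labels, features)
print_tree(model, 5)                      # pretty-print the tree down to depth 5
apply_tree(model, [5.1, 3.5, 1.4, 0.2])   # expected to predict "setosa"
```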
Random Forest Classifier
```julia
# train random forest classifier
- # using 2 random features, 10 trees, 0.5 portion of samples per tree (optional), and a maximum tree depth of 6 (optional)
+ # using 2 random features, 10 trees, 0.5 portion of samples per tree, and a maximum tree depth of 6
model = build_forest(labels, features, 2, 10, 0.5, 6)
# apply learned model
apply_forest(model, [5.9, 3.0, 5.1, 1.9])
@@ -100,6 +110,14 @@ apply_forest_proba(model, [5.9,3.0,5.1,1.9], ["setosa", "versicolor", "virginica
# run n-fold cross validation for forests
# using 2 random features, 10 trees, 3 folds, and 0.5 portion of samples per tree (optional)
accuracy = nfoldCV_forest(labels, features, 2, 10, 3, 0.5)
+
+ # set of classification build_forest() parameters and respective default values
+ # nsubfeatures: number of features to consider at random per split (default: 0, keep all)
+ # ntrees: number of trees to train (default: 10)
+ # partialsampling: fraction of samples to train each tree on (default: 0.7)
+ # max_depth: maximum depth of the decision trees (default: -1, no maximum)
+ nsubfeatures = 0; ntrees = 10; partialsampling = 0.7; maxdepth = -1;
+ model = build_forest(labels, features, nsubfeatures, ntrees, partialsampling, maxdepth)
```
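A note on batch use: `apply_forest` (like `apply_tree`) also accepts a feature matrix and returns one prediction per row, and the package exports a `confusion_matrix` helper. A short sketch, assuming the `model`, `features`, and `labels` from above:

```julia
# predict a label for every row of the feature matrix
predictions = apply_forest(model, features)

# summarize agreement between true and predicted labels
cm = confusion_matrix(labels, predictions)
```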
Adaptive-Boosted Decision Stumps Classifier
```julia
@@ -129,11 +147,21 @@ apply_tree(model, [-0.9,3.0,5.1,1.9,0.0])
# run n-fold cross validation, using 3 folds and averaging of 5 samples per leaf (optional)
# returns array of coefficients of determination (R^2)
r2 = nfoldCV_tree(labels, features, 3, 5)
+
+ # set of regression build_tree() parameters and respective default values
+ # max_depth: maximum depth of the decision tree (default: -1, no maximum)
+ # min_samples_leaf: the minimum number of samples each leaf needs to have (default: 5)
+ # min_samples_split: the minimum number of samples needed for a split (default: 2)
+ # min_purity_increase: minimum purity increase needed for a split (default: 0.0)
+ # nsubfeatures: number of features to select at random (default: 0, keep all)
+ min_samples_leaf = 5; nsubfeatures = 0; max_depth = -1; min_samples_split = 2; min_purity_increase = 0.0;
+ model = build_tree(labels, features, min_samples_leaf, nsubfeatures, max_depth, min_samples_split, min_purity_increase)
+
```
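Regression uses the same entry points, just with `Float64` labels. The following is a minimal self-contained sketch on synthetic data; the data-generating formula is invented for illustration:

```julia
using DecisionTree

# synthetic regression data: 200 samples, 5 features
n = 200
features = randn(n, 5)
labels = 2 .* features[:, 1] .+ sin.(features[:, 2]) .+ 0.1 .* randn(n)

# Float64 labels select the regression method of build_tree
model = build_tree(labels, features)
apply_tree(model, randn(5))   # predict a single sample
```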
Regression Random Forest
```julia
# train regression forest, using 2 random features, 10 trees,
- # averaging of 5 samples per leaf (optional), and 0.7 portion of samples per tree (optional)
+ # averaging of 5 samples per leaf, and 0.7 portion of samples per tree
model = build_forest(labels, features, 2, 10, 5, 0.7)
# apply learned model
apply_forest(model, [-0.9, 3.0, 5.1, 1.9, 0.0])
@@ -142,4 +170,13 @@ apply_forest(model, [-0.9,3.0,5.1,1.9,0.0])
# and 0.7 portion of samples per tree (optional)
# returns array of coefficients of determination (R^2)
r2 = nfoldCV_forest(labels, features, 2, 10, 3, 5, 0.7)
+
+ # set of regression build_forest() parameters and respective default values
+ # nsubfeatures: number of features to consider at random per split (default: 0, keep all)
+ # ntrees: number of trees to train (default: 10)
+ # partialsampling: fraction of samples to train each tree on (default: 0.7)
+ # max_depth: maximum depth of the decision trees (default: -1, no maximum)
+ # min_samples_leaf: the minimum number of samples each leaf needs to have (default: 5)
+ nsubfeatures = 0; ntrees = 10; min_samples_leaf = 5; partialsampling = 0.7; max_depth = -1;
+ model = build_forest(labels, features, nsubfeatures, ntrees, min_samples_leaf, partialsampling, max_depth)
```
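The R^2 that `nfoldCV_forest` reports is the standard coefficient of determination, which can also be computed by hand from batch predictions. A sketch, assuming the regression `model`, `features`, and `labels` from above (`mean` comes from the `Statistics` stdlib on Julia >= 0.7):

```julia
using Statistics: mean

preds = apply_forest(model, features)        # matrix input: one prediction per row
ss_res = sum((labels .- preds) .^ 2)         # residual sum of squares
ss_tot = sum((labels .- mean(labels)) .^ 2)  # total sum of squares
r2 = 1 - ss_res / ss_tot                     # coefficient of determination
```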