Skip to content

Commit e6a7e5a

Browse files
authored
Merge pull request #69 from bensadeghi/param_names
Param names
2 parents b5e6d66 + 6fc5d08 commit e6a7e5a

File tree

9 files changed

+136
-136
lines changed

9 files changed

+136
-136
lines changed

README.md

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -93,9 +93,9 @@ accuracy = nfoldCV_tree(labels, features, 0.9, 3)
9393
# min_samples_leaf: the minimum number of samples each leaf needs to have (default: 1)
9494
# min_samples_split: the minimum number of samples needed for a split (default: 2)
9595
# min_purity_increase: minimum purity increase needed for a split (default: 0.0)
96-
# nsubfeatures: number of features to select at random (default: 0, keep all)
97-
nsubfeatures=0; maxdepth=-1; min_samples_leaf=1; min_samples_split=2; min_purity_increase=0.0;
98-
model = build_tree(labels, features, nsubfeatures, maxdepth, min_samples_leaf, min_samples_split, min_purity_increase)
96+
# n_subfeatures: number of features to select at random (default: 0, keep all)
97+
n_subfeatures=0; max_depth=-1; min_samples_leaf=1; min_samples_split=2; min_purity_increase=0.0;
98+
model = build_tree(labels, features, n_subfeatures, max_depth, min_samples_leaf, min_samples_split, min_purity_increase)
9999

100100
```
101101
Random Forest Classifier
@@ -112,12 +112,12 @@ apply_forest_proba(model, [5.9,3.0,5.1,1.9], ["setosa", "versicolor", "virginica
112112
accuracy = nfoldCV_forest(labels, features, 2, 10, 3, 0.5)
113113

114114
# set of classification build_forest() parameters and respective default values
115-
# nsubfeatures: number of features to consider at random per split (default: 0, keep all)
116-
# ntrees: number of trees to train (default: 10)
117-
# partialsampling: fraction of samples to train each tree on (default: 0.7)
115+
# n_subfeatures: number of features to consider at random per split (default: 0, keep all)
116+
# n_trees: number of trees to train (default: 10)
117+
# partial_sampling: fraction of samples to train each tree on (default: 0.7)
118118
# max_depth: maximum depth of the decision trees (default: no maximum)
119-
nsubfeatures=0; ntrees=10; partialsampling=0.7; maxdepth=-1;
120-
model = build_forest(labels, features, nsubfeatures, ntrees, partialsampling, maxdepth)
119+
n_subfeatures=0; n_trees=10; partial_sampling=0.7; max_depth=-1;
120+
model = build_forest(labels, features, n_subfeatures, n_trees, partial_sampling, max_depth)
121121
```
122122
Adaptive-Boosted Decision Stumps Classifier
123123
```julia
@@ -153,9 +153,9 @@ r2 = nfoldCV_tree(labels, features, 3, 5)
153153
# min_samples_leaf: the minimum number of samples each leaf needs to have (default: 5)
154154
# min_samples_split: the minimum number of samples needed for a split (default: 2)
155155
# min_purity_increase: minimum purity increase needed for a split (default: 0.0)
156-
# nsubfeatures: number of features to select at random (default: 0, keep all)
157-
min_samples_leaf = 5; nsubfeatures = 0; max_depth = -1; min_samples_split = 2; min_purity_increase = 0.0;
158-
model = build_tree(labels, features, min_samples_leaf, nsubfeatures, max_depth, min_samples_split, min_purity_increase)
156+
# n_subfeatures: number of features to select at random (default: 0, keep all)
157+
min_samples_leaf = 5; n_subfeatures = 0; max_depth = -1; min_samples_split = 2; min_purity_increase = 0.0;
158+
model = build_tree(labels, features, min_samples_leaf, n_subfeatures, max_depth, min_samples_split, min_purity_increase)
159159

160160
```
161161
Regression Random Forest
@@ -172,11 +172,11 @@ apply_forest(model, [-0.9,3.0,5.1,1.9,0.0])
172172
r2 = nfoldCV_forest(labels, features, 2, 10, 3, 5, 0.7)
173173

174174
# set of regression build_forest() parameters and respective default values
175-
# nsubfeatures: number of features to consider at random per split (default: 0, keep all)
176-
# ntrees: number of trees to train (default: 10)
177-
# partialsampling: fraction of samples to train each tree on (default: 0.7)
175+
# n_subfeatures: number of features to consider at random per split (default: 0, keep all)
176+
# n_trees: number of trees to train (default: 10)
177+
# partial_sampling: fraction of samples to train each tree on (default: 0.7)
178178
# max_depth: maximum depth of the decision trees (default: no maximum)
179179
# min_samples_leaf: the minimum number of samples each leaf needs to have (default: 5)
180-
nsubfeatures=0; ntrees=10; min_samples_leaf=5; partialsampling=0.7; max_depth=-1;
181-
model = build_forest(labels, features, nsubfeatures, ntrees, min_samples_leaf, partialsampling, max_depth)
180+
n_subfeatures=0; n_trees=10; min_samples_leaf=5; partial_sampling=0.7; max_depth=-1;
181+
model = build_forest(labels, features, n_subfeatures, n_trees, min_samples_leaf, partial_sampling, max_depth)
182182
```

src/classification/main.jl

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -70,24 +70,24 @@ function build_stump(labels::Vector, features::Matrix, weights=[0];
7070
Leaf(majority_vote(r_labels), r_labels))
7171
end
7272

73-
function build_tree(labels::Vector, features::Matrix, nsubfeatures=0, maxdepth=-1,
73+
function build_tree(labels::Vector, features::Matrix, n_subfeatures=0, max_depth=-1,
7474
min_samples_leaf=1, min_samples_split=2, min_purity_increase=0.0;
7575
rng=Base.GLOBAL_RNG)
7676
rng = mk_rng(rng)::AbstractRNG
77-
if maxdepth < -1
78-
error("Unexpected value for maxdepth: $(maxdepth) (expected: maxdepth >= 0, or maxdepth = -1 for infinite depth)")
77+
if max_depth < -1
78+
error("Unexpected value for max_depth: $(max_depth) (expected: max_depth >= 0, or max_depth = -1 for infinite depth)")
7979
end
80-
if maxdepth == -1
81-
maxdepth = typemax(Int64)
80+
if max_depth == -1
81+
max_depth = typemax(Int64)
8282
end
83-
if nsubfeatures == 0
84-
nsubfeatures = size(features, 2)
83+
if n_subfeatures == 0
84+
n_subfeatures = size(features, 2)
8585
end
8686
min_samples_leaf = Int64(min_samples_leaf)
8787
min_samples_split = Int64(min_samples_split)
8888
min_purity_increase = Float64(min_purity_increase)
8989
t = treeclassifier.fit(
90-
features, labels, nsubfeatures, maxdepth,
90+
features, labels, n_subfeatures, max_depth,
9191
min_samples_leaf, min_samples_split, min_purity_increase,
9292
rng=rng)
9393

@@ -191,23 +191,23 @@ end
191191
apply_tree_proba(tree::LeafOrNode, features::Matrix, labels) =
192192
stack_function_results(row->apply_tree_proba(tree, row, labels), features)
193193

194-
function build_forest(labels::Vector, features::Matrix, nsubfeatures=0, ntrees=10, partialsampling=0.7, maxdepth=-1; rng=Base.GLOBAL_RNG)
194+
function build_forest(labels::Vector, features::Matrix, n_subfeatures=0, n_trees=10, partial_sampling=0.7, max_depth=-1; rng=Base.GLOBAL_RNG)
195195
rng = mk_rng(rng)::AbstractRNG
196-
partialsampling = partialsampling > 1.0 ? 1.0 : partialsampling
196+
partial_sampling = partial_sampling > 1.0 ? 1.0 : partial_sampling
197197
Nlabels = length(labels)
198-
Nsamples = _int(partialsampling * Nlabels)
199-
forest = @parallel (vcat) for i in 1:ntrees
198+
Nsamples = _int(partial_sampling * Nlabels)
199+
forest = @parallel (vcat) for i in 1:n_trees
200200
inds = rand(rng, 1:Nlabels, Nsamples)
201-
build_tree(labels[inds], features[inds,:], nsubfeatures, maxdepth;
201+
build_tree(labels[inds], features[inds,:], n_subfeatures, max_depth;
202202
rng=rng)
203203
end
204204
return Ensemble([forest;])
205205
end
206206

207207
function apply_forest(forest::Ensemble, features::Vector)
208-
ntrees = length(forest)
209-
votes = Array{Any}(ntrees)
210-
for i in 1:ntrees
208+
n_trees = length(forest)
209+
votes = Array{Any}(n_trees)
210+
for i in 1:n_trees
211211
votes[i] = apply_tree(forest.trees[i], features)
212212
end
213213
if typeof(votes[1]) <: Float64
@@ -247,12 +247,12 @@ apply_forest_proba(forest::Ensemble, features::Matrix, labels) =
247247
stack_function_results(row->apply_forest_proba(forest, row, labels),
248248
features)
249249

250-
function build_adaboost_stumps(labels::Vector, features::Matrix, niterations::Integer; rng=Base.GLOBAL_RNG)
250+
function build_adaboost_stumps(labels::Vector, features::Matrix, n_iterations::Integer; rng=Base.GLOBAL_RNG)
251251
N = length(labels)
252252
weights = ones(N) / N
253253
stumps = Node[]
254254
coeffs = Float64[]
255-
for i in 1:niterations
255+
for i in 1:n_iterations
256256
new_stump = build_stump(labels, features, weights; rng=rng)
257257
predictions = apply_tree(new_stump, features)
258258
err = _weighted_error(labels, predictions, weights)

src/measures.jl

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -86,11 +86,11 @@ function _nfoldCV(classifier::Symbol, labels, features, args...)
8686
if classifier == :tree
8787
pruning_purity = args[1]
8888
elseif classifier == :forest
89-
nsubfeatures = args[1]
90-
ntrees = args[2]
91-
partialsampling = args[3]
89+
n_subfeatures = args[1]
90+
n_trees = args[2]
91+
partial_sampling = args[3]
9292
elseif classifier == :stumps
93-
niterations = args[1]
93+
n_iterations = args[1]
9494
end
9595
N = length(labels)
9696
ntest = _int(floor(N / nfolds))
@@ -111,10 +111,10 @@ function _nfoldCV(classifier::Symbol, labels, features, args...)
111111
end
112112
predictions = apply_tree(model, test_features)
113113
elseif classifier == :forest
114-
model = build_forest(train_labels, train_features, nsubfeatures, ntrees, partialsampling)
114+
model = build_forest(train_labels, train_features, n_subfeatures, n_trees, partial_sampling)
115115
predictions = apply_forest(model, test_features)
116116
elseif classifier == :stumps
117-
model, coeffs = build_adaboost_stumps(train_labels, train_features, niterations)
117+
model, coeffs = build_adaboost_stumps(train_labels, train_features, n_iterations)
118118
predictions = apply_adaboost_stumps(model, coeffs, test_features)
119119
end
120120
cm = confusion_matrix(test_labels, predictions)
@@ -127,8 +127,8 @@ function _nfoldCV(classifier::Symbol, labels, features, args...)
127127
end
128128

129129
nfoldCV_tree(labels::Vector, features::Matrix, pruning_purity::Real, nfolds::Integer) = _nfoldCV(:tree, labels, features, pruning_purity, nfolds)
130-
nfoldCV_forest(labels::Vector, features::Matrix, nsubfeatures::Integer, ntrees::Integer, nfolds::Integer, partialsampling=0.7) = _nfoldCV(:forest, labels, features, nsubfeatures, ntrees, partialsampling, nfolds)
131-
nfoldCV_stumps(labels::Vector, features::Matrix, niterations::Integer, nfolds::Integer) = _nfoldCV(:stumps, labels, features, niterations, nfolds)
130+
nfoldCV_forest(labels::Vector, features::Matrix, n_subfeatures::Integer, n_trees::Integer, nfolds::Integer, partial_sampling=0.7) = _nfoldCV(:forest, labels, features, n_subfeatures, n_trees, partial_sampling, nfolds)
131+
nfoldCV_stumps(labels::Vector, features::Matrix, n_iterations::Integer, nfolds::Integer) = _nfoldCV(:stumps, labels, features, n_iterations, nfolds)
132132

133133
### Regression ###
134134

@@ -152,10 +152,10 @@ function _nfoldCV{T<:Float64}(regressor::Symbol, labels::Vector{T}, features::Ma
152152
if regressor == :tree
153153
maxlabels = args[1]
154154
elseif regressor == :forest
155-
nsubfeatures = args[1]
156-
ntrees = args[2]
155+
n_subfeatures = args[1]
156+
n_trees = args[2]
157157
maxlabels = args[3]
158-
partialsampling = args[4]
158+
partial_sampling = args[4]
159159
end
160160
N = length(labels)
161161
ntest = _int(floor(N / nfolds))
@@ -173,7 +173,7 @@ function _nfoldCV{T<:Float64}(regressor::Symbol, labels::Vector{T}, features::Ma
173173
model = build_tree(train_labels, train_features, maxlabels, 0)
174174
predictions = apply_tree(model, test_features)
175175
elseif regressor == :forest
176-
model = build_forest(train_labels, train_features, nsubfeatures, ntrees, maxlabels, partialsampling)
176+
model = build_forest(train_labels, train_features, n_subfeatures, n_trees, maxlabels, partial_sampling)
177177
predictions = apply_forest(model, test_features)
178178
end
179179
err = mean_squared_error(test_labels, predictions)
@@ -190,6 +190,6 @@ function _nfoldCV{T<:Float64}(regressor::Symbol, labels::Vector{T}, features::Ma
190190
end
191191

192192
nfoldCV_tree{T<:Float64}(labels::Vector{T}, features::Matrix, nfolds::Integer, maxlabels::Integer=5) = _nfoldCV(:tree, labels, features, maxlabels, nfolds)
193-
nfoldCV_forest{T<:Float64}(labels::Vector{T}, features::Matrix, nsubfeatures::Integer, ntrees::Integer, nfolds::Integer, maxlabels::Integer=5, partialsampling=0.7) = _nfoldCV(:forest, labels, features, nsubfeatures, ntrees, maxlabels, partialsampling, nfolds)
193+
nfoldCV_forest{T<:Float64}(labels::Vector{T}, features::Matrix, n_subfeatures::Integer, n_trees::Integer, nfolds::Integer, maxlabels::Integer=5, partial_sampling=0.7) = _nfoldCV(:forest, labels, features, n_subfeatures, n_trees, maxlabels, partial_sampling, nfolds)
194194

195195

src/regression/main.jl

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,24 +9,24 @@ function build_stump{T<:Float64}(labels::Vector{T}, features::Matrix; rng=Base.G
99
end
1010

1111
function build_tree{T<:Float64}(
12-
labels::Vector{T}, features::Matrix, min_samples_leaf=5, nsubfeatures=0,
12+
labels::Vector{T}, features::Matrix, min_samples_leaf=5, n_subfeatures=0,
1313
max_depth=-1, min_samples_split=2, min_purity_increase=0.0;
1414
rng=Base.GLOBAL_RNG)
1515
rng = mk_rng(rng)::AbstractRNG
1616
if max_depth < -1
17-
error("Unexpected value for maxdepth: $(maxdepth) (expected: max_depth >= 0, or max_depth = -1 for infinite depth)")
17+
error("Unexpected value for max_depth: $(max_depth) (expected: max_depth >= 0, or max_depth = -1 for infinite depth)")
1818
end
1919
if max_depth == -1
2020
max_depth = typemax(Int64)
2121
end
22-
if nsubfeatures == 0
23-
nsubfeatures = size(features, 2)
22+
if n_subfeatures == 0
23+
n_subfeatures = size(features, 2)
2424
end
2525
min_samples_leaf = Int64(min_samples_leaf)
2626
min_samples_split = Int64(min_samples_split)
2727
min_purity_increase = Float64(min_purity_increase)
2828
t = treeregressor.fit(
29-
features, labels, nsubfeatures, max_depth,
29+
features, labels, n_subfeatures, max_depth,
3030
min_samples_leaf, min_samples_split, min_purity_increase,
3131
rng=rng)
3232

@@ -42,14 +42,14 @@ function build_tree{T<:Float64}(
4242
return _convert(t)
4343
end
4444

45-
function build_forest{T<:Float64}(labels::Vector{T}, features::Matrix, nsubfeatures=0, ntrees=10, min_samples_leaf=5, partialsampling=0.7, max_depth=-1; rng=Base.GLOBAL_RNG)
45+
function build_forest{T<:Float64}(labels::Vector{T}, features::Matrix, n_subfeatures=0, n_trees=10, min_samples_leaf=5, partial_sampling=0.7, max_depth=-1; rng=Base.GLOBAL_RNG)
4646
rng = mk_rng(rng)::AbstractRNG
47-
partialsampling = partialsampling > 1.0 ? 1.0 : partialsampling
47+
partial_sampling = partial_sampling > 1.0 ? 1.0 : partial_sampling
4848
Nlabels = length(labels)
49-
Nsamples = _int(partialsampling * Nlabels)
50-
forest = @parallel (vcat) for i in 1:ntrees
49+
Nsamples = _int(partial_sampling * Nlabels)
50+
forest = @parallel (vcat) for i in 1:n_trees
5151
inds = rand(rng, 1:Nlabels, Nsamples)
52-
build_tree(labels[inds], features[inds,:], min_samples_leaf, nsubfeatures, max_depth; rng=rng)
52+
build_tree(labels[inds], features[inds,:], min_samples_leaf, n_subfeatures, max_depth; rng=rng)
5353
end
5454
return Ensemble([forest;])
5555
end

0 commit comments

Comments
 (0)