Skip to content

Commit 107f8ef

Browse files
committed
Changed multiple files to allow for use of hyperopt Spaces for Tree-Parzen
1 parent bb84c53 commit 107f8ef

File tree

9 files changed

+309
-217
lines changed

9 files changed

+309
-217
lines changed

lkauto/algorithms/als.py

Lines changed: 59 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from hyperopt import hp
12
from lenskit.als import BiasedMFScorer, ImplicitMFScorer
23
from ConfigSpace import ConfigurationSpace, UniformIntegerHyperparameter, UniformFloatHyperparameter, CategoricalHyperparameter
34

@@ -8,16 +9,26 @@ def __init__(self, features, feedback="implicit", **kwargs):
89
self.features = features # store the features as an instance variable for testing
910

1011
@staticmethod
11-
def get_default_configspace(**kwargs):
12+
def get_default_configspace(hyperopt = False, **kwargs):
1213
"""
13-
return default configurationspace
14+
return default (hyperopt) configurationspace
1415
"""
15-
features = UniformIntegerHyperparameter('features', lower=5, upper=10000, default_value=1000, log=True)
16-
ureg = UniformFloatHyperparameter('ureg', lower=0.01, upper=0.1, default_value=0.1, log=True)
17-
ireg = UniformFloatHyperparameter('ireg', lower=0.01, upper=0.1, default_value=0.1, log=True)
16+
if hyperopt:
17+
cs = {
18+
"algo": "ImplicitMF",
19+
"features": hp.uniformint("features", 5, 10000),
20+
"ureg": hp.uniform("ureg", 0.01, 0.1),
21+
"ireg": hp.uniform("ireg", 0.01, 0.1),
22+
}
23+
24+
else:
25+
features = UniformIntegerHyperparameter('features', lower=5, upper=10000, default_value=1000, log=True)
26+
ureg = UniformFloatHyperparameter('ureg', lower=0.01, upper=0.1, default_value=0.1, log=True)
27+
ireg = UniformFloatHyperparameter('ireg', lower=0.01, upper=0.1, default_value=0.1, log=True)
28+
29+
cs = ConfigurationSpace()
30+
cs.add([features, ureg, ireg])
1831

19-
cs = ConfigurationSpace()
20-
cs.add([features, ureg, ireg])
2132
return cs
2233

2334

@@ -28,39 +39,51 @@ def __init__(self, features, feedback="explicit", **kwargs):
2839
self.features = features # store the features as an instance variable for testing
2940

3041
@staticmethod
31-
def get_default_configspace(**kwargs):
42+
def get_default_configspace(hyperopt = False, **kwargs):
3243
"""
3344
return default configuration spaces for hyperparameter
3445
"""
3546

36-
"""
37-
The authors of the original ALS paper (https://link.springer.com/chapter/10.1007/978-3-540-68880-8_32) stated:
38-
The most important discovery we made is that ALS-WR never overfits the data if we either increase
39-
the number of iterations or the number of hidden features.
40-
The paper stated that the improvement of the performance maximized around 1000 features.
41-
Therefore, we will set the upper bound and the default value of features to 10000.
42-
Since the authors just evaluated on one larger dataset, we still allow smaller and larger feature numbers
43-
but set the default value to 1000.
44-
"""
45-
# features = Integer('features', bounds=(2, 10000), default=1000, log=True) # No default value given
46-
# no default value given but we set the default value to 1000???
47-
features = UniformIntegerHyperparameter('features', lower=2, upper=10000, default_value=1000, log=True)
48-
"""
49-
The authors of the original ALS paper set the range of the regularization hyperparameter to from 0.03 - 0.065.
50-
https://link.springer.com/chapter/10.1007/978-3-540-68880-8_32
51-
Therefore we set the lower bound of the two regularization parameters (ureg and ireg) to 0.065.
52-
LensKit sets the default regularization hyperparameter to 0.1 Therefore we set the upper bound of the two
53-
regularization parameters (ureg and ireg) to 0.1.
54-
"""
55-
ureg = UniformFloatHyperparameter('ureg', lower=0.01, upper=0.1, default_value=0.1, log=True)
56-
ireg = UniformFloatHyperparameter('ireg', lower=0.01, upper=0.1, default_value=0.1, log=True)
47+
if hyperopt:
48+
cs = {
49+
"algo": "ALSBiasedMF",
50+
"features": hp.uniformint("features", 2, 10000),
51+
"ureq": hp.uniform("ureq", 0.01, 0.1),
52+
"ireq": hp.uniform("ireq", 0.01, 0.1),
53+
"bias": hp.choice("bias", [True, False]),
54+
}
5755

58-
"""
59-
The damping hyperparameter en- or disables a damping factor.
60-
In the future we may want to tune the damping values as well.
61-
"""
62-
bias = CategoricalHyperparameter('bias', choices=[True, False], default_value=True)
56+
else:
57+
58+
"""
59+
The authors of the original ALS paper (https://link.springer.com/chapter/10.1007/978-3-540-68880-8_32) stated:
60+
The most important discovery we made is that ALS-WR never overfits the data if we either increase
61+
the number of iterations or the number of hidden features.
62+
The paper stated that the improvement of the performance maximized around 1000 features.
63+
Therefore, we will set the upper bound of features to 10000 and the default value to 1000.
64+
Since the authors just evaluated on one larger dataset, we still allow smaller and larger feature numbers
65+
but set the default value to 1000.
66+
"""
67+
# features = Integer('features', bounds=(2, 10000), default=1000, log=True) # No default value given
68+
# no default value given but we set the default value to 1000???
69+
features = UniformIntegerHyperparameter('features', lower=2, upper=10000, default_value=1000, log=True)
70+
"""
71+
The authors of the original ALS paper set the range of the regularization hyperparameter from 0.03 to 0.065.
72+
https://link.springer.com/chapter/10.1007/978-3-540-68880-8_32
73+
Therefore we set the lower bound of the two regularization parameters (ureg and ireg) to 0.065.
74+
LensKit sets the default regularization hyperparameter to 0.1 Therefore we set the upper bound of the two
75+
regularization parameters (ureg and ireg) to 0.1.
76+
"""
77+
ureg = UniformFloatHyperparameter('ureg', lower=0.01, upper=0.1, default_value=0.1, log=True)
78+
ireg = UniformFloatHyperparameter('ireg', lower=0.01, upper=0.1, default_value=0.1, log=True)
79+
80+
"""
81+
The bias hyperparameter en- or disables the bias (damping) factor.
82+
In the future we may want to tune the damping values as well.
83+
"""
84+
bias = CategoricalHyperparameter('bias', choices=[True, False], default_value=True)
85+
86+
cs = ConfigurationSpace()
87+
cs.add([features, ureg, ireg, bias])
6388

64-
cs = ConfigurationSpace()
65-
cs.add([features, ureg, ireg, bias])
6689
return cs

lkauto/algorithms/bias.py

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from hyperopt import hp
12
from lenskit.basic.bias import BiasScorer
23
from ConfigSpace import ConfigurationSpace, UniformFloatHyperparameter
34

@@ -9,7 +10,7 @@ def __init__(self, feedback="explicit", **kwargs):
910
self.feedback = feedback
1011

1112
@staticmethod
12-
def get_default_configspace(number_item: int, number_user: int):
13+
def get_default_configspace(number_item: int, number_user: int, hyperopt = False):
1314
"""
1415
return default configurationspace
1516
"""
@@ -18,13 +19,21 @@ def get_default_configspace(number_item: int, number_user: int):
1819
LensKit does not give any hint on how to setup the damping values for the Bias algorithm.
1920
Therefore we evaluated the algorithms performance on 70 datasets to come up with the following ranges
2021
"""
21-
# lower bound 0 does not work because of log=True
22-
item_damping = UniformFloatHyperparameter('item_damping', lower=1e-5*number_item, upper=85*number_item,
23-
default_value=0.0025*number_item, log=True)
24-
user_damping = UniformFloatHyperparameter('user_damping', lower=1e-5*number_user, upper=85*number_user,
25-
default_value=0.0025*number_user, log=True)
26-
27-
cs = ConfigurationSpace()
28-
cs.add([item_damping, user_damping])
22+
if hyperopt:
23+
cs = {
24+
"algo": "Bias",
25+
"item_damping": hp.uniform("item_damping", 1e-5*number_item, 85*number_item),
26+
"user_damping": hp.uniform("user_damping", 1e-5*number_user, 85*number_user),
27+
}
28+
29+
else:
30+
# lower bound 0 does not work because of log=True
31+
item_damping = UniformFloatHyperparameter('item_damping', lower=1e-5*number_item, upper=85*number_item,
32+
default_value=0.0025*number_item, log=True)
33+
user_damping = UniformFloatHyperparameter('user_damping', lower=1e-5*number_user, upper=85*number_user,
34+
default_value=0.0025*number_user, log=True)
35+
36+
cs = ConfigurationSpace()
37+
cs.add([item_damping, user_damping])
2938

3039
return cs

lkauto/algorithms/funksvd.py

Lines changed: 38 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from hyperopt import hp
12
from lenskit.funksvd import FunkSVDScorer
23
from ConfigSpace import ConfigurationSpace, UniformIntegerHyperparameter, UniformFloatHyperparameter
34

@@ -10,35 +11,46 @@ def __init__(self, features, feedback="explicit", **kwargs):
1011
self.features = features
1112

1213
@staticmethod
13-
def get_default_configspace(**kwargs):
14+
def get_default_configspace(hyperopt = False, **kwargs):
1415
"""
15-
return default configurationspace
16+
return default (hyperopt) configurationspace
1617
"""
17-
features = UniformIntegerHyperparameter('features', lower=2, upper=10000, default_value=1000, log=True)
1818

19-
"""
20-
The authors of the original FunkSVD paper (https://sifter.org/~simon/journal/20061211.html) stated:
21-
Lrate is the learning rate, a rather arbitrary number which I fortuitously set to 0.001 on day one
22-
and regretted it every time I tried anything else after that. Err is the residual error from the
23-
current prediction.
24-
But the original dataset just evaluated the performance on the netflix price dataset. Other datasets
25-
perform well on ranges around 0.001.
26-
Therefore, the pip install -e .loatHyperparameter('lrate', lower=0.0001, upper=0.01, default_value=0.001)
19+
if hyperopt:
20+
cs = {
21+
"algo": "FunkSVD",
22+
"features": hp.uniformint("features", 2, 10000),
23+
"lrate": hp.uniform("lrate", 0.0001, 0.01),
24+
"reg": hp.uniform("reg", 0.001, 0.1),
25+
"damping": hp.uniform("damping", 0.01, 1000),
26+
}
2727

28-
"""
29-
lrate = UniformFloatHyperparameter('lrate', lower=0.0001, upper=0.01, default_value=0.001)
30-
"""
31-
The authors of the original FunkSVD paper (https://sifter.org/~simon/journal/20061211.html) stated:
32-
The point here is to try to cut down on over fitting, ultimately allowing us to use
33-
more features. Last I recall, Vincent liked K=0.02 or so, with well over 100 features (singular vector
34-
pairs--if you can still call them that).
35-
The default value of 0.02 is considered for the range. The range is set to a close range around the 0.02 value.
36-
The default value is taken from the LensKit Library.
37-
"""""
38-
reg = UniformFloatHyperparameter('reg', lower=0.001, upper=0.1, default_value=0.015)
39-
damping = UniformFloatHyperparameter('damping', lower=0.01, upper=1000, default_value=5, log=True)
40-
41-
cs = ConfigurationSpace()
42-
cs.add([features, lrate, reg, damping])
28+
else:
29+
features = UniformIntegerHyperparameter('features', lower=2, upper=10000, default_value=1000, log=True)
30+
31+
"""
32+
The authors of the original FunkSVD paper (https://sifter.org/~simon/journal/20061211.html) stated:
33+
Lrate is the learning rate, a rather arbitrary number which I fortuitously set to 0.001 on day one
34+
and regretted it every time I tried anything else after that. Err is the residual error from the
35+
current prediction.
36+
But the original paper just evaluated the performance on the Netflix Prize dataset. Other datasets
37+
perform well on ranges around 0.001.
38+
Therefore, the lrate range is set to UniformFloatHyperparameter('lrate', lower=0.0001, upper=0.01, default_value=0.001)
39+
40+
"""
41+
lrate = UniformFloatHyperparameter('lrate', lower=0.0001, upper=0.01, default_value=0.001)
42+
"""
43+
The authors of the original FunkSVD paper (https://sifter.org/~simon/journal/20061211.html) stated:
44+
The point here is to try to cut down on over fitting, ultimately allowing us to use
45+
more features. Last I recall, Vincent liked K=0.02 or so, with well over 100 features (singular vector
46+
pairs--if you can still call them that).
47+
The default value of 0.02 is considered for the range. The range is set to a close range around the 0.02 value.
48+
The default value is taken from the LensKit Library.
49+
"""
50+
reg = UniformFloatHyperparameter('reg', lower=0.001, upper=0.1, default_value=0.015)
51+
damping = UniformFloatHyperparameter('damping', lower=0.01, upper=1000, default_value=5, log=True)
52+
53+
cs = ConfigurationSpace()
54+
cs.add([features, lrate, reg, damping])
4355

4456
return cs

lkauto/algorithms/item_knn.py

Lines changed: 45 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from hyperopt import hp
12
from lenskit.knn.item import ItemKNNScorer, ItemKNNConfig
23
from ConfigSpace import UniformIntegerHyperparameter, UniformFloatHyperparameter
34
from ConfigSpace import ConfigurationSpace
@@ -21,47 +22,56 @@ def __init__(self, max_nbrs, min_nbrs=1, min_sim=1e-6, feedback="implicit", **kw
2122
super().__init__(config=config, **kwargs)
2223

2324
@staticmethod
24-
def get_default_configspace(**kwargs):
25+
def get_default_configspace(hyperopt = False, **kwargs):
2526
"""
26-
return default configurationspace
27+
return default (hyperopt) configurationspace
2728
Default configuration spaces for hyperparameters are defined here.
2829
"""
2930

30-
"""
31-
The max_nbrs hyperparameter is set to 10000 in LensKit-Auto. Generally speaking, the higher the max_nbrs
32-
hyperparameter value, the better the performance. But the computational cost will increase
33-
exponentially by increasing the max_nbrs hyperparameter value. 10000 is a reasonable value for max_nbrs
34-
hyperparameter since it has relatively good performance and is still able
35-
to run in a reasonable amount of time.
36-
"""
37-
max_nbrs = UniformIntegerHyperparameter('max_nbrs', lower=1, upper=10000, default_value=1000, log=True)
31+
if hyperopt:
32+
cs = {
33+
"algo": "ItemItem",
34+
"max_nbrs": hp.uniformint("max_nbrs", 1, 10000),
35+
"min_nbrs": hp.uniformint("min_nbrs", 1, 1000),
36+
"min_sim": hp.uniform("min_sim", 1.0e-10, 1.0e-2),
37+
}
3838

39-
"""
40-
The min_sim hyperparameter describes the minimum number of neighbors for scoring each item.
41-
Since the LensKit default value for the min_nbrs hyperparameter is 1, we set the lower bound to 1.
42-
The upper bound is set to the max_nbrs hyperparameter value.
43-
Therefore, the upper bound of min_nbrs is set to 10000 to cover the full possible range of the
44-
min_nbrs hyperparameter.
45-
"""
46-
min_nbrs = UniformIntegerHyperparameter('min_nbrs', lower=1, upper=1000, default_value=1, log=True)
39+
else:
40+
"""
41+
The max_nbrs hyperparameter is set to 10000 in LensKit-Auto. Generally speaking, the higher the max_nbrs
42+
hyperparameter value, the better the performance. But the computational cost will increase
43+
exponentially by increasing the max_nbrs hyperparameter value. 10000 is a reasonable value for max_nbrs
44+
hyperparameter since it has relatively good performance and is still able
45+
to run in a reasonable amount of time.
46+
"""
47+
max_nbrs = UniformIntegerHyperparameter('max_nbrs', lower=1, upper=10000, default_value=1000, log=True)
4748

48-
"""
49-
The min_sim hyperparameter describes the minimum threshold for similarity between items. It is commonly
50-
refered as the minimum support constraint. The min_sim hyperparameter limits the number of items that are taken
51-
into account for the similarity calculation.
52-
The following constrains are taken from :cite:t:`Deshpande2004-ht`
53-
A high value will result in a higher-order scheme that uses
54-
very few itemsets and as such it does not utilize its full potential, whereas a low value may lead to an
55-
exponentially large number of itemsets, making it computationally intractable.
56-
Unfortunately, there are no good ways to a priori select the value of support. This is because for a
57-
given value of σ the number of frequent item sets that exist in a dataset depends on the dataset’s density
58-
and the item co-occurrence patterns in the various rows.
59-
Since the paper already states that it is very difficult to find the best value, we define a large bound around
60-
the default LensKit value.
61-
"""
62-
min_sim = UniformFloatHyperparameter('min_sim', lower=1.0e-10, upper=1.0e-2, default_value=1.0e-6, log=True)
49+
"""
50+
The min_nbrs hyperparameter describes the minimum number of neighbors for scoring each item.
51+
Since the LensKit default value for the min_nbrs hyperparameter is 1, we set the lower bound to 1.
52+
The upper bound is set to the max_nbrs hyperparameter value.
53+
Therefore, the upper bound of min_nbrs is set to 1000 to cover a wide possible range of the
54+
min_nbrs hyperparameter.
55+
"""
56+
min_nbrs = UniformIntegerHyperparameter('min_nbrs', lower=1, upper=1000, default_value=1, log=True)
57+
58+
"""
59+
The min_sim hyperparameter describes the minimum threshold for similarity between items. It is commonly
60+
refered as the minimum support constraint. The min_sim hyperparameter limits the number of items that are taken
61+
into account for the similarity calculation.
62+
The following constraints are taken from :cite:t:`Deshpande2004-ht`
63+
A high value will result in a higher-order scheme that uses
64+
very few itemsets and as such it does not utilize its full potential, whereas a low value may lead to an
65+
exponentially large number of itemsets, making it computationally intractable.
66+
Unfortunately, there are no good ways to a priori select the value of support. This is because for a
67+
given value of σ the number of frequent item sets that exist in a dataset depends on the dataset’s density
68+
and the item co-occurrence patterns in the various rows.
69+
Since the paper already states that it is very difficult to find the best value, we define a large bound around
70+
the default LensKit value.
71+
"""
72+
min_sim = UniformFloatHyperparameter('min_sim', lower=1.0e-10, upper=1.0e-2, default_value=1.0e-6, log=True)
6373

64-
cs = ConfigurationSpace()
65-
cs.add([max_nbrs, min_nbrs, min_sim])
74+
cs = ConfigurationSpace()
75+
cs.add([max_nbrs, min_nbrs, min_sim])
6676

6777
return cs

0 commit comments

Comments
 (0)