Skip to content

Commit 107f8ef

Browse files
committed
Changed multiple files to allow for use of hyperopt Spaces for Tree-Parzen
1 parent bb84c53 commit 107f8ef

File tree

9 files changed

+309
-217
lines changed

9 files changed

+309
-217
lines changed

lkauto/algorithms/als.py

Lines changed: 59 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from hyperopt import hp
12
from lenskit.als import BiasedMFScorer, ImplicitMFScorer
23
from ConfigSpace import ConfigurationSpace, UniformIntegerHyperparameter, UniformFloatHyperparameter, CategoricalHyperparameter
34

@@ -8,16 +9,26 @@ def __init__(self, features, feedback="implicit", **kwargs):
89
self.features = features # store the features as an instance variable for testing
910

1011
@staticmethod
11-
def get_default_configspace(**kwargs):
12+
def get_default_configspace(hyperopt = False, **kwargs):
1213
"""
13-
return default configurationspace
14+
return default (hyperopt) configurationspace
1415
"""
15-
features = UniformIntegerHyperparameter('features', lower=5, upper=10000, default_value=1000, log=True)
16-
ureg = UniformFloatHyperparameter('ureg', lower=0.01, upper=0.1, default_value=0.1, log=True)
17-
ireg = UniformFloatHyperparameter('ireg', lower=0.01, upper=0.1, default_value=0.1, log=True)
16+
if hyperopt:
17+
cs = {
18+
"algo": "ImplicitMF",
19+
"features": hp.uniformint("features", 5, 10000),
20+
"ureg": hp.uniform("ureg", 0.01, 0.1),
21+
"ireg": hp.uniform("ireg", 0.01, 0.1),
22+
}
23+
24+
else:
25+
features = UniformIntegerHyperparameter('features', lower=5, upper=10000, default_value=1000, log=True)
26+
ureg = UniformFloatHyperparameter('ureg', lower=0.01, upper=0.1, default_value=0.1, log=True)
27+
ireg = UniformFloatHyperparameter('ireg', lower=0.01, upper=0.1, default_value=0.1, log=True)
28+
29+
cs = ConfigurationSpace()
30+
cs.add([features, ureg, ireg])
1831

19-
cs = ConfigurationSpace()
20-
cs.add([features, ureg, ireg])
2132
return cs
2233

2334

@@ -28,39 +39,51 @@ def __init__(self, features, feedback="explicit", **kwargs):
2839
self.features = features # store the features as an instance variable for testing
2940

3041
@staticmethod
31-
def get_default_configspace(**kwargs):
42+
def get_default_configspace(hyperopt = False, **kwargs):
3243
"""
3344
return default configuration spaces for hyperparameter
3445
"""
3546

36-
"""
37-
The authors of the original ALS paper (https://link.springer.com/chapter/10.1007/978-3-540-68880-8_32) stated:
38-
The most important discovery we made is that ALS-WR never overfits the data if we either increase
39-
the number of iterations or the number of hidden features.
40-
The paper stated that the improvement of the performance maximized around 1000 features.
41-
Therefore, we will set the upper bound and the default value of features to 10000.
42-
Since the authors just evaluated on one larger dataset, we still allow smaller and larger feature numbers
43-
but set the default value to 1000.
44-
"""
45-
# features = Integer('features', bounds=(2, 10000), default=1000, log=True) # No default value given
46-
# no default value given but we set the default value to 1000???
47-
features = UniformIntegerHyperparameter('features', lower=2, upper=10000, default_value=1000, log=True)
48-
"""
49-
The authors of the original ALS paper set the range of the regularization hyperparameter to from 0.03 - 0.065.
50-
https://link.springer.com/chapter/10.1007/978-3-540-68880-8_32
51-
Therefore we set the lower bound of the two regularization parameters (ureg and ireg) to 0.065.
52-
LensKit sets the default regularization hyperparameter to 0.1 Therefore we set the upper bound of the two
53-
regularization parameters (ureg and ireg) to 0.1.
54-
"""
55-
ureg = UniformFloatHyperparameter('ureg', lower=0.01, upper=0.1, default_value=0.1, log=True)
56-
ireg = UniformFloatHyperparameter('ireg', lower=0.01, upper=0.1, default_value=0.1, log=True)
47+
if hyperopt:
48+
cs = {
49+
"algo": "ALSBiasedMF",
50+
"features": hp.uniformint("features", 2, 10000),
51+
"ureq": hp.uniform("ureq", 0.01, 0.1),
52+
"ireq": hp.uniform("ireq", 0.01, 0.1),
53+
"bias": hp.choice("bias", [True, False]),
54+
}
5755

58-
"""
59-
The damping hyperparameter en- or disables a damping factor.
60-
In the future we may want to tune the damping values as well.
61-
"""
62-
bias = CategoricalHyperparameter('bias', choices=[True, False], default_value=True)
56+
else:
57+
58+
"""
59+
The authors of the original ALS paper (https://link.springer.com/chapter/10.1007/978-3-540-68880-8_32) stated:
60+
The most important discovery we made is that ALS-WR never overfits the data if we either increase
61+
the number of iterations or the number of hidden features.
62+
The paper stated that the improvement of the performance maximized around 1000 features.
63+
Therefore, we will set the upper bound of features to 10000 and the default value to 1000.
64+
Since the authors just evaluated on one larger dataset, we still allow smaller and larger feature numbers
65+
but set the default value to 1000.
66+
"""
67+
# features = Integer('features', bounds=(2, 10000), default=1000, log=True) # No default value given
68+
# no default value given but we set the default value to 1000???
69+
features = UniformIntegerHyperparameter('features', lower=2, upper=10000, default_value=1000, log=True)
70+
"""
71+
The authors of the original ALS paper set the range of the regularization hyperparameter from 0.03 to 0.065.
72+
https://link.springer.com/chapter/10.1007/978-3-540-68880-8_32
73+
Therefore we set the lower bound of the two regularization parameters (ureg and ireg) to 0.065.
74+
LensKit sets the default regularization hyperparameter to 0.1 Therefore we set the upper bound of the two
75+
regularization parameters (ureg and ireg) to 0.1.
76+
"""
77+
ureg = UniformFloatHyperparameter('ureg', lower=0.01, upper=0.1, default_value=0.1, log=True)
78+
ireg = UniformFloatHyperparameter('ireg', lower=0.01, upper=0.1, default_value=0.1, log=True)
79+
80+
"""
81+
The bias hyperparameter en- or disables the bias (damping) factor.
82+
In the future we may want to tune the damping values as well.
83+
"""
84+
bias = CategoricalHyperparameter('bias', choices=[True, False], default_value=True)
85+
86+
cs = ConfigurationSpace()
87+
cs.add([features, ureg, ireg, bias])
6388

64-
cs = ConfigurationSpace()
65-
cs.add([features, ureg, ireg, bias])
6689
return cs

lkauto/algorithms/bias.py

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from hyperopt import hp
12
from lenskit.basic.bias import BiasScorer
23
from ConfigSpace import ConfigurationSpace, UniformFloatHyperparameter
34

@@ -9,7 +10,7 @@ def __init__(self, feedback="explicit", **kwargs):
910
self.feedback = feedback
1011

1112
@staticmethod
12-
def get_default_configspace(number_item: int, number_user: int):
13+
def get_default_configspace(number_item: int, number_user: int, hyperopt = False):
1314
"""
1415
return default configurationspace
1516
"""
@@ -18,13 +19,21 @@ def get_default_configspace(number_item: int, number_user: int):
1819
LensKit does not give any hint on how to setup the damping values for the Bias algorithm.
1920
Therefore we evaluated the algorithms performance on 70 datasets to come up with the following ranges
2021
"""
21-
# lower bound 0 does not work because of log=True
22-
item_damping = UniformFloatHyperparameter('item_damping', lower=1e-5*number_item, upper=85*number_item,
23-
default_value=0.0025*number_item, log=True)
24-
user_damping = UniformFloatHyperparameter('user_damping', lower=1e-5*number_user, upper=85*number_user,
25-
default_value=0.0025*number_user, log=True)
26-
27-
cs = ConfigurationSpace()
28-
cs.add([item_damping, user_damping])
22+
if hyperopt:
23+
cs = {
24+
"algo": "Bias",
25+
"item_damping": hp.uniform("item_damping", 1e-5*number_item, 85*number_item),
26+
"user_damping": hp.uniform("user_damping", 1e-5*number_user, 85*number_user),
27+
}
28+
29+
else:
30+
# lower bound 0 does not work because of log=True
31+
item_damping = UniformFloatHyperparameter('item_damping', lower=1e-5*number_item, upper=85*number_item,
32+
default_value=0.0025*number_item, log=True)
33+
user_damping = UniformFloatHyperparameter('user_damping', lower=1e-5*number_user, upper=85*number_user,
34+
default_value=0.0025*number_user, log=True)
35+
36+
cs = ConfigurationSpace()
37+
cs.add([item_damping, user_damping])
2938

3039
return cs

lkauto/algorithms/funksvd.py

Lines changed: 38 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from hyperopt import hp
12
from lenskit.funksvd import FunkSVDScorer
23
from ConfigSpace import ConfigurationSpace, UniformIntegerHyperparameter, UniformFloatHyperparameter
34

@@ -10,35 +11,46 @@ def __init__(self, features, feedback="explicit", **kwargs):
1011
self.features = features
1112

1213
@staticmethod
13-
def get_default_configspace(**kwargs):
14+
def get_default_configspace(hyperopt = False, **kwargs):
1415
"""
15-
return default configurationspace
16+
return default (hyperopt) configurationspace
1617
"""
17-
features = UniformIntegerHyperparameter('features', lower=2, upper=10000, default_value=1000, log=True)
1818

19-
"""
20-
The authors of the original FunkSVD paper (https://sifter.org/~simon/journal/20061211.html) stated:
21-
Lrate is the learning rate, a rather arbitrary number which I fortuitously set to 0.001 on day one
22-
and regretted it every time I tried anything else after that. Err is the residual error from the
23-
current prediction.
24-
But the original dataset just evaluated the performance on the netflix price dataset. Other datasets
25-
perform well on ranges around 0.001.
26-
Therefore, the pip install -e .loatHyperparameter('lrate', lower=0.0001, upper=0.01, default_value=0.001)
19+
if hyperopt:
20+
cs = {
21+
"algo": "FunkSVD",
22+
"features": hp.uniformint("features", 2, 10000),
23+
"lrate": hp.uniform("lrate", 0.0001, 0.01),
24+
"reg": hp.uniform("reg", 0.001, 0.1),
25+
"damping": hp.uniform("damping", 0.01, 1000),
26+
}
2727

28-
"""
29-
lrate = UniformFloatHyperparameter('lrate', lower=0.0001, upper=0.01, default_value=0.001)
30-
"""
31-
The authors of the original FunkSVD paper (https://sifter.org/~simon/journal/20061211.html) stated:
32-
The point here is to try to cut down on over fitting, ultimately allowing us to use
33-
more features. Last I recall, Vincent liked K=0.02 or so, with well over 100 features (singular vector
34-
pairs--if you can still call them that).
35-
The default value of 0.02 is considered for the range. The range is set to a close range around the 0.02 value.
36-
The default value is taken from the LensKit Library.
37-
"""""
38-
reg = UniformFloatHyperparameter('reg', lower=0.001, upper=0.1, default_value=0.015)
39-
damping = UniformFloatHyperparameter('damping', lower=0.01, upper=1000, default_value=5, log=True)
40-
41-
cs = ConfigurationSpace()
42-
cs.add([features, lrate, reg, damping])
28+
else:
29+
features = UniformIntegerHyperparameter('features', lower=2, upper=10000, default_value=1000, log=True)
30+
31+
"""
32+
The authors of the original FunkSVD paper (https://sifter.org/~simon/journal/20061211.html) stated:
33+
Lrate is the learning rate, a rather arbitrary number which I fortuitously set to 0.001 on day one
34+
and regretted it every time I tried anything else after that. Err is the residual error from the
35+
current prediction.
36+
But the original paper just evaluated the performance on the Netflix Prize dataset. Other datasets
37+
perform well on ranges around 0.001.
38+
Therefore, the lrate range is set to UniformFloatHyperparameter('lrate', lower=0.0001, upper=0.01, default_value=0.001)
39+
40+
"""
41+
lrate = UniformFloatHyperparameter('lrate', lower=0.0001, upper=0.01, default_value=0.001)
42+
"""
43+
The authors of the original FunkSVD paper (https://sifter.org/~simon/journal/20061211.html) stated:
44+
The point here is to try to cut down on over fitting, ultimately allowing us to use
45+
more features. Last I recall, Vincent liked K=0.02 or so, with well over 100 features (singular vector
46+
pairs--if you can still call them that).
47+
The default value of 0.02 is considered for the range. The range is set to a close range around the 0.02 value.
48+
The default value is taken from the LensKit Library.
49+
"""
50+
reg = UniformFloatHyperparameter('reg', lower=0.001, upper=0.1, default_value=0.015)
51+
damping = UniformFloatHyperparameter('damping', lower=0.01, upper=1000, default_value=5, log=True)
52+
53+
cs = ConfigurationSpace()
54+
cs.add([features, lrate, reg, damping])
4355

4456
return cs

lkauto/algorithms/item_knn.py

Lines changed: 45 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from hyperopt import hp
12
from lenskit.knn.item import ItemKNNScorer, ItemKNNConfig
23
from ConfigSpace import UniformIntegerHyperparameter, UniformFloatHyperparameter
34
from ConfigSpace import ConfigurationSpace
@@ -21,47 +22,56 @@ def __init__(self, max_nbrs, min_nbrs=1, min_sim=1e-6, feedback="implicit", **kw
2122
super().__init__(config=config, **kwargs)
2223

2324
@staticmethod
24-
def get_default_configspace(**kwargs):
25+
def get_default_configspace(hyperopt = False, **kwargs):
2526
"""
26-
return default configurationspace
27+
return default (hyperopt) configurationspace
2728
Default configuration spaces for hyperparameters are defined here.
2829
"""
2930

30-
"""
31-
The max_nbrs hyperparameter is set to 10000 in LensKit-Auto. Generally speaking, the higher the max_nbrs
32-
hyperparameter value, the better the performance. But the computational cost will increase
33-
exponentially by increasing the max_nbrs hyperparameter value. 10000 is a reasonable value for max_nbrs
34-
hyperparameter since it has relatively good performance and is still able
35-
to run in a reasonable amount of time.
36-
"""
37-
max_nbrs = UniformIntegerHyperparameter('max_nbrs', lower=1, upper=10000, default_value=1000, log=True)
31+
if hyperopt:
32+
cs = {
33+
"algo": "ItemItem",
34+
"max_nbrs": hp.uniformint("max_nbrs", 1, 10000),
35+
"min_nbrs": hp.uniformint("min_nbrs", 1, 1000),
36+
"min_sim": hp.uniform("min_sim", 1.0e-10, 1.0e-2),
37+
}
3838

39-
"""
40-
The min_sim hyperparameter describes the minimum number of neighbors for scoring each item.
41-
Since the LensKit default value for the min_nbrs hyperparameter is 1, we set the lower bound to 1.
42-
The upper bound is set to the max_nbrs hyperparameter value.
43-
Therefore, the upper bound of min_nbrs is set to 10000 to cover the full possible range of the
44-
min_nbrs hyperparameter.
45-
"""
46-
min_nbrs = UniformIntegerHyperparameter('min_nbrs', lower=1, upper=1000, default_value=1, log=True)
39+
else:
40+
"""
41+
The max_nbrs hyperparameter is set to 10000 in LensKit-Auto. Generally speaking, the higher the max_nbrs
42+
hyperparameter value, the better the performance. But the computational cost will increase
43+
exponentially by increasing the max_nbrs hyperparameter value. 10000 is a reasonable value for max_nbrs
44+
hyperparameter since it has relatively good performance and is still able
45+
to run in a reasonable amount of time.
46+
"""
47+
max_nbrs = UniformIntegerHyperparameter('max_nbrs', lower=1, upper=10000, default_value=1000, log=True)
4748

48-
"""
49-
The min_sim hyperparameter describes the minimum threshold for similarity between items. It is commonly
50-
refered as the minimum support constraint. The min_sim hyperparameter limits the number of items that are taken
51-
into account for the similarity calculation.
52-
The following constrains are taken from :cite:t:`Deshpande2004-ht`
53-
A high value will result in a higher-order scheme that uses
54-
very few itemsets and as such it does not utilize its full potential, whereas a low value may lead to an
55-
exponentially large number of itemsets, making it computationally intractable.
56-
Unfortunately, there are no good ways to a priori select the value of support. This is because for a
57-
given value of σ the number of frequent item sets that exist in a dataset depends on the dataset’s density
58-
and the item co-occurrence patterns in the various rows.
59-
Since the paper already states that it is very difficult to find the best value, we define a large bound around
60-
the default LensKit value.
61-
"""
62-
min_sim = UniformFloatHyperparameter('min_sim', lower=1.0e-10, upper=1.0e-2, default_value=1.0e-6, log=True)
49+
"""
50+
The min_nbrs hyperparameter describes the minimum number of neighbors for scoring each item.
51+
Since the LensKit default value for the min_nbrs hyperparameter is 1, we set the lower bound to 1.
52+
The upper bound is set to the max_nbrs hyperparameter value.
53+
Therefore, the upper bound of min_nbrs is set to 1000 to cover a wide possible range of the
54+
min_nbrs hyperparameter.
55+
"""
56+
min_nbrs = UniformIntegerHyperparameter('min_nbrs', lower=1, upper=1000, default_value=1, log=True)
57+
58+
"""
59+
The min_sim hyperparameter describes the minimum threshold for similarity between items. It is commonly
60+
refered as the minimum support constraint. The min_sim hyperparameter limits the number of items that are taken
61+
into account for the similarity calculation.
62+
The following constraints are taken from :cite:t:`Deshpande2004-ht`
63+
A high value will result in a higher-order scheme that uses
64+
very few itemsets and as such it does not utilize its full potential, whereas a low value may lead to an
65+
exponentially large number of itemsets, making it computationally intractable.
66+
Unfortunately, there are no good ways to a priori select the value of support. This is because for a
67+
given value of σ the number of frequent item sets that exist in a dataset depends on the dataset’s density
68+
and the item co-occurrence patterns in the various rows.
69+
Since the paper already states that it is very difficult to find the best value, we define a large bound around
70+
the default LensKit value.
71+
"""
72+
min_sim = UniformFloatHyperparameter('min_sim', lower=1.0e-10, upper=1.0e-2, default_value=1.0e-6, log=True)
6373

64-
cs = ConfigurationSpace()
65-
cs.add([max_nbrs, min_nbrs, min_sim])
74+
cs = ConfigurationSpace()
75+
cs.add([max_nbrs, min_nbrs, min_sim])
6676

6777
return cs

0 commit comments

Comments
 (0)