Skip to content

Commit 46bcfa6

Browse files
author
Max
committed
Mainly updated get_default_configuration_space.py to accept a LensKit Dataset (or an Iterator[TTSplit]) as data instead of a pandas DataFrame.
Also updated some imports in bayesian_optimization.py, implicit_evaler.py, and lkauto.py.
1 parent 5d3c451 commit 46bcfa6

File tree

5 files changed

+52
-13
lines changed

5 files changed

+52
-13
lines changed

lkauto/implicit/implicit_evaler.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
import numpy as np
22
import pandas as pd
33
from ConfigSpace import ConfigurationSpace
4-
from lenskit import topn, batch
4+
from lenskit.basic import topn
5+
from lenskit import batch
56
import logging
67

78
from lkauto.utils.filer import Filer

lkauto/lkauto.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
def get_best_prediction_model(train: pd.DataFrame,
2727
validation: pd.DataFrame = None,
2828
cs: ConfigurationSpace = None,
29-
optimization_metric=rmse,
29+
optimization_metric=RMSE,
3030
optimization_strategie: str = 'bayesian',
3131
time_limit_in_sec: int = 2700,
3232
num_evaluations: int = 500,
@@ -228,7 +228,7 @@ def get_best_prediction_model(train: pd.DataFrame,
228228
def get_best_recommender_model(train: pd.DataFrame,
229229
validation: pd.DataFrame = None,
230230
cs: ConfigurationSpace = None,
231-
optimization_metric=ndcg,
231+
optimization_metric=NDCG,
232232
optimization_strategie: str = 'bayesian',
233233
time_limit_in_sec: int = 2700,
234234
num_evaluations: int = 500,

lkauto/optimization_strategies/bayesian_optimization.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import pandas as pd
22
from ConfigSpace import Configuration, ConfigurationSpace
3-
from smac.facade.smac_hpo_facade import SMAC4HPO
4-
from smac.scenario.scenario import Scenario
3+
from smac.facade import HyperparameterOptimizationFacade as SMAC4HPO
4+
from smac.scenario import Scenario
55

66
from lkauto.explicit.explicit_evaler import ExplicitEvaler
77
from lkauto.implicit.implicit_evaler import ImplicitEvaler

lkauto/utils/get_default_configuration_space.py

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
import pandas as pd
2+
3+
from typing import Iterator, Union
24
from ConfigSpace import Categorical
35
from ConfigSpace import ConfigurationSpace
6+
from lenskit.data import Dataset
7+
from lenskit.splitting import TTSplit
48

59
from lkauto.algorithms.als import BiasedMF
610
from lkauto.algorithms.als import ImplicitMF
@@ -11,13 +15,13 @@
1115
from lkauto.algorithms.user_knn import UserUser
1216

1317

14-
def get_default_configuration_space(data: pd.DataFrame,
18+
def get_default_configuration_space(data: Union[Dataset, Iterator[TTSplit]],
1519
val_fold_indices,
1620
feedback: str,
17-
validation: pd.DataFrame = None,
21+
validation: Iterator[TTSplit] = None,
1822
random_state=42) -> ConfigurationSpace:
1923
"""
20-
returns the default configuration space for all included rating predictions algorithms
24+
returns the default configuration space for all included rating prediction algorithms
2125
2226
Parameters
2327
----------
@@ -42,6 +46,7 @@ def get_default_configuration_space(data: pd.DataFrame,
4246

4347
# get minimum number of items and users for the given train split
4448

49+
"""
4550
num_items = 0
4651
num_users = 0
4752
if validation is None:
@@ -57,6 +62,22 @@ def get_default_configuration_space(data: pd.DataFrame,
5762
num_items = data['item'].nunique()
5863
if data['user'].nunique() < num_users or num_users == 0:
5964
num_users = data['user'].nunique()
65+
"""
66+
67+
num_items = 0
68+
num_users = 0
69+
70+
if validation is None and not isinstance(data, Dataset):
71+
for fold in data:
72+
if fold.train.item_count < num_items or num_items == 0:
73+
num_items = fold.train.item_count
74+
if fold.train.user_count < num_users or num_users == 0:
75+
num_users = fold.train.user_count
76+
else:
77+
if data.item_count < num_items or num_items == 0:
78+
num_items = data.item_count
79+
if data.user_count < num_users or num_users == 0:
80+
num_users = data.user_count
6081

6182
# define configuration space
6283
cs = ConfigurationSpace(

tests/utils/test_get_default_configuration_space.py

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import pandas as pd
66

77
from ConfigSpace import ConfigurationSpace
8+
from lenskit.data import from_interactions_df, DatasetBuilder
89

910
from lkauto.utils.get_default_configuration_space import get_default_configuration_space
1011
from lkauto.utils.validation_split import validation_split
@@ -16,12 +17,28 @@ class TestGetDefaultConfigurationSpace(unittest.TestCase):
1617

1718
def setUp(self):
1819
self.random_state = 42
19-
self.df = pd.DataFrame(np.ones((100, 3)), columns=["user", "item", "rating", ])
20-
self.val_fold_indices = validation_split(data=self.df, random_state=42)
20+
self.df = pd.DataFrame(np.array([[1,1,1],
21+
[1,2,2],
22+
[1,3,3],
23+
[2,1,1],
24+
[2,2,2],
25+
[2,3,3],
26+
[3,1,1],
27+
[3,2,2],
28+
[3,3,3],
29+
[4,1,1],
30+
[4,2,2],
31+
[4,3,3],
32+
[5,1,1],
33+
[5,2,2],
34+
[5,3,3]]), columns=["user", "item", "rating", ])
35+
# self.df = pd.DataFrame(np.ones((100, 3), dtype=int), columns=["user", "item", "rating", ])
36+
self.ds = from_interactions_df(self.df, user_col="user", item_col="item", rating_col="rating")
37+
self.val_fold_indices = validation_split(data=self.ds, random_state=42)
2138

2239
def test_getDefaultConfigurationSpace_givenInvalidFeedback_valueErrorThrown(self):
2340
with self.assertRaises(ValueError) as cm:
24-
get_default_configuration_space(feedback="", data=self.df,
41+
get_default_configuration_space(feedback="", data=self.ds,
2542
val_fold_indices=self.val_fold_indices,
2643
random_state=self.random_state)
2744
self.assertEqual("Unknown feedback type: ", cm.exception.args[0])
@@ -34,7 +51,7 @@ def test_getDefaultConfigurationSpace_givenInvalidFeedback_valueErrorThrown(self
3451
def test_getDefaultConfigurationSpace_GivenImplicitAndValidInputs_CorrectConfigSpaceReturnedExpected(self):
3552
algorithm_list_string = 'ItemItem, FunkSVD, UserUser, ImplicitMF, BiasedSVD'
3653

37-
result = get_default_configuration_space(feedback='implicit', data=self.df,
54+
result = get_default_configuration_space(feedback='implicit', data=self.ds,
3855
val_fold_indices=self.val_fold_indices,
3956
random_state=self.random_state)
4057

@@ -50,7 +67,7 @@ def test_getDefaultConfigurationSpace_GivenImplicitAndValidInputs_CorrectConfigS
5067
def test_getDefaultConfigurationSpace_GivenExplicitAndValidInputs_CorrectConfigSpaceReturnedExpected(self):
5168
algorithm_list_string = 'ItemItem, UserUser, FunkSVD, BiasedSVD, ALSBiasedMF, Bias'
5269

53-
result = get_default_configuration_space(feedback='explicit', data=self.df,
70+
result = get_default_configuration_space(feedback='explicit', data=self.ds,
5471
val_fold_indices=self.val_fold_indices,
5572
random_state=self.random_state)
5673

0 commit comments

Comments
 (0)