Skip to content

Commit 7e020bd

Browse files
author
Max
committed
Updated implicit and explicit evalers; they now save with the correct metric name
1 parent 209187e commit 7e020bd

File tree

3 files changed

+68
-62
lines changed

3 files changed

+68
-62
lines changed

lkauto/explicit/explicit_evaler.py

Lines changed: 27 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ def __init__(self,
8686
frac=self.split_frac,
8787
random_state=self.random_state)
8888
else:
89-
self.train_test_splits = iter([TTSplit(train, validation)])
89+
self.train_test_splits = None
9090

9191
def evaluate(self, config_space: ConfigurationSpace) -> float:
9292
""" evaluates model defined in config_space
@@ -112,54 +112,40 @@ def evaluate(self, config_space: ConfigurationSpace) -> float:
112112
# get model from configuration space
113113
model = get_model_from_cs(config_space, feedback='explicit')
114114

115-
'''
116-
# loop over validation folds
117-
for fold in range(self.split_folds):
118-
if self.validation is None:
119-
# get validation split by fold index
120-
validation_train = self.train.loc[self.train_test_splits[fold]["train"], :]
121-
validation_test = self.train.loc[self.train_test_splits[fold]["validation"], :]
122-
else:
123-
validation_train = self.train
124-
validation_test = self.validation
125-
126-
# split validation data into X and y
127-
x_validation_test = validation_test.copy()
128-
y_validation_test = validation_test.copy()
129-
130-
# process validation split
131-
x_validation_test = x_validation_test.drop('rating', inplace=False, axis=1)
132-
y_validation_test = y_validation_test[['rating']].iloc[:, 0]
133-
115+
if self.validation is None:
116+
for fold in self.train_test_splits:
117+
validation_train = self.train_test_splits.train
118+
validation_test = self.train_test_splits.test
134119

135-
# fit and predict model from configuration
136-
model.fit(validation_train)
137-
predictions = model.predict(x_validation_test)
138-
predictions.index = x_validation_test.index
120+
pipeline = predict_pipeline(scorer=model)
121+
fit_pipeline = pipeline.clone()
122+
fit_pipeline.train(data=validation_train)
139123

140-
# calculate error_metric and append to numpy array
141-
error_metric = np.append(error_metric,
142-
self.optimization_metric(predictions, y_validation_test, missing='ignore'))
124+
recs = predict(fit_pipeline, validation_test)
143125

144-
validation_data = pd.concat([validation_data, predictions], axis=0)
145-
'''
126+
run_analysis = RunAnalysis()
127+
run_analysis.add_metric(self.optimization_metric)
128+
error_results = run_analysis.measure(recs, validation_test)
146129

147-
for fold in self.train_test_splits:
148-
validation_train = fold.train
149-
validation_test = fold.test
130+
error_metric = np.append(error_metric, error_results)
131+
validation_data = pd.concat([validation_data, recs], ignore_index=True)
132+
else:
133+
for fold in range(self.split_folds):
134+
validation_train = self.train
135+
validation_test = self.validation
150136

151-
pipeline = predict_pipeline(scorer=model)
152-
fit_pipeline = pipeline.clone()
153-
fit_pipeline.train(data=validation_train)
137+
pipeline = predict_pipeline(scorer=model)
138+
fit_pipeline = pipeline.clone()
139+
fit_pipeline.train(data=validation_train)
154140

155-
recs = predict(fit_pipeline, validation_test.keys())
141+
recs = predict(fit_pipeline, validation_test)
156142

157-
run_analysis = RunAnalysis()
158-
run_analysis.add_metric(self.optimization_metric)
159-
error_results = run_analysis.measure(recs, validation_test)
143+
run_analysis = RunAnalysis()
144+
run_analysis.add_metric(self.optimization_metric)
145+
error_results = run_analysis.measure(recs, validation_test)
160146

161-
error_metric = np.append(error_metric, error_results)
162-
validation_data = pd.concat([validation_data, recs], ignore_index=True)
147+
error_metric = np.append(error_metric, error_results)
148+
validation_data = pd.concat([validation_data, recs], ignore_index=True)
163149

164150
# Save validation data for reproducibility and ensembling
165151
self.top_n_runs = update_top_n_runs(config_space=config_space,

lkauto/implicit/implicit_evaler.py

Lines changed: 40 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from lenskit import batch
77
from lenskit.data import Dataset
88
from lenskit.metrics import RunAnalysis
9+
from lenskit.splitting import TTSplit
910
import logging
1011

1112
from lenskit.pipeline import predict_pipeline
@@ -110,33 +111,52 @@ def evaluate(self, config_space: ConfigurationSpace) -> float:
110111
# get model from configuration space
111112
model = get_model_from_cs(config_space, feedback='implicit')
112113

113-
# iterate over validation folds
114-
for fold in range(self.split_folds):
115-
# get validation split by index
116-
if self.validation is None:
117-
validation_train = self.val_fold_indices[fold].train
118-
validation_test = self.val_fold_indices[fold].test
119-
else:
114+
if self.validation is None:
115+
for fold in self.val_fold_indices:
116+
validation_train = fold.train
117+
validation_test = fold.test
118+
119+
pipeline = predict_pipeline(scorer=model)
120+
fit_pipeline = pipeline.clone()
121+
fit_pipeline.train(validation_train)
122+
123+
recs = predict(fit_pipeline, validation_test)
124+
125+
# create rec list analysis
126+
rla = RunAnalysis()
127+
rla.add_metric(self.optimization_metric)
128+
129+
# compute scores
130+
scores = rla.measure(recs, validation_test)
131+
132+
# store data
133+
validation_data = pd.concat([validation_data, recs.to_df()], axis=0)
134+
# the first (index 0) column should contain the means for the metrics (rows)
135+
metric_scores = np.append(metric_scores, scores.list_summary()[self.optimization_metric.__name__].iloc[0])
136+
else:
137+
for fold in range(self.split_folds):
120138
validation_train = self.train
121139
validation_test = self.validation
122140

123-
pipeline = predict_pipeline(scorer=model)
124-
fit_pipeline = pipeline.clone()
125-
fit_pipeline.train(validation_train)
141+
pipeline = predict_pipeline(scorer=model)
142+
fit_pipeline = pipeline.clone()
143+
fit_pipeline.train(validation_train)
144+
145+
recs = predict(fit_pipeline, validation_test)
126146

127-
recs = predict(fit_pipeline, validation_test.keys())
147+
# create rec list analysis
148+
rla = RunAnalysis()
149+
rla.add_metric(self.optimization_metric)
128150

129-
# create rec list analysis
130-
rla = RunAnalysis()
131-
rla.add_metric(self.optimization_metric)
151+
# compute scores
152+
scores = rla.measure(recs, validation_test)
132153

133-
# compute scores
134-
scores = rla.measure(recs, validation_test)
154+
# store data
155+
validation_data = pd.concat([validation_data, recs.to_df()], axis=0)
156+
# the first (index 0) column should contain the means for the metrics (rows)
157+
metric_scores = np.append(metric_scores,
158+
scores.list_summary()[self.optimization_metric.__name__].iloc[0])
135159

136-
# store data
137-
validation_data = pd.concat([validation_data, recs], axis=0)
138-
# the first (index 0) column should contain the means for the metrics (rows)
139-
metric_scores = np.append(metric_scores, scores.list_summary()[self.optimization_metric.__name].iloc[0])
140160

141161
# save validation data
142162
self.filer.save_validataion_data(config_space=config_space,

lkauto/lkauto.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,7 @@ def get_best_recommender_model(train: Dataset,
231231
cs: ConfigurationSpace = None,
232232
optimization_metric=NDCG,
233233
optimization_strategie: str = 'bayesian',
234-
time_limit_in_sec: int = 2700,
234+
time_limit_in_sec: int = 600,
235235
num_evaluations: int = 500,
236236
random_state=None,
237237
split_folds: int = 1,

0 commit comments

Comments (0)