@@ -86,7 +86,7 @@ def __init__(self,
                                                frac=self.split_frac,
                                                random_state=self.random_state)
         else:
-            self.train_test_splits = iter([TTSplit(train, validation)])
+            self.train_test_splits = None

    def evaluate(self, config_space: ConfigurationSpace) -> float:
        """evaluates model defined in config_space
@@ -112,54 +112,40 @@ def evaluate(self, config_space: ConfigurationSpace) -> float:
        # get model from configuration space
        model = get_model_from_cs(config_space, feedback='explicit')

-        '''
-        # loop over validation folds
-        for fold in range(self.split_folds):
-            if self.validation is None:
-                # get validation split by fold index
-                validation_train = self.train.loc[self.train_test_splits[fold]["train"], :]
-                validation_test = self.train.loc[self.train_test_splits[fold]["validation"], :]
-            else:
-                validation_train = self.train
-                validation_test = self.validation
-
-            # split validation data into X and y
-            x_validation_test = validation_test.copy()
-            y_validation_test = validation_test.copy()
-
-            # process validation split
-            x_validation_test = x_validation_test.drop('rating', inplace=False, axis=1)
-            y_validation_test = y_validation_test[['rating']].iloc[:, 0]
-
+        if self.validation is None:
+            for fold in self.train_test_splits:
+                validation_train = fold.train
+                validation_test = fold.test

-            # fit and predict model from configuration
-            model.fit(validation_train)
-            predictions = model.predict(x_validation_test)
-            predictions.index = x_validation_test.index
+                pipeline = predict_pipeline(scorer=model)
+                fit_pipeline = pipeline.clone()
+                fit_pipeline.train(data=validation_train)

-            # calculate error_metric and append to numpy array
-            error_metric = np.append(error_metric,
-                                     self.optimization_metric(predictions, y_validation_test, missing='ignore'))
+                recs = predict(fit_pipeline, validation_test)

-            validation_data = pd.concat([validation_data, predictions], axis=0)
-        '''
+                run_analysis = RunAnalysis()
+                run_analysis.add_metric(self.optimization_metric)
+                error_results = run_analysis.measure(recs, validation_test)

-        for fold in self.train_test_splits:
-            validation_train = fold.train
-            validation_test = fold.test
+                error_metric = np.append(error_metric, error_results)
+                validation_data = pd.concat([validation_data, recs], ignore_index=True)
+        else:
+            for fold in range(self.split_folds):
+                validation_train = self.train
+                validation_test = self.validation

-            pipeline = predict_pipeline(scorer=model)
-            fit_pipeline = pipeline.clone()
-            fit_pipeline.train(data=validation_train)
+                pipeline = predict_pipeline(scorer=model)
+                fit_pipeline = pipeline.clone()
+                fit_pipeline.train(data=validation_train)

-            recs = predict(fit_pipeline, validation_test.keys())
+                recs = predict(fit_pipeline, validation_test)

-            run_analysis = RunAnalysis()
-            run_analysis.add_metric(self.optimization_metric)
-            error_results = run_analysis.measure(recs, validation_test)
+                run_analysis = RunAnalysis()
+                run_analysis.add_metric(self.optimization_metric)
+                error_results = run_analysis.measure(recs, validation_test)

-            error_metric = np.append(error_metric, error_results)
-            validation_data = pd.concat([validation_data, recs], ignore_index=True)
+                error_metric = np.append(error_metric, error_results)
+                validation_data = pd.concat([validation_data, recs], ignore_index=True)

        # Save validation data for reproducibility and ensembling
        self.top_n_runs = update_top_n_runs(config_space=config_space,
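For context, here is a minimal standalone sketch of the per-fold evaluation step this commit introduces, using only the calls that appear in the added lines (`predict_pipeline`, `Pipeline.clone`/`train`, batch `predict`, `RunAnalysis`). The import paths and the `evaluate_fold` wrapper are assumptions for illustration, not part of the commit.

```python
# Sketch of the new per-fold flow, assuming LensKit-style APIs as used in the
# diff; the import locations are assumptions and may differ between releases.
from lenskit.batch import predict              # assumed home of batch predict()
from lenskit.metrics import RunAnalysis        # assumed home of RunAnalysis
from lenskit.pipeline import predict_pipeline  # assumed home of predict_pipeline


def evaluate_fold(model, validation_train, validation_test, optimization_metric):
    """Train a fresh prediction pipeline around `model` on one fold's train
    split, predict the fold's test split, and measure the chosen metric."""
    # wrap the scorer in a rating-prediction pipeline; train a clone so the
    # configured-but-untrained template stays reusable across folds
    pipeline = predict_pipeline(scorer=model)
    fit_pipeline = pipeline.clone()
    fit_pipeline.train(data=validation_train)

    # batch-generate predictions for the held-out data
    recs = predict(fit_pipeline, validation_test)

    # score the predictions with the metric being optimized
    run_analysis = RunAnalysis()
    run_analysis.add_metric(optimization_metric)
    error_results = run_analysis.measure(recs, validation_test)
    return recs, error_results
```

Cloning before training is what lets `evaluate()` reuse one pipeline definition across every fold of a configuration: each fold gets its own trained copy while the template remains untouched.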