Skip to content

Commit 8ebcf48

Browse files
authored
Merge pull request #73 from argrecsys/dev
Dev - Model Optimizer (with Optuna)
2 parents cdf9b90 + 1ac379f commit 8ebcf48

File tree

1 file changed

+22
-12
lines changed

1 file changed

+22
-12
lines changed

code/ArgumentClassifier/src/model_optimizer.py

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,16 @@
11
# -*- coding: utf-8 -*-
22
"""
3-
Created on Mon Apr 24 23:27:29 2023
3+
Created on Mon Apr 24, 2023
4+
Updated on Wen Apr 26, 2023
45
5-
@author: Usuario
6+
@author: Andrés Segura-Tinoco
67
"""
78

89
# Import Custom libraries
910
from util import files as ufl
1011

1112
# Import ML libraries
12-
import pandas as pd
13+
import time
1314
import numpy as np
1415
import optuna
1516
import lightgbm as lgb
@@ -21,7 +22,6 @@ def load_dataset():
2122
filepath = "../../../data/dataset.csv"
2223
label_column = "label"
2324
dataset = ufl.get_df_from_csv(filepath)
24-
print("len:", len(dataset))
2525

2626
# Features (X) and labels (y)
2727
X = dataset.drop(label_column, axis=1).values
@@ -36,33 +36,41 @@ def load_dataset():
3636
def objective(trial):
3737
data, target = load_dataset()
3838

39-
train_x, valid_x, train_y, valid_y = train_test_split(data, target, test_size=0.2)
40-
dtrain = lgb.Dataset(train_x, label=train_y)
39+
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2)
40+
dtrain = lgb.Dataset(X_train, label=y_train)
4141

4242
param = {
4343
"objective": "binary",
4444
"metric": "binary_logloss",
4545
"verbosity": -1,
4646
"boosting_type": "gbdt",
47+
"seed": 42,
48+
"learning_rate": trial.suggest_float("learning_rate", 1e-2, 1),
49+
"n_estimators": trial.suggest_int("n_estimators", 150, 300),
50+
"num_leaves": trial.suggest_int("num_leaves", 20, 3000, step=20),
51+
"max_depth": trial.suggest_int("max_depth", 2, 10),
52+
"min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 5, 50),
53+
"max_bin": trial.suggest_int("max_bin", 100, 300),
4754
"lambda_l1": trial.suggest_float("lambda_l1", 1e-8, 10.0, log=True),
4855
"lambda_l2": trial.suggest_float("lambda_l2", 1e-8, 10.0, log=True),
49-
"num_leaves": trial.suggest_int("num_leaves", 2, 256),
56+
"min_gain_to_split": trial.suggest_float("min_gain_to_split", 0, 10),
5057
"feature_fraction": trial.suggest_float("feature_fraction", 0.4, 1.0),
5158
"bagging_fraction": trial.suggest_float("bagging_fraction", 0.4, 1.0),
5259
"bagging_freq": trial.suggest_int("bagging_freq", 1, 7),
53-
"min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
5460
}
5561

5662
gbm = lgb.train(param, dtrain)
57-
preds = gbm.predict(valid_x)
58-
pred_labels = np.rint(preds)
59-
accuracy = sklearn.metrics.accuracy_score(valid_y, pred_labels)
63+
preds = gbm.predict(X_test)
64+
y_pred = np.rint(preds)
65+
accuracy = sklearn.metrics.accuracy_score(y_test, y_pred)
6066

6167
return accuracy
6268

6369
if __name__ == "__main__":
70+
start_time = time.time()
6471
study = optuna.create_study(direction="maximize")
65-
study.optimize(objective, n_trials=100)
72+
study.optimize(objective, n_trials=200)
73+
elapsed_time = (time.time() - start_time)
6674

6775
print("Number of finished trials: {}".format(len(study.trials)))
6876

@@ -74,3 +82,5 @@ def objective(trial):
7482
print(" Params: ")
7583
for key, value in trial.params.items():
7684
print(" {}: {}".format(key, value))
85+
86+
print("Elapsed time:", elapsed_time)

0 commit comments

Comments (0)