1
1
# -*- coding: utf-8 -*-
2
2
"""
3
- Created on Mon Apr 24 23:27:29 2023
3
+ Created on Mon Apr 24, 2023
4
+ Updated on Wed Apr 26, 2023
4
5
5
- @author: Usuario
6
+ @author: Andrés Segura-Tinoco
6
7
"""
7
8
8
9
# Import Custom libraries
9
10
from util import files as ufl
10
11
11
12
# Import ML libraries
12
- import pandas as pd
13
+ import time
13
14
import numpy as np
14
15
import optuna
15
16
import lightgbm as lgb
@@ -21,7 +22,6 @@ def load_dataset():
21
22
filepath = "../../../data/dataset.csv"
22
23
label_column = "label"
23
24
dataset = ufl .get_df_from_csv (filepath )
24
- print ("len:" , len (dataset ))
25
25
26
26
# Features (X) and labels (y)
27
27
X = dataset .drop (label_column , axis = 1 ).values
def objective(trial):
    """Optuna objective: train a LightGBM binary classifier and score it.

    Loads the dataset, samples one hyper-parameter configuration from the
    trial, trains a gradient-boosted model on an 80/20 split, and returns
    the held-out accuracy (the study in __main__ maximizes this value).

    :param trial: optuna Trial used to sample hyper-parameters.
    :return: float accuracy of the trained model on the held-out split.
    """
    data, target = load_dataset()

    # Fixed random_state so every trial is scored against the same split;
    # otherwise split noise is confounded with hyper-parameter quality and
    # the "seed": 42 below does not actually make trials comparable.
    X_train, X_test, y_train, y_test = train_test_split(
        data, target, test_size=0.2, random_state=42
    )
    dtrain = lgb.Dataset(X_train, label=y_train)

    param = {
        "objective": "binary",
        "metric": "binary_logloss",
        "verbosity": -1,
        "boosting_type": "gbdt",
        "seed": 42,  # fixed seed so a given configuration is reproducible
        "learning_rate": trial.suggest_float("learning_rate", 1e-2, 1),
        "n_estimators": trial.suggest_int("n_estimators", 150, 300),
        "num_leaves": trial.suggest_int("num_leaves", 20, 3000, step=20),
        "max_depth": trial.suggest_int("max_depth", 2, 10),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 5, 50),
        "max_bin": trial.suggest_int("max_bin", 100, 300),
        "lambda_l1": trial.suggest_float("lambda_l1", 1e-8, 10.0, log=True),
        "lambda_l2": trial.suggest_float("lambda_l2", 1e-8, 10.0, log=True),
        # Key normalized: the original "min_gain_to_split " (trailing space)
        # would be silently ignored by LightGBM as an unknown parameter.
        "min_gain_to_split": trial.suggest_float("min_gain_to_split", 0, 10),
        "feature_fraction": trial.suggest_float("feature_fraction", 0.4, 1.0),
        "bagging_fraction": trial.suggest_float("bagging_fraction", 0.4, 1.0),
        "bagging_freq": trial.suggest_int("bagging_freq", 1, 7),
    }

    gbm = lgb.train(param, dtrain)

    # predict() returns class-1 probabilities; round to hard 0/1 labels
    # before computing accuracy.
    preds = gbm.predict(X_test)
    y_pred = np.rint(preds)
    accuracy = sklearn.metrics.accuracy_score(y_test, y_pred)

    return accuracy
62
68
63
69
if __name__ == "__main__":
    # perf_counter is a monotonic clock, so the elapsed-time measurement is
    # immune to wall-clock adjustments (NTP, DST) during a long run;
    # time.time() is not. The reported duration is otherwise identical.
    start_time = time.perf_counter()
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=200)
    elapsed_time = time.perf_counter() - start_time

    print("Number of finished trials: {}".format(len(study.trials)))
76
@@ -74,3 +82,5 @@ def objective(trial):
74
82
print (" Params: " )
75
83
for key , value in trial .params .items ():
76
84
print (" {}: {}" .format (key , value ))
85
+
86
+ print ("Elapsed time:" , elapsed_time )
0 commit comments