rsteca · amarrerod · May 15, 2019 · May 15, 2019 · May 15, 2019 · May 31, 2019
diff --git a/README.md b/README.md
@@ -20,8 +20,6 @@ or clone the repo and just type the following on your shell:
 Usage examples
 --------------
 
-
-
 Example of usage:
 
 ```python
@@ -103,3 +101,71 @@ score_results = (({'x': 1.0, 'y': -1.0, 'z': True}, 1.1353352832366128),
  ({'x': 0.0, 'y': 0.0, 'z': True}, 2.0),
  ({'x': 0.0, 'y': -1.0, 'z': True}, 2.0))
 ```
+
+Using a Keras model:
+
+```python
+from evolutionary_search import EvolutionaryAlgorithmSearchCV
+import numpy as np
+import tensorflow as tf
+from tensorflow import keras
+import random
+
+SEED = 13
+np.random.seed(SEED)
+tf.random.set_seed(SEED)
+random.seed(SEED)
+
+# ...
+# LOAD YOUR DATA, PREPROCESS AND SPLIT IT
+# ...
+
+def build_model(n_hidden=1, n_neurons=30, learning_rate=0.05, input_shape=[9], activation="relu"):
+    model = keras.models.Sequential()
+    options = {"input_shape": input_shape}
+    for layer in range(n_hidden):
+        model.add(keras.layers.Dense(
+            n_neurons, activation=activation, **options))
+        options = {}
+
+    model.add(keras.layers.Dense(13, activation="softmax", **options))
+    s = 20 * len(X_train) // 32
+    learning_rate_fn = keras.optimizers.schedules.ExponentialDecay(
+        learning_rate, s, 0.1)
+    optimizer = keras.optimizers.SGD(learning_rate_fn)
+    model.compile(loss="sparse_categorical_crossentropy",
+                  optimizer=optimizer, metrics=["accuracy"])
+    return model
+
+# Create a Keras wrapper for scikit-learn using the build_model function
+model = keras.wrappers.scikit_learn.KerasClassifier(build_model)
+
+# With recent versions of scikit-learn, extra params for the fit method must be passed to the search's __init__ (as fit_params)
+fit_params = {
+    "epochs": 300,
+    "validation_data": (X_valid, y_valid),
+    "callbacks": [keras.callbacks.EarlyStopping(patience=10),
+                keras.callbacks.ModelCheckpoint(model_filename,
+                                                save_best_only=True)]
+    }
+params = {
+        "n_hidden": [2, 3],
+        "n_neurons": np.arange(10, 500),
+        "learning_rate": [0.001, 0.025, 0.03]
+    }
+search = EvolutionaryAlgorithmSearchCV(estimator=model,
+                                        params=params,
+                                        scoring="accuracy",
+                                        cv=cv,
+                                        verbose=0,
+                                        population_size=50,
+                                        gene_mutation_prob=0.10,
+                                        gene_crossover_prob=0.5,
+                                        tournament_size=3,
+                                        generations_number=5,
+                                        fit_params=fit_params)
+
+search.fit(X_train, y_train)
+print(f"Best score found is: {search.best_score_}")
+print(f"Best params are: {search.best_params_}")
+```
diff --git a/evolutionary_search/cv.py b/evolutionary_search/cv.py
@@ -13,6 +13,7 @@
 from sklearn.metrics.scorer import check_scoring
 from sklearn.utils.validation import _num_samples, indexable
 
+
 def enum(**enums):
     return type('Enum', (), enums)
 
@@ -97,7 +98,8 @@ def _evalFunction(individual, name_values, X, y, scorer, cv, iid, fit_params,
         score = score_cache[paramkey]
     else:
         for train, test in cv.split(X, y):
-            assert len(train) > 0 and len(test) > 0, "Training and/or testing not long enough for evaluation."
+            assert len(train) > 0 and len(
+                test) > 0, "Training and/or testing not long enough for evaluation."
             _score = _fit_and_score(estimator=individual.est, X=X, y=y, scorer=scorer,
                                     train=train, test=test, verbose=verbose,
                                     parameters=parameters, fit_params=fit_params,
@@ -292,7 +294,7 @@ def __init__(self, estimator, params, scoring=None, cv=4,
                  n_jobs=1, iid=True, error_score='raise',
                  fit_params={}):
         super(EvolutionaryAlgorithmSearchCV, self).__init__(
-            estimator=estimator, scoring=scoring, fit_params=fit_params,
+            estimator=estimator, scoring=scoring,
             iid=iid, refit=refit, cv=cv, verbose=verbose,
             error_score=error_score)
         self.params = params
@@ -309,8 +311,10 @@ def __init__(self, estimator, params, scoring=None, cv=4,
         self.best_params_ = None
         self.score_cache = {}
         self.n_jobs = n_jobs
+        self.fit_params = fit_params
         creator.create("FitnessMax", base.Fitness, weights=(1.0,))
-        creator.create("Individual", list, est=clone(self.estimator), fitness=creator.FitnessMax)
+        creator.create("Individual", list, est=clone(
+            self.estimator), fitness=creator.FitnessMax)
 
     @property
     def possible_params(self):
@@ -322,28 +326,34 @@ def cv_results_(self):
         if self._cv_results is None:  # This is to cache the answer until updated
             # Populate output and return
             # If not already fit, returns an empty dictionary
-            possible_params = self.possible_params  # Pre-load property for use in this function
+            # Pre-load property for use in this function
+            possible_params = self.possible_params
             out = defaultdict(list)
             for p, gen in enumerate(self.all_history_):
                 # Get individuals and indexes, their list of scores,
                 # and additionally the name_values for this set of parameters
 
                 idxs, individuals, each_scores = zip(*[(idx, indiv, np.mean(indiv.fitness.values))
-                                                for idx, indiv in list(gen.genealogy_history.items())
-                                                if indiv.fitness.valid and not np.all(np.isnan(indiv.fitness.values))])
+                                                       for idx, indiv in list(gen.genealogy_history.items())
+                                                       if indiv.fitness.valid and not np.all(np.isnan(indiv.fitness.values))])
 
                 name_values, _, _ = _get_param_types_maxint(possible_params[p])
 
                 # Add to output
                 out['param_index'] += [p] * len(idxs)
                 out['index'] += idxs
                 out['params'] += [_individual_to_params(indiv, name_values)
-                                for indiv in individuals]
-                out['mean_test_score'] += [np.nanmean(scores) for scores in each_scores]
-                out['std_test_score'] += [np.nanstd(scores) for scores in each_scores]
-                out['min_test_score'] += [np.nanmin(scores) for scores in each_scores]
-                out['max_test_score'] += [np.nanmax(scores) for scores in each_scores]
-                out['nan_test_score?'] += [np.any(np.isnan(scores)) for scores in each_scores]
+                                  for indiv in individuals]
+                out['mean_test_score'] += [np.nanmean(scores)
+                                           for scores in each_scores]
+                out['std_test_score'] += [np.nanstd(scores)
+                                          for scores in each_scores]
+                out['min_test_score'] += [np.nanmin(scores)
+                                          for scores in each_scores]
+                out['max_test_score'] += [np.nanmax(scores)
+                                          for scores in each_scores]
+                out['nan_test_score?'] += [np.any(np.isnan(scores))
+                                           for scores in each_scores]
             self._cv_results = out
 
         return self._cv_results
@@ -384,15 +394,19 @@ def _fit(self, X, y, parameter_dict):
 
         toolbox = base.Toolbox()
 
-        name_values, gene_type, maxints = _get_param_types_maxint(parameter_dict)
+        name_values, gene_type, maxints = _get_param_types_maxint(
+            parameter_dict)
         if self.gene_type is None:
             self.gene_type = gene_type
 
         if self.verbose:
-            print("Types %s and maxint %s detected" % (self.gene_type, maxints))
+            print("Types %s and maxint %s detected" %
+                  (self.gene_type, maxints))
 
-        toolbox.register("individual", _initIndividual, creator.Individual, maxints=maxints)
-        toolbox.register("population", tools.initRepeat, list, toolbox.individual)
+        toolbox.register("individual", _initIndividual,
+                         creator.Individual, maxints=maxints)
+        toolbox.register("population", tools.initRepeat,
+                         list, toolbox.individual)
 
         # If n_jobs is an int, greater than 1 or less than 0 (indicating to use as
         # many jobs as possible) then we are going to create a default pool.
@@ -416,18 +430,22 @@ def _fit(self, X, y, parameter_dict):
             try:
                 toolbox.register("map", self.n_jobs)
             except Exception:
-                raise TypeError("n_jobs must be either an integer or map function. Received: {}".format(type(self.n_jobs)))
+                raise TypeError("n_jobs must be either an integer or map function. Received: {}".format(
+                    type(self.n_jobs)))
 
         toolbox.register("evaluate", _evalFunction,
                          name_values=name_values, X=X, y=y,
                          scorer=self.scorer_, cv=cv, iid=self.iid, verbose=self.verbose,
                          error_score=self.error_score, fit_params=self.fit_params,
                          score_cache=self.score_cache)
 
-        toolbox.register("mate", _cxIndividual, indpb=self.gene_crossover_prob, gene_type=self.gene_type)
+        toolbox.register("mate", _cxIndividual,
+                         indpb=self.gene_crossover_prob, gene_type=self.gene_type)
 
-        toolbox.register("mutate", _mutIndividual, indpb=self.gene_mutation_prob, up=maxints)
-        toolbox.register("select", tools.selTournament, tournsize=self.tournament_size)
+        toolbox.register("mutate", _mutIndividual,
+                         indpb=self.gene_mutation_prob, up=maxints)
+        toolbox.register("select", tools.selTournament,
+                         tournsize=self.tournament_size)
 
         pop = toolbox.population(n=self.population_size)
         hof = tools.HallOfFame(1)
@@ -446,7 +464,8 @@ def _fit(self, X, y, parameter_dict):
         hist.update(pop)
 
         if self.verbose:
-            print('--- Evolve in {0} possible combinations ---'.format(np.prod(np.array(maxints) + 1)))
+            print(
+                '--- Evolve in {0} possible combinations ---'.format(np.prod(np.array(maxints) + 1)))
 
         pop, logbook = algorithms.eaSimple(pop, toolbox, cxpb=0.5, mutpb=0.2,
                                            ngen=self.generations_number, stats=stats,

diff --git a/setup.py b/setup.py
@@ -25,9 +25,9 @@
     package_dir={'': '.'},
     packages=find_packages('.'),
     install_requires=[
-        'numpy>=1.9.3',
-        'scipy>=0.16.0',
-        'deap>=1.0.2',
+        'numpy>=1.16.4',
+        'scipy>=1.3.0',
+        'deap>=1.2.2',
         'scikit-learn>=0.18.0',
     ],
 )