Ray consecutive (#101)

kengz · web-flow · commit 6f0330028688 · 2018-03-04T12:13:50.000-05:00
* fix Ray cleanup so that consecutive runs no longer crash

* update a2c spec

* prepare benchmark specs (add dqn_benchmark; switch the actor_critic and reinforce specs to EvolutionarySearch)

* set fitness marker opacity to 0.5
diff --git a/slm_lab/experiment/analysis.py b/slm_lab/experiment/analysis.py
@@ -156,7 +156,7 @@ def plot_experiment(experiment_spec, experiment_df):
                 x=guard_cat_x, xaxis=f'x{col_idx+1}',
                 showlegend=False, mode='markers',
                 marker={
-                    'symbol': 'circle-open-dot', 'color': experiment_df['fitness'],
+                    'symbol': 'circle-open-dot', 'color': experiment_df['fitness'], 'opacity': 0.5,
                     # dump first quarter of colorscale that is too bright
                     'cmin': min_fitness - 0.25 * (max_fitness - min_fitness), 'cmax': max_fitness,
                     'colorscale': 'YIGnBu', 'reversescale': True
diff --git a/slm_lab/experiment/control.py b/slm_lab/experiment/control.py
@@ -3,12 +3,12 @@
 Creates and controls the units of SLM lab: EvolutionGraph, Experiment, Trial, Session
 '''
 from copy import deepcopy
+from importlib import reload
 from slm_lab.agent import AgentSpace
 from slm_lab.env import EnvSpace
 from slm_lab.experiment import analysis, search
 from slm_lab.experiment.monitor import AEBSpace, InfoSpace
 from slm_lab.lib import logger, util, viz
-import importlib
 import numpy as np
 import os
 import pandas as pd
@@ -22,7 +22,7 @@ def init_thread_vars(spec, info_space, unit):
         info_space.tick(unit)
     if logger.to_init(spec, info_space):
         os.environ['PREPATH'] = analysis.get_prepath(spec, info_space)
-        importlib.reload(logger)
+        reload(logger)
 
 
 class Session:
@@ -172,6 +172,7 @@ def init_trial_and_run(self, spec, info_space):
         return trial_data
 
     def close(self):
+        reload(search)  # to fix ray consecutive run crash due to bad cleanup
         logger.info('Experiment done, closing.')
 
     def run(self):
diff --git a/slm_lab/experiment/search.py b/slm_lab/experiment/search.py
@@ -141,11 +141,11 @@ def generate_config(self):
     def run(self):
         '''
         Implement the main run_trial loop.
-        Remember to call ray init and disconnect before and after loop.
+        Remember to call ray init and cleanup before and after loop.
         '''
         ray.init()
         # loop for max_trial: generate_config(); run_trial.remote(config)
-        ray.disconnect()
+        ray.worker.cleanup()
         raise NotImplementedError
         return trial_data_dict
 
@@ -177,7 +177,7 @@ def run(self):
                 pending_ids.append(ray_id)
 
         trial_data_dict.update(get_ray_results(pending_ids, ray_id_to_config))
-        ray.disconnect()
+        ray.worker.cleanup()
         return trial_data_dict
 
 
@@ -286,5 +286,5 @@ def run(self):
                 population = algorithms.varAnd(
                     population, toolbox, cxpb=0.5, mutpb=0.5)
 
-        ray.disconnect()
+        ray.worker.cleanup()
         return trial_data_dict
diff --git a/slm_lab/spec/actor_critic.json b/slm_lab/spec/actor_critic.json
@@ -14,7 +14,7 @@
         "num_step_returns": 100,
         "training_frequency": 32,
         "training_iters_per_batch": 8,
-        "use_GAE": true,
+        "use_GAE": false,
         "policy_loss_weight": 1.0,
         "val_loss_weight": 1.0
       },
@@ -51,16 +51,16 @@
       "num": 1
     },
     "meta": {
-      "max_session": 2,
-      "max_trial": 200,
-      "search": "RandomSearch",
+      "max_session": 4,
+      "max_trial": null,
+      "search": "EvolutionarySearch",
+      "max_generation": 4,
       "train_mode": true
     },
     "search": {
       "agent": [{
         "algorithm": {
-          "gamma__uniform": [0.9, 0.9999],
-          "lambda__uniform": [0, 1]
+          "gamma__uniform": [0.9, 0.9999]
         },
         "net": {
           "hid_layers__choice": [[16], [64], [32, 16], [64, 32]],
@@ -69,9 +69,7 @@
           },
           "optim_critic": {
             "lr__uniform": [0.001, 0.2]
-          },
-          "decay_lr_frequency__choice": [500, 1000, 5000, 10000],
-          "decay_lr_min_timestep__choice": [1000, 2000, 5000]
+          }
         }
       }]
     }
@@ -269,10 +267,10 @@
             [32, 16]
           ],
           "optim_actor": {
-            "lr__choice": [0.002, 0.02]
+            "lr__uniform": [0.002, 0.02]
           },
           "optim_critic": {
-            "lr__choice": [0.002, 0.02]
+            "lr__uniform": [0.002, 0.02]
           }
         }
       }]
diff --git a/slm_lab/spec/dqn.json b/slm_lab/spec/dqn.json
@@ -1,4 +1,71 @@
 {
+  "dqn_benchmark": {
+    "agent": [{
+      "name": "DQN",
+      "algorithm": {
+        "name": "DQN",
+        "action_policy": "boltzmann",
+        "action_policy_update": "linear_decay",
+        "explore_var_start": 1.5,
+        "explore_var_end": 0.3,
+        "explore_anneal_epi": 10,
+        "gamma": 0.999,
+        "training_epoch": 4,
+        "training_frequency": 10,
+        "training_iters_per_batch": 1,
+        "training_min_timestep": 10
+      },
+      "memory": {
+        "name": "Replay",
+        "max_size": 10000
+      },
+      "net": {
+        "type": "MLPNet",
+        "hid_layers": [64],
+        "hid_layers_activation": "sigmoid",
+        "optim": {
+          "name": "Adam",
+          "lr": 0.02
+        },
+        "batch_size": 32,
+        "decay_lr": true,
+        "decay_lr_frequency": 400,
+        "decay_lr_min_timestep": 1400,
+        "update_type": "replace",
+        "update_frequency": 1,
+        "polyak_weight": 0.9
+      }
+    }],
+    "env": [{
+      "name": "CartPole-v0",
+      "max_timestep": null,
+      "max_episode": 200
+    }],
+    "body": {
+      "product": "outer",
+      "num": 1
+    },
+    "meta": {
+      "max_session": 4,
+      "max_trial": null,
+      "search": "EvolutionarySearch",
+      "max_generation": 4,
+      "train_mode": true
+    },
+    "search": {
+      "agent": [{
+        "algorithm": {
+          "explore_anneal_epi__randint": [10, 60]
+        },
+        "net": {
+          "hid_layers__choice": [[16], [64], [32, 16], [64, 32]],
+          "optim": {
+            "lr__uniform": [0.001, 0.2]
+          }
+        }
+      }]
+    }
+  },
   "dqn_cartpole": {
     "agent": [{
       "name": "DQN",
diff --git a/slm_lab/spec/reinforce.json b/slm_lab/spec/reinforce.json
@@ -39,9 +39,10 @@
       "num": 1
     },
     "meta": {
-      "max_session": 2,
-      "max_trial": 200,
-      "search": "RandomSearch",
+      "max_session": 4,
+      "max_trial": null,
+      "search": "EvolutionarySearch",
+      "max_generation": 4,
       "train_mode": true
     },
     "search": {
@@ -52,10 +53,8 @@
         "net": {
           "hid_layers__choice": [[16], [64], [32, 16], [64, 32]],
           "optim": {
-            "lr__uniform": [0.001, 0.1]
-          },
-          "decay_lr_frequency__choice": [500, 1000, 5000, 10000],
-          "decay_lr_min_timestep__choice": [1000, 2000, 5000]
+            "lr__uniform": [0.001, 0.2]
+          }
         }
       }]
     }