diff --git a/scene/.gitignore b/scene/.gitignore
new file mode 100644
index 00000000..a5c0b714
--- /dev/null
+++ b/scene/.gitignore
@@ -0,0 +1,156 @@
+# Cache files
+*.cache
+*.tmp
+*.log
+*.swp
+*.swo
+__pycache__/
+*.pyc
+.DS_Store
+Thumbs.db
+
+# Output files
+*.out
+*.o
+*.obj
+*.class
+*.bin
+*.dll
+*.so
+*.a
+*.lib
+*.exe
+*.msi
+*.msp
+*.pdb
+*.ilk
+
+# Compiler-generated files
+*.sln
+*.suo
+*.vcxproj
+*.vcxproj.filters
+*.user
+*.vs
+*.vspscc
+*.vssscc
+*.csproj
+*.csproj.user
+*.csproj.vspscc
+*.csproj.vssscc
+
+# Binary files
+*.bin
+*.dll
+*.exe
+*.msi
+*.msp
+*.pdb
+*.ilk
+*.so
+*.a
+*.lib
+*.o
+*.obj
+
+# Tool- and framework-specific caches
+node_modules/
+bower_components/
+dist/
+build/
+out/
+target/
+
+# Language-specific caches
+# Python
+__pycache__/
+*.pyc
+*.pyo
+*.pyd
+*.egg
+*.egg-info/
+*.egg-link
+*.pydevproject
+*.pyc
+*.pyo
+*.pyd
+*.sqlite3
+
+# C/C++
+*.o
+*.obj
+*.a
+*.lib
+*.so
+*.dll
+*.exe
+*.pdb
+*.ilk
+
+# Java
+*.class
+*.jar
+*.war
+*.ear
+*.class
+*.java~
+*.class~
+*.jar~
+*.war~
+*.ear~
+
+# JavaScript
+node_modules/
+*.js.map
+*.min.js
+*.min.js.map
+
+# MATLAB
+*.mat
+*.fig
+*.mex*
+*.dll
+*.so
+*.a
+*.lib
+
+# Editor and IDE temporary files
+*.swp
+*.swo
+*.swn
+*.swo~
+*.swp~
+*.swo~
+*.swn~
+.DS_Store
+Thumbs.db
+*.bak
+*.tmp
+*.log
+*.cache
+*.session
+*.sublime-workspace
+*.sublime-project
+*.vscode/
+*.vscode-insiders/
+*.vscode-remote/
+*.vscode-workspace
+*.vscode-insiders-workspace
+*.vscode-remote-workspace
+
+# Miscellaneous
+*.dat
+*.bin
+*.bak
+*.tmp
+*.log
+*.cache
+*.session
+*.sublime-workspace
+*.sublime-project
+*.vscode/
+*.vscode-insiders/
+*.vscode-remote/
+*.vscode-workspace
+*.vscode-insiders-workspace
+*.vscode-remote-workspace
\ No newline at end of file
diff --git a/scene/safebench/README.md b/scene/safebench/README.md
new file mode 100644
index 00000000..6ee3469f
--- /dev/null
+++ b/scene/safebench/README.md
@@ -0,0 +1,87 @@
+# SafeBench (simplified)
+
+This directory keeps only the **SafeBench** modules that this project (ChatScene) actually uses, so the project stays small and clear.
+
+For the full SafeBench experience, see the official repository: [SafeBench GitHub](https://github.com/TRI-ML/safebench).
+
+---
+
+## 🚀 Installation
+
+Make sure the following dependencies are installed:
+
+```bash
+pip install gym pygame
+```
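+
+Before running any of the scripts below, it helps to confirm that a Carla server is actually reachable. A minimal sketch in Python, assuming a server on the default port 2000 (the runners in this directory read the port from their scenario config):
+
+```python
+import carla
+
+# Connect to a locally running Carla server (default port 2000 assumed).
+client = carla.Client('localhost', 2000)
+client.set_timeout(10.0)  # the runners below use the same timeout
+
+world = client.get_world()
+print('Connected. Current map:', world.get_map().name)
+print('Server version:', client.get_server_version())
+```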
+
+---
+
+## Core Modules
+
+- **`carla_runner.py`**
+  Connects directly to the Carla simulator to quickly load a static map, place vehicles, and execute predefined actions.
+  Mainly used to **test simple scenarios directly**, with no Scenic script required.
+
+- **`scenic_runner.py`**
+  Reads a `.scenic` script describing a static scene, compiles it with Scenic, then automatically builds the corresponding scene in Carla and runs the simulation.
+  Suited to **natural-language-to-static-scene** generation experiments.
+
+- **`scenic_runner_dynamic.py`**
+  Supports dynamic scenes (elements that change over time, such as moving pedestrians or turning vehicles); it can load more complex dynamic `.scenic` scripts and generate and control scenes in Carla in real time. Both Scenic runners share the internal flow sketched below.
+  Mainly used to **generate dynamic scenes from natural language** and simulate them.
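+
+Under the hood, the Scenic runners drive the simulator through the `ScenicSimulator` helper in `safebench/util/scenic_utils.py` rather than through the `scenic` command-line tool. A rough sketch of that flow, using only the calls that appear in the runners themselves; the file name and `extra_params` values here are placeholders:
+
+```python
+from safebench.util.scenic_utils import ScenicSimulator
+
+# Placeholders: the real values come from the parsed scenario config
+# (see `_init_scenic` in the runners).
+scenic = ScenicSimulator('./your_scenario.scenic', {'town': 'Town05'})
+
+def run_scenes(scenes):
+    # Mirrors `run_scenes` in the runners: bind each sampled scene to a
+    # simulation, then start its behavior generator.
+    for scene in scenes:
+        if scenic.setSimulation(scene):
+            scenic.update_behavior = scenic.runSimulation()
+            next(scenic.update_behavior)
+```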
+
+---
+
+## Usage
+
+1. **Initialize the Carla environment**
+   - Start a Carla server first, and make sure its version is 0.9.15.
+   - Recommended launch command:
+     ```bash
+     ./CarlaUE4.sh -quality-level=Low
+     ```
+
+2. **Run carla_runner.py**
+   - Example command:
+     ```bash
+     python safebench/carla_runner.py
+     ```
+   - Loads a fixed scenario by default; the map, traffic participants, and similar settings are changed in the runner's internal configuration (see the sketch after this list).
+
+3. **Run scenic_runner.py**
+   - First prepare a static `.scenic` scenario file.
+   - Example:
+     ```bash
+     python safebench/scenic_runner.py --scenario_file ./your_scenario.scenic
+     ```
+   - The program compiles the Scenic script and builds the scene in Carla automatically.
+
+4. **Run scenic_runner_dynamic.py**
+   - Suited to scenarios that need generated dynamic behavior.
+   - Example:
+     ```bash
+     python safebench/scenic_runner_dynamic.py --scenario_file ./your_dynamic_scenario.scenic
+     ```
+   - Simulation time, action scripts, and more can be controlled.
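+
+There is no separate config file for the knobs mentioned in step 2; they live in the `env_params` dictionary built in each runner's `__init__`. A trimmed sketch of the fields most worth tuning, with defaults copied from `carla_runner.py` in this patch:
+
+```python
+# Subset of env_params as defined in CarlaRunner.__init__ below.
+env_params = {
+    'display_size': 128,    # screen size of one bird-eye view window
+    'obs_range': 32,        # observation range (meter)
+    'd_behind': 12,         # distance behind the ego vehicle (meter)
+    'out_lane_thres': 4,    # threshold for out of lane (meter)
+    'desired_speed': 8,     # desired speed (m/s)
+    'disable_lidar': True,  # show bird-eye view lidar or not
+}
+```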
+
+---
+
+## Notes
+
+- Only the modules this project needs are kept, so the full upstream SafeBench feature set (benchmark evaluation, adversarial attacks, etc.) is **not supported**.
+- Only the **Carla 0.9.15 + Python 3.7/3.8** environment is supported.
+- Scenic scripts must follow the correct syntax, otherwise compilation may fail.
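+
+One consequence of how the runners initialize Carla: they switch the server into synchronous mode with a fixed step (see `_init_world` in the code below), so a freshly started server will appear frozen until a runner ticks it. The equivalent standalone setup, assuming port 2000 (the step size here is a placeholder for the configured `fixed_delta_seconds`):
+
+```python
+import carla
+
+client = carla.Client('localhost', 2000)
+client.set_timeout(10.0)
+
+world = client.get_world()
+settings = world.get_settings()
+settings.synchronous_mode = True    # the world advances only on tick()
+settings.fixed_delta_seconds = 0.1  # placeholder for the configured step
+world.apply_settings(settings)
+
+world.tick()  # advance the simulation by exactly one step
+```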
+
+---
+
+## References
+
+- [SafeBench official repository](https://github.com/TRI-ML/safebench)
+- [CARLA Simulator](https://carla.org/)
+- [Scenic language documentation](https://scenic-lang.readthedocs.io/)
+
+---
+
+> This directory serves only as a helper module for the ChatScene project; to extend its testing features further, refer to the original SafeBench framework.
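+
+The scripts can also be embedded in other tooling by constructing a runner directly. A minimal sketch, assuming you have assembled the same `agent_config` and `scenario_config` dictionaries the scripts normally load from their configs; only a few of the keys read in `CarlaRunner.__init__` are shown, and `'sac'` is a placeholder policy name:
+
+```python
+from safebench.carla_runner import CarlaRunner
+
+scenario_config = {
+    'seed': 0, 'exp_name': 'demo', 'output_dir': 'log',
+    'mode': 'eval', 'port': 2000, 'render': False, 'num_scenario': 1,
+    # ... plus the remaining keys read in CarlaRunner.__init__ below
+}
+agent_config = {
+    'policy_type': 'sac',  # placeholder; must be a key of AGENT_POLICY_LIST
+    # ... plus the remaining keys read in CarlaRunner.__init__ below
+}
+
+runner = CarlaRunner(agent_config, scenario_config)
+try:
+    runner.run()
+finally:
+    runner.close()
+```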
Clearning up all actors") + self.env.clean_up() + + # save video + if self.save_video: + data_ids = [config.data_id for config in sampled_scenario_configs] + self.logger.save_video(data_ids=data_ids) + + # print score for ranking + self.logger.log(f'[{num_finished_scenario}/{data_loader.num_total_scenario}] Ranking scores for batch scenario:', 'yellow') + for s_i in score_list.keys(): + self.logger.log('\t Env id ' + str(s_i) + ': ' + str(np.mean(score_list[s_i])), 'yellow') + + + # calculate evaluation results + score_function = get_route_scores if self.scenario_category == 'planning' else get_perception_scores + all_running_results = self.logger.add_eval_results(records=self.env.running_results) + all_scores = score_function(all_running_results) + self.logger.add_eval_results(scores=all_scores) + self.logger.print_eval_results() + if len(self.env.running_results) % self.save_freq == 0: + self.logger.save_eval_results(log_name) + self.logger.save_eval_results(log_name) + + def run(self, test_epoch = None): + # get scenario data of different maps + config_by_map = scenario_parse(self.scenario_config, self.logger) + + map_keys = list(config_by_map.keys()) + if self.mode == 'train_agent': + Buffer = RouteReplayBuffer if self.scenario_category == 'planning' else PerceptionReplayBuffer + replay_buffer = Buffer(self.num_scenario, self.mode, self.buffer_capacity) + map_keys = map_keys * 20 + if self.continue_agent_training: + self.logger.load_training_results() + start_episode = self.check_continue_training(self.agent_policy, replay_buffer) + 1 + if start_episode >= self.train_episode: + return + else: + self.clean_cache(self.agent_policy.model_path) + start_episode = -1 + else: + self.agent_policy.load_model(episode=test_epoch) + + for m_i in map_keys: + if self.mode == 'eval': + log_name = f'ROUTE-{config_by_map[m_i][0].route_id-4}' + if self.logger.check_eval_dir(log_name) == len(config_by_map[m_i]): + self.logger.log(f">> This scenario and route have been done.") + continue + elif self.mode == 'train_agent': + if self.current_episode >= self.train_episode - 1: + return + + if self.current_episode + len(config_by_map[m_i]) < start_episode: + self.current_episode += len(config_by_map[m_i]) + continue + + # initialize map and render + try: + self._init_world(m_i) + self._init_renderer() + except: + continue + + # create scenarios within the vectorized wrapper + self.env = VectorWrapper( + self.env_params, + self.scenario_config, + self.world, + self.birdeye_render, + self.display, + self.logger + ) + + # prepare data loader and buffer + data_loader = ScenarioDataLoader(config_by_map[m_i], self.num_scenario, m_i, self.world) + + # run with different modes + + if self.mode == 'eval': +# self.agent_policy.load_model(episode=test_epoch) + # self.scenario_policy.load_model() + self.agent_policy.set_mode('eval') + self.scenario_policy.set_mode('eval') + self.eval(data_loader) + elif self.mode == 'train_agent': +# start_episode = self.check_continue_training(self.agent_policy) +# self.scenario_policy.load_model() + self.agent_policy.set_mode('train') + self.scenario_policy.set_mode('eval') + self.train(data_loader, start_episode, replay_buffer) + elif self.mode == 'train_scenario': + start_episode = self.check_continue_training(self.scenario_policy) + self.agent_policy.load_model() + self.agent_policy.set_mode('eval') + self.scenario_policy.set_mode('train') + self.train(data_loader, start_episode) + else: + raise NotImplementedError(f"Unsupported mode: {self.mode}.") + + def 
check_continue_training(self, policy, replay_buffer): + # load previous checkpoint + policy.load_model(replay_buffer = replay_buffer) + if policy.continue_episode == 0: + start_episode = 0 + self.logger.log('>> Previous checkpoint not found. Training from scratch.') + else: + start_episode = policy.continue_episode + self.logger.log(f'>> Continue training from previous checkpoint, epoch: {start_episode}.') + return start_episode + + def clean_cache(self, path): + # Get a list of all files in directory + all_files = glob.glob(os.path.join(path, '*')) + + # Specify the file to keep + file_to_keep = os.path.join(path, 'model.sac.-001.torch') + + # Remove all files except the one to keep + for file in all_files: + if file != file_to_keep: + os.remove(file) + + def close(self): + pygame.quit() + if self.env: + self.env.clean_up() diff --git a/scene/safebench/scenic_runner.py b/scene/safebench/scenic_runner.py new file mode 100644 index 00000000..096c1c72 --- /dev/null +++ b/scene/safebench/scenic_runner.py @@ -0,0 +1,552 @@ +''' +Date: 2023-01-31 22:23:17 +LastEditTime: 2023-03-07 12:28:17 +Description: + Copyright (c) 2022-2023 Safebench Team + + This work is licensed under the terms of the MIT license. + For a copy, see +''' + +import copy +import os +import json +import glob +import random + +import numpy as np +import carla +import pygame +from tqdm import tqdm + +from safebench.gym_carla.env_wrapper import VectorWrapper +from safebench.gym_carla.envs.render import BirdeyeRender +from safebench.gym_carla.replay_buffer import RouteReplayBuffer, PerceptionReplayBuffer + +from safebench.agent import AGENT_POLICY_LIST +from safebench.scenario import SCENARIO_POLICY_LIST + +from safebench.scenario.scenario_manager.carla_data_provider import CarlaDataProvider +from safebench.scenario.scenario_data_loader import ScenarioDataLoader, ScenicDataLoader +from safebench.scenario.tools.scenario_utils import scenario_parse, scenic_parse + +from safebench.util.logger import Logger, setup_logger_kwargs +from safebench.util.metric_util import get_route_scores, get_perception_scores +from safebench.util.scenic_utils import ScenicSimulator + +class ScenicRunner: + def __init__(self, agent_config, scenario_config): + self.scenario_config = scenario_config + self.agent_config = agent_config + + self.seed = scenario_config['seed'] + self.exp_name = scenario_config['exp_name'] + self.output_dir = scenario_config['output_dir'] + self.mode = scenario_config['mode'] + self.save_video = scenario_config['save_video'] + + self.render = scenario_config['render'] + self.num_scenario = scenario_config['num_scenario'] + self.fixed_delta_seconds = scenario_config['fixed_delta_seconds'] + self.scenario_category = scenario_config['scenario_category'] + + # continue training flag + self.continue_agent_training = scenario_config['continue_agent_training'] + self.continue_scenario_training = scenario_config['continue_scenario_training'] + + # apply settings to carla + self.client = carla.Client('localhost', scenario_config['port']) + self.client.set_timeout(10.0) + self.world = None + self.env = None + + self.env_params = { + 'auto_ego': scenario_config['auto_ego'], + 'obs_type': agent_config['obs_type'], + 'scenario_category': self.scenario_category, + 'ROOT_DIR': scenario_config['ROOT_DIR'], + 'warm_up_steps': 9, # number of ticks after spawning the vehicles + 'disable_lidar': True, # show bird-eye view lidar or not + 'display_size': 128, # screen size of one bird-eye view window + 'obs_range': 32, # observation range (meter) + 
diff --git a/scene/safebench/scenic_runner.py b/scene/safebench/scenic_runner.py
new file mode 100644
index 00000000..096c1c72
--- /dev/null
+++ b/scene/safebench/scenic_runner.py
@@ -0,0 +1,552 @@
+'''
+Date: 2023-01-31 22:23:17
+LastEditTime: 2023-03-07 12:28:17
+Description:
+    Copyright (c) 2022-2023 Safebench Team
+
+    This work is licensed under the terms of the MIT license.
+    For a copy, see
+'''
+
+import copy
+import os
+import json
+import glob
+import random
+
+import numpy as np
+import carla
+import pygame
+from tqdm import tqdm
+
+from safebench.gym_carla.env_wrapper import VectorWrapper
+from safebench.gym_carla.envs.render import BirdeyeRender
+from safebench.gym_carla.replay_buffer import RouteReplayBuffer, PerceptionReplayBuffer
+
+from safebench.agent import AGENT_POLICY_LIST
+from safebench.scenario import SCENARIO_POLICY_LIST
+
+from safebench.scenario.scenario_manager.carla_data_provider import CarlaDataProvider
+from safebench.scenario.scenario_data_loader import ScenarioDataLoader, ScenicDataLoader
+from safebench.scenario.tools.scenario_utils import scenario_parse, scenic_parse
+
+from safebench.util.logger import Logger, setup_logger_kwargs
+from safebench.util.metric_util import get_route_scores, get_perception_scores
+from safebench.util.scenic_utils import ScenicSimulator
+
+class ScenicRunner:
+    def __init__(self, agent_config, scenario_config):
+        self.scenario_config = scenario_config
+        self.agent_config = agent_config
+
+        self.seed = scenario_config['seed']
+        self.exp_name = scenario_config['exp_name']
+        self.output_dir = scenario_config['output_dir']
+        self.mode = scenario_config['mode']
+        self.save_video = scenario_config['save_video']
+
+        self.render = scenario_config['render']
+        self.num_scenario = scenario_config['num_scenario']
+        self.fixed_delta_seconds = scenario_config['fixed_delta_seconds']
+        self.scenario_category = scenario_config['scenario_category']
+
+        # continue training flag
+        self.continue_agent_training = scenario_config['continue_agent_training']
+        self.continue_scenario_training = scenario_config['continue_scenario_training']
+
+        # apply settings to carla
+        self.client = carla.Client('localhost', scenario_config['port'])
+        self.client.set_timeout(10.0)
+        self.world = None
+        self.env = None
+
+        self.env_params = {
+            'auto_ego': scenario_config['auto_ego'],
+            'obs_type': agent_config['obs_type'],
+            'scenario_category': self.scenario_category,
+            'ROOT_DIR': scenario_config['ROOT_DIR'],
+            'warm_up_steps': 9,  # number of ticks after spawning the vehicles
+            'disable_lidar': True,  # show bird-eye view lidar or not
+            'display_size': 128,  # screen size of one bird-eye view window
+            'obs_range': 32,  # observation range (meter)
+            'd_behind': 12,  # distance behind the ego vehicle (meter)
+            'max_past_step': 1,  # the number of past steps to draw
+            'discrete': False,  # whether to use discrete control space
+            'discrete_acc': [-3.0, 0.0, 3.0],  # discrete value of accelerations
+            'discrete_steer': [-0.2, 0.0, 0.2],  # discrete value of steering angles
+            'continuous_accel_range': [-3.0, 3.0],  # continuous acceleration range
+            'continuous_steer_range': [-0.3, 0.3],  # continuous steering angle range
+            'max_episode_step': scenario_config['max_episode_step'],  # maximum timesteps per episode
+            'max_waypt': 12,  # maximum number of waypoints
+            'lidar_bin': 0.125,  # bin size of lidar sensor (meter)
+            'out_lane_thres': 4,  # threshold for out of lane (meter)
+            'desired_speed': 8,  # desired speed (m/s)
+            'image_sz': 1024,  # TODO: move to config of od scenario
+        }
+
+        # pass config from scenario to agent
+        agent_config['mode'] = scenario_config['mode']
+        agent_config['ego_action_dim'] = scenario_config['ego_action_dim']
+        agent_config['ego_state_dim'] = scenario_config['ego_state_dim']
+        agent_config['ego_action_limit'] = scenario_config['ego_action_limit']
+
+        # define logger
+        logger_kwargs = setup_logger_kwargs(
+            self.exp_name,
+            self.output_dir,
+            self.seed,
+            agent=agent_config['policy_type'],
+            scenario=scenario_config['policy_type'],
+            scenario_category=self.scenario_category
+        )
+        self.logger = Logger(**logger_kwargs)
+
+        # prepare parameters
+        if self.mode == 'train_agent':
+            self.buffer_capacity = agent_config['buffer_capacity']
+            self.eval_in_train_freq = agent_config['eval_in_train_freq']
+            self.save_freq = agent_config['save_freq']
+            self.train_episode = agent_config['train_episode']
+            self.current_episode = -1
+            self.logger.save_config(agent_config)
+            self.logger.create_training_dir()
+        elif self.mode == 'train_scenario':
+            self.save_freq = agent_config['save_freq']
+            self.logger.create_eval_dir(load_existing_results=False)
+        elif self.mode == 'eval':
+            self.save_freq = agent_config['save_freq']
+            self.logger.log('>> Evaluation Mode, skip config saving', 'yellow')
+            self.logger.create_eval_dir(load_existing_results=False)
+        else:
+            raise NotImplementedError(f"Unsupported mode: {self.mode}.")
+
+        # define agent and scenario
+        self.logger.log('>> Agent Policy: ' + agent_config['policy_type'])
+        self.logger.log('>> Scenario Policy: ' + scenario_config['policy_type'])
+
+        if self.scenario_config['auto_ego']:
+            self.logger.log('>> Using auto-pilot for the ego vehicle; actions from the policy will be ignored', 'yellow')
+        if scenario_config['policy_type'] == 'ordinary' and self.mode != 'train_agent':
+            self.logger.log('>> Ordinary scenario can only be used in agent training', 'red')
+            raise Exception()
+        self.logger.log('>> ' + '-' * 40)
+
+        # define agent and scenario policy
+        self.agent_policy = AGENT_POLICY_LIST[agent_config['policy_type']](agent_config, logger=self.logger)
+        self.scenario_policy = SCENARIO_POLICY_LIST[scenario_config['policy_type']](scenario_config, logger=self.logger)
+        if self.save_video:
+            assert self.mode == 'eval', "only allow video saving in eval mode"
+            self.logger.init_video_recorder()
+
+    def _init_world(self):
+        self.logger.log(">> Initializing carla world")
+        self.world = self.client.get_world()
+        settings = self.world.get_settings()
+        settings.synchronous_mode = True
+        settings.fixed_delta_seconds = self.fixed_delta_seconds
+        self.world.apply_settings(settings)
+        CarlaDataProvider.set_client(self.client)
+        CarlaDataProvider.set_world(self.world)
+        CarlaDataProvider.set_traffic_manager_port(self.scenario_config['tm_port'])
+
+    def _init_scenic(self, config):
+        self.logger.log(f">> Initializing scenic simulator: {config.scenic_file}")
+        self.scenic = ScenicSimulator(config.scenic_file, config.extra_params)
+
+    def _init_renderer(self):
+        self.logger.log(">> Initializing pygame birdeye renderer")
+        pygame.init()
+        flag = pygame.HWSURFACE | pygame.DOUBLEBUF
+        if not self.render:
+            flag = flag | pygame.HIDDEN
+        if self.scenario_category in ['planning', 'scenic']:
+            # [bird-eye view, Lidar, front view] or [bird-eye view, front view]
+            if self.env_params['disable_lidar']:
+                window_size = (self.env_params['display_size'] * 2, self.env_params['display_size'] * self.num_scenario)
+            else:
+                window_size = (self.env_params['display_size'] * 3, self.env_params['display_size'] * self.num_scenario)
+        else:
+            window_size = (self.env_params['display_size'], self.env_params['display_size'] * self.num_scenario)
+        self.display = pygame.display.set_mode(window_size, flag)
+
+        # initialize the render for generating observation and visualization
+        pixels_per_meter = self.env_params['display_size'] / self.env_params['obs_range']
+        pixels_ahead_vehicle = (self.env_params['obs_range'] / 2 - self.env_params['d_behind']) * pixels_per_meter
+        self.birdeye_params = {
+            'screen_size': [self.env_params['display_size'], self.env_params['display_size']],
+            'pixels_per_meter': pixels_per_meter,
+            'pixels_ahead_vehicle': pixels_ahead_vehicle,
+        }
+        self.birdeye_render = BirdeyeRender(self.world, self.birdeye_params, logger=self.logger)
+
+    def run_scenes(self, scenes):
+        self.logger.log(">> Begin to run the scene...")
+        ## currently there is only one scene in this list ##
+        for scene in scenes:
+            if self.scenic.setSimulation(scene):
+                self.scenic.update_behavior = self.scenic.runSimulation()
+                next(self.scenic.update_behavior)
+
+    def train(self, data_loader, start_episode=0, replay_buffer=None):
+        # general buffer for both agent and scenario
+        for _ in tqdm(range(len(data_loader))):
+            self.current_episode += 1
+            if self.current_episode >= self.train_episode:
+                return
+            if self.current_episode < start_episode:
+                continue
+            # sample scenarios
+            sampled_scenario_configs, _ = data_loader.sampler()
+            # reset the index counter to create an endless loader
+            # data_loader.reset_idx_counter()
+
+            scenes = [config.scene for config in sampled_scenario_configs]
+            # begin to run the scene
+            self.run_scenes(scenes)
+
+            # get static obs and then reset with init action
+            static_obs = self.env.get_static_obs(sampled_scenario_configs)
+            self.scenario_policy.load_model(sampled_scenario_configs)
+            scenario_init_action, additional_dict = self.scenario_policy.get_init_action(static_obs)
+            obs, infos = self.env.reset(sampled_scenario_configs, scenario_init_action)
+            replay_buffer.store_init([static_obs, scenario_init_action], additional_dict=additional_dict)
+
+            # get ego vehicle from scenario
+            self.agent_policy.set_ego_and_route(self.env.get_ego_vehicles(), infos)
+
+            # start loop
+            episode_reward, loss = [], None
+            while not self.env.all_scenario_done():
+                # get action from agent policy and scenario policy (assume using one batch)
+                ego_actions = self.agent_policy.get_action(obs, infos, deterministic=False)
+                scenario_actions = self.scenario_policy.get_action(obs, infos, deterministic=False)
+
+                # apply action to env and get obs
+                next_obs, rewards, dones, infos = self.env.step(ego_actions=ego_actions, scenario_actions=scenario_actions)
+                replay_buffer.store([ego_actions, scenario_actions, obs, next_obs, rewards, dones], additional_dict=infos)
+                obs = copy.deepcopy(next_obs)
+                episode_reward.append(np.mean(rewards))
+
+                # train off-policy agent or scenario
+                if self.mode == 'train_agent' and self.agent_policy.type == 'offpolicy':
+                    loss = self.agent_policy.train(replay_buffer)
+                elif self.mode == 'train_scenario' and self.scenario_policy.type == 'offpolicy':
+                    self.scenario_policy.train(replay_buffer)
+
+            score_function = get_route_scores if self.scenario_category in ['planning', 'scenic'] else get_perception_scores
+            all_scores = score_function(self.env.running_results)
+
+            # clean up the environment
+            self.env.clean_up()
+            replay_buffer.finish_one_episode()
+            self.logger.add_training_results('episode', self.current_episode)
+            self.logger.add_training_results('episode_reward', np.sum(episode_reward))
+            for key, value in all_scores.items():
+                self.logger.add_training_results(key, value)
+            if loss is not None:
+                critic_loss, actor_loss = loss
+            else:
+                critic_loss, actor_loss = 0, 0
+            self.logger.add_training_results('critic_loss', critic_loss)
+            self.logger.add_training_results('actor_loss', actor_loss)
+            self.logger.log(f">> Episode: {self.current_episode}, #buffer_len: {replay_buffer.buffer_len}, critic: {critic_loss:.3f}, actor: {actor_loss:.3f}")
+            self.logger.save_training_results()
+
+            # train on-policy agent or scenario
+            if self.mode == 'train_agent' and self.agent_policy.type == 'onpolicy':
+                self.agent_policy.train(replay_buffer)
+            elif self.mode == 'train_scenario' and self.scenario_policy.type in ['init_state', 'onpolicy']:
+                self.scenario_policy.train(replay_buffer)
+
+            # eval during training
+            if (self.current_episode + 1) % self.eval_in_train_freq == 0:
+                # self.eval(env, data_loader)
+                pass
+
+            # save checkpoints
+            if (self.current_episode + 1) % self.save_freq == 0:
+                if self.mode == 'train_agent':
+                    self.agent_policy.save_model(self.current_episode, replay_buffer)
+                if self.mode == 'train_scenario':
+                    self.scenario_policy.save_model(self.current_episode)
+
+        self.scenic.destroy()
+
+    def eval(self, data_loader, select=False):
+        num_finished_scenario = 0
+        data_loader.reset_idx_counter()
+        # record the score and the id of corresponding selected scenes
+        map_id_score = {}
+        behavior_name = data_loader.behavior
+        route_id = data_loader.route_id
+        opt_step = data_loader.opt_step
+        opt_time = 0
+
+        if route_id is None:
+            log_name = f'OPT_{behavior_name}'
+        else:
+            log_name = f'OPT_{behavior_name}_ROUTE-{route_id}'
+
+        if select:
+            self.scene_map[log_name] = {}
+            self.scene_map[log_name][f'opt_time_{opt_time}'] = self.scenic.save_params()
+
+        while len(data_loader) > 0:
+            # sample scenarios
+            sampled_scenario_configs, num_sampled_scenario = data_loader.sampler()
+            num_finished_scenario += num_sampled_scenario
+            assert num_sampled_scenario == 1, 'scenic can only run one scene at a time'
+
+            scenes = [config.scene for config in sampled_scenario_configs]
+            # begin to run the scene
+            self.run_scenes(scenes)
+
+            # reset envs with new config, get init action from scenario policy, and run scenario
+            static_obs = self.env.get_static_obs(sampled_scenario_configs)
+            self.scenario_policy.load_model(sampled_scenario_configs)
+            scenario_init_action, _ = self.scenario_policy.get_init_action(static_obs, deterministic=True)
+            obs, infos = self.env.reset(sampled_scenario_configs, scenario_init_action)
+
+            # get ego vehicle from scenario
+            self.agent_policy.set_ego_and_route(self.env.get_ego_vehicles(), infos)
+
+            score_list = {s_i: [] for s_i in range(num_sampled_scenario)}
+            while not self.env.all_scenario_done():
+                # get action from agent policy and scenario policy (assume using one batch)
+                ego_actions = self.agent_policy.get_action(obs, infos, deterministic=True)
+                scenario_actions = self.scenario_policy.get_action(obs, infos, deterministic=True)
+
+                # apply action to env and get obs
+                obs, rewards, _, infos = self.env.step(ego_actions=ego_actions, scenario_actions=scenario_actions)
+
+                # save video
+                if self.save_video:
+                    if self.scenario_category in ['planning', 'scenic']:
+                        self.logger.add_frame(pygame.surfarray.array3d(self.display).transpose(1, 0, 2))
+                    else:
+                        self.logger.add_frame({s_i['scenario_id']: ego_actions[n_i]['annotated_image'] for n_i, s_i in enumerate(infos)})
+
+                # accumulate scores of corresponding scenario
+                reward_idx = 0
+                for s_i in infos:
+                    score = rewards[reward_idx] if self.scenario_category in ['planning', 'scenic'] else 1 - infos[reward_idx]['iou_loss']
+                    score_list[s_i['scenario_id']].append(score)
+                    reward_idx += 1
+
+            # clean up all things
+            self.logger.log(">> All scenarios are completed. Cleaning up all actors")
+            self.env.clean_up()
+
+            # save video
+            if self.save_video:
+                data_ids = [config.data_id for config in sampled_scenario_configs]
+                self.logger.save_video(data_ids=data_ids, log_name=log_name)
+
+            # print score for ranking
+            self.logger.log(f'[{num_finished_scenario}/{data_loader.num_total_scenario}] Ranking scores for batch scenario:', color='yellow')
+            for s_i in score_list.keys():
+                self.logger.log('\t Env id ' + str(s_i) + ': ' + str(np.mean(score_list[s_i])), color='yellow')
+
+            # calculate evaluation results
+            score_function = get_route_scores if self.scenario_category in ['planning', 'scenic'] else get_perception_scores
+            all_running_results = self.logger.add_eval_results(records=self.env.running_results)
+            all_scores = score_function(all_running_results)
+            self.logger.add_eval_results(scores=all_scores)
+            self.logger.print_eval_results()
+            if len(self.env.running_results) % self.save_freq == 0:
+                self.logger.save_eval_results(log_name)
+
+            if infos[0]['collision']:
+                self.scenic.record_params()
+            if select and (num_finished_scenario % opt_step == 0):
+                opt_time += 1
+                self.scenic.update_params()
+                self.scene_map[log_name][f'opt_time_{opt_time}'] = self.scenic.save_params()
+                data_loader.train_scene(opt_time)
+
+        self.logger.save_eval_results(log_name)
+
+        if select:
+            self.scene_map[log_name]['select_id'] = self.select_adv_scene(self.logger.eval_records, score_function, data_loader.select_num)
+            self.dump_scene_map(sampled_scenario_configs[0].scenario_id)
+
+        self.logger.clear()
+        self.scenic.destroy()
+
+    def select_adv_scene(self, results, score_function, select_num):
+        # define your own selection mechanism here
+        map_id_score_collision = {}
+        map_id_score_non_collision = {}
+        for i in results.keys():
+            score = score_function({i: results[i]})
+            if score['collision_rate'] == 1:
+                map_id_score_collision[i] = score['final_score']
+            else:
+                map_id_score_non_collision[i] = score['final_score']
+
+        # sort the collision scenes by their scores
+        collision_scenes_sorted = sorted(map_id_score_collision.items(), key=lambda x: x[1])
+
+        # get the number of scenes to select from the collision cases
+        num_collision_selected = min(select_num, len(collision_scenes_sorted))
+
+        # select the lowest scored scenes with collision
+        selected_scene_id = [scene[0] for scene in collision_scenes_sorted[:num_collision_selected]]
+
+        # if there are not enough collision scenes, select the remaining scenes
+        num_non_collision_selected = select_num - num_collision_selected
+        if num_non_collision_selected > 0:
+            # sort the non-collision scenes by their scores
+            non_collision_scenes_sorted = sorted(map_id_score_non_collision.items(), key=lambda x: x[1])
+            # select the lowest scored scenes from the non-collision cases
+            selected_scene_id.extend([scene[0] for scene in non_collision_scenes_sorted[:num_non_collision_selected]])
+        return sorted(selected_scene_id)
+
+    def run(self, test_epoch=None):
+        # get scenario data of different maps
+        config_list = scenic_parse(self.scenario_config, self.logger)
+
+        ### load rl model ###
+        if self.mode == 'train_scenario':
+            ## we only need the pretrained surrogate model here ##
+            pass
+        elif self.mode == 'train_agent':
+            ### initialize buffer ###
+            Buffer = RouteReplayBuffer if self.scenario_category in ['scenic', 'planning'] else PerceptionReplayBuffer
+            replay_buffer = Buffer(self.num_scenario, self.mode, self.buffer_capacity)
+
+            ### repeat the training, 20 is just a random placeholder
+            config_list = config_list * 20
+
+            ### check if resume ###
+            if self.continue_agent_training:
+                self.logger.load_training_results()
+                start_episode = self.check_continue_training(self.agent_policy, replay_buffer) + 1
+                if start_episode >= self.train_episode:
+                    return
+            else:
+                self.clean_cache(self.agent_policy.model_path)
+                start_episode = -1
+
+        elif self.mode == 'eval':
+            ### load trained model for evaluation ###
+            if test_epoch:
+                self.agent_policy.load_model(episode=test_epoch)
+
+        last_town = None
+        for config in config_list:
+
+            ## set log name ##
+            if config.route_id is None:
+                log_name = f'OPT_{config.behavior}'
+            else:
+                log_name = f'OPT_{config.behavior}_ROUTE-{config.route_id}'
+
+            ## check if all done ##
+            if self.mode == 'eval':
+                if self.logger.check_eval_dir(log_name) == config.select_num:
+                    self.logger.log(">> This scenario and route have been done.")
+                    continue
+            elif self.mode == 'train_agent':
+                if self.current_episode >= self.train_episode - 1:
+                    return
+
+                if self.current_episode + config.select_num < start_episode:
+                    self.current_episode += config.select_num
+                    continue
+
+            # initialize scenic
+            self._init_scenic(config)
+
+            # initialize map and render
+            if last_town != config.extra_params['town']:
+                self._init_world()
+                self._init_renderer()
+                last_town = config.extra_params['town']
+            self.world.scenic = self.scenic
+
+            # create scenarios within the vectorized wrapper
+            self.env = VectorWrapper(
+                self.env_params,
+                self.scenario_config,
+                self.world,
+                self.birdeye_render,
+                self.display,
+                self.logger
+            )
+
+            # prepare data loader and buffer
+            data_loader = ScenicDataLoader(self.scenic, config, self.num_scenario)
+
+            # run with different modes
+            if self.mode == 'train_scenario':
+                ### select hard scenic scenario configs with the surrogate model ###
+                self.scene_map = self.load_scene_map(config.scenario_id)
+                self.agent_policy.set_mode('eval')
+                self.scenario_policy.set_mode('eval')
+                self.eval(data_loader, select=True)
+            elif self.mode == 'train_agent':
+                ### train the surrogate model on the selected hard scenarios ###
+                self.agent_policy.set_mode('train')
+                self.scenario_policy.set_mode('eval')
+                self.train(data_loader, start_episode, replay_buffer)
+            elif self.mode == 'eval':
+                ### evaluate the trained agent on different test models ###
+                self.agent_policy.set_mode('eval')
+                self.scenario_policy.set_mode('eval')
+                self.eval(data_loader)
else: + raise NotImplementedError(f"Unsupported mode: {self.mode}.") + + def check_continue_training(self, policy, replay_buffer): + # load previous checkpoint + policy.load_model(replay_buffer = replay_buffer) + if policy.continue_episode == 0: + start_episode = 0 + self.logger.log('>> Previous checkpoint not found. Training from scratch.') + else: + start_episode = policy.continue_episode + self.logger.log(f'>> Continue training from previous checkpoint, epoch: {start_episode}.') + return start_episode + + def dump_scene_map(self, scenario_id): + # load previous checkpoint + scenic_dir = os.path.join(self.scenario_config['scenic_dir'], f'scenario_{scenario_id}') + f = open(os.path.join(scenic_dir, f"{scenic_dir.split('/')[-1]}.json"), 'w') + json_dumps_str = json.dumps(self.scene_map, indent=4) + print(json_dumps_str, file=f) + f.close() + + def load_scene_map(self, scenario_id): + # load previous checkpoint + scenic_dir = os.path.join(self.scenario_config['scenic_dir'], f'scenario_{scenario_id}') + try: + with open(os.path.join(scenic_dir, f"{scenic_dir.split('/')[-1]}.json"), 'r') as f: + data = json.loads(f.read()) + except: + data = {} + return data + + def clean_cache(self, path): + # Get a list of all files in directory + all_files = glob.glob(os.path.join(path, '*')) + + # Specify the file to keep + file_to_keep = os.path.join(path, 'model.sac.-001.torch') + + # Remove all files except the one to keep + for file in all_files: + if file != file_to_keep: + os.remove(file) + + def close(self): + pygame.quit() # close pygame renderer + if self.env: + self.env.clean_up() + + diff --git a/scene/safebench/scenic_runner_dynamic.py b/scene/safebench/scenic_runner_dynamic.py new file mode 100644 index 00000000..43773e50 --- /dev/null +++ b/scene/safebench/scenic_runner_dynamic.py @@ -0,0 +1,540 @@ +''' +Date: 2023-01-31 22:23:17 +LastEditTime: 2023-03-07 12:28:17 +Description: + Copyright (c) 2022-2023 Safebench Team + + This work is licensed under the terms of the MIT license. 
+ For a copy, see +''' + +import copy +import os +import json +import glob +import random + +import numpy as np +import carla +import pygame +from tqdm import tqdm + +from safebench.gym_carla.env_wrapper import VectorWrapper +from safebench.gym_carla.envs.render import BirdeyeRender +from safebench.gym_carla.replay_buffer import RouteReplayBuffer, PerceptionReplayBuffer + +from safebench.agent import AGENT_POLICY_LIST +from safebench.scenario import SCENARIO_POLICY_LIST + +from safebench.scenario.scenario_manager.carla_data_provider import CarlaDataProvider +from safebench.scenario.scenario_data_loader import ScenarioDataLoader, ScenicDataLoader +from safebench.scenario.tools.scenario_utils import scenario_parse, dynamic_scenic_parse + +from safebench.util.logger import Logger, setup_logger_kwargs +from safebench.util.metric_util import get_route_scores, get_perception_scores +from safebench.util.scenic_utils import ScenicSimulator + +class ScenicRunner: + def __init__(self, agent_config, scenario_config): + self.scenario_config = scenario_config + self.agent_config = agent_config + + self.seed = scenario_config['seed'] + self.exp_name = scenario_config['exp_name'] + self.output_dir = scenario_config['output_dir'] + self.mode = scenario_config['mode'] + self.save_video = scenario_config['save_video'] + + self.render = scenario_config['render'] + self.num_scenario = scenario_config['num_scenario'] + self.fixed_delta_seconds = scenario_config['fixed_delta_seconds'] + self.scenario_category = scenario_config['scenario_category'] + + # continue training flag + self.continue_agent_training = scenario_config['continue_agent_training'] + self.continue_scenario_training = scenario_config['continue_scenario_training'] + + # apply settings to carla + self.client = carla.Client('localhost', scenario_config['port']) + self.client.set_timeout(10.0) + self.world = None + self.env = None + + self.env_params = { + 'auto_ego': scenario_config['auto_ego'], + 'obs_type': agent_config['obs_type'], + 'scenario_category': self.scenario_category, + 'ROOT_DIR': scenario_config['ROOT_DIR'], + 'warm_up_steps': 9, # number of ticks after spawning the vehicles + 'disable_lidar': True, # show bird-eye view lidar or not + 'display_size': 128, # screen size of one bird-eye view window + 'obs_range': 32, # observation range (meter) + 'd_behind': 12, # distance behind the ego vehicle (meter) + 'max_past_step': 1, # the number of past steps to draw + 'discrete': False, # whether to use discrete control space + 'discrete_acc': [-3.0, 0.0, 3.0], # discrete value of accelerations + 'discrete_steer': [-0.2, 0.0, 0.2], # discrete value of steering angles + 'continuous_accel_range': [-3.0, 3.0], # continuous acceleration range + 'continuous_steer_range': [-0.3, 0.3], # continuous steering angle range + 'max_episode_step': scenario_config['max_episode_step'], # maximum timesteps per episode + 'max_waypt': 12, # maximum number of waypoints + 'lidar_bin': 0.125, # bin size of lidar sensor (meter) + 'out_lane_thres': 4, # threshold for out of lane (meter) + 'desired_speed': 8, # desired speed (m/s) + 'image_sz': 1024, # TODO: move to config of od scenario + } + + + # pass config from scenario to agent + agent_config['mode'] = scenario_config['mode'] + agent_config['ego_action_dim'] = scenario_config['ego_action_dim'] + agent_config['ego_state_dim'] = scenario_config['ego_state_dim'] + agent_config['ego_action_limit'] = scenario_config['ego_action_limit'] + + # define logger + logger_kwargs = setup_logger_kwargs( + self.exp_name, + 
self.output_dir, + self.seed, + agent=agent_config['policy_type'], + scenario=scenario_config['policy_type'], + scenario_category=self.scenario_category + ) + self.logger = Logger(**logger_kwargs) + + # prepare parameters + if self.mode == 'train_agent': + self.buffer_capacity = agent_config['buffer_capacity'] + self.eval_in_train_freq = agent_config['eval_in_train_freq'] + self.save_freq = agent_config['save_freq'] + self.train_episode = agent_config['train_episode'] + self.current_episode = -1 + self.logger.save_config(agent_config) + self.logger.create_training_dir() + elif self.mode == 'train_scenario': + self.save_freq = agent_config['save_freq'] + self.logger.create_eval_dir(load_existing_results=False) + elif self.mode == 'eval': + self.save_freq = agent_config['save_freq'] + self.logger.log('>> Evaluation Mode, skip config saving', 'yellow') + self.logger.create_eval_dir(load_existing_results=False) + else: + raise NotImplementedError(f"Unsupported mode: {self.mode}.") + + # define agent and scenario + self.logger.log('>> Agent Policy: ' + agent_config['policy_type']) + self.logger.log('>> Scenario Policy: ' + scenario_config['policy_type']) + + if self.scenario_config['auto_ego']: + self.logger.log('>> Using auto-polit for ego vehicle, action of policy will be ignored', 'yellow') + if scenario_config['policy_type'] == 'ordinary' and self.mode != 'train_agent': + self.logger.log('>> Ordinary scenario can only be used in agent training', 'red') + raise Exception() + self.logger.log('>> ' + '-' * 40) + + # define agent and scenario policy + self.agent_policy = AGENT_POLICY_LIST[agent_config['policy_type']](agent_config, logger=self.logger) + self.scenario_policy = SCENARIO_POLICY_LIST[scenario_config['policy_type']](scenario_config, logger=self.logger) + if self.save_video: + assert self.mode == 'eval', "only allow video saving in eval mode" + self.logger.init_video_recorder() + + def _init_world(self): + self.logger.log(">> Initializing carla world") + self.world = self.client.get_world() + settings = self.world.get_settings() + settings.synchronous_mode = True + settings.fixed_delta_seconds = self.fixed_delta_seconds + self.world.apply_settings(settings) + CarlaDataProvider.set_client(self.client) + CarlaDataProvider.set_world(self.world) + CarlaDataProvider.set_traffic_manager_port(self.scenario_config['tm_port']) + + def _init_scenic(self, config): + self.logger.log(f">> Initializing scenic simulator: {config.scenic_file}") + self.scenic = ScenicSimulator(config.scenic_file, config.extra_params) + + def _init_renderer(self): + self.logger.log(">> Initializing pygame birdeye renderer") + pygame.init() + flag = pygame.HWSURFACE | pygame.DOUBLEBUF + if not self.render: + flag = flag | pygame.HIDDEN + if self.scenario_category in ['planning', 'scenic']: + # [bird-eye view, Lidar, front view] or [bird-eye view, front view] + if self.env_params['disable_lidar']: + window_size = (self.env_params['display_size'] * 2, self.env_params['display_size'] * self.num_scenario) + else: + window_size = (self.env_params['display_size'] * 3, self.env_params['display_size'] * self.num_scenario) + else: + window_size = (self.env_params['display_size'], self.env_params['display_size'] * self.num_scenario) + self.display = pygame.display.set_mode(window_size, flag) + + # initialize the render for generating observation and visualization + pixels_per_meter = self.env_params['display_size'] / self.env_params['obs_range'] + pixels_ahead_vehicle = (self.env_params['obs_range'] / 2 - 
self.env_params['d_behind']) * pixels_per_meter + self.birdeye_params = { + 'screen_size': [self.env_params['display_size'], self.env_params['display_size']], + 'pixels_per_meter': pixels_per_meter, + 'pixels_ahead_vehicle': pixels_ahead_vehicle, + } + self.birdeye_render = BirdeyeRender(self.world, self.birdeye_params, logger=self.logger) + + def run_scenes(self, scenes): + self.logger.log(f">> Begin to run the scene...") + ## currently there is only one scene in this list ## + for scene in scenes: + if self.scenic.setSimulation(scene): + self.scenic.update_behavior = self.scenic.runSimulation() + next(self.scenic.update_behavior) + + def train(self, data_loader, start_episode=0, replay_buffer = None): + # general buffer for both agent and scenario + + for _ in tqdm(range(len(data_loader))): + self.current_episode += 1 + if self.current_episode >= self.train_episode: + return + if self.current_episode < start_episode: + continue + # sample scenarios + sampled_scenario_configs, _ = data_loader.sampler() + # reset the index counter to create endless loader + # data_loader.reset_idx_counter() + + scenes = [config.scene for config in sampled_scenario_configs] + # begin to run the scene + self.run_scenes(scenes) + + # get static obs and then reset with init action + static_obs = self.env.get_static_obs(sampled_scenario_configs) + self.scenario_policy.load_model(sampled_scenario_configs) + scenario_init_action, additional_dict = self.scenario_policy.get_init_action(static_obs) + obs, infos = self.env.reset(sampled_scenario_configs, scenario_init_action) + replay_buffer.store_init([static_obs, scenario_init_action], additional_dict=additional_dict) + + # get ego vehicle from scenario + self.agent_policy.set_ego_and_route(self.env.get_ego_vehicles(), infos) + + # start loop + episode_reward = [] + while not self.env.all_scenario_done(): + # get action from agent policy and scenario policy (assume using one batch) + ego_actions = self.agent_policy.get_action(obs, infos, deterministic=False) + scenario_actions = self.scenario_policy.get_action(obs, infos, deterministic=False) + + # apply action to env and get obs + next_obs, rewards, dones, infos = self.env.step(ego_actions=ego_actions, scenario_actions=scenario_actions) + replay_buffer.store([ego_actions, scenario_actions, obs, next_obs, rewards, dones], additional_dict=infos) + obs = copy.deepcopy(next_obs) + episode_reward.append(np.mean(rewards)) + + # train off-policy agent or scenario + if self.mode == 'train_agent' and self.agent_policy.type == 'offpolicy': + loss = self.agent_policy.train(replay_buffer) + elif self.mode == 'train_scenario' and self.scenario_policy.type == 'offpolicy': + self.scenario_policy.train(replay_buffer) + + score_function = get_route_scores if self.scenario_category in ['planning', 'scenic'] else get_perception_scores + all_scores = score_function(self.env.running_results) + + # end up environment + self.env.clean_up() + replay_buffer.finish_one_episode() + self.logger.add_training_results('episode', self.current_episode) + self.logger.add_training_results('episode_reward', np.sum(episode_reward)) + for key, value in all_scores.items(): + self.logger.add_training_results(key, value) + if loss is not None: + critic_loss, actor_loss = loss + self.logger.add_training_results('critic_loss', critic_loss) + self.logger.add_training_results('actor_loss', actor_loss) + else: + critic_loss, actor_loss = 0, 0 + self.logger.add_training_results('critic_loss', critic_loss) + self.logger.add_training_results('actor_loss', 
actor_loss) + self.logger.log(f">> Episode: {self.current_episode}, #buffer_len: {replay_buffer.buffer_len}, critic: {critic_loss:.3f}, actor: {actor_loss:.3f}") + self.logger.save_training_results() + + # train on-policy agent or scenario + if self.mode == 'train_agent' and self.agent_policy.type == 'onpolicy': + self.agent_policy.train(replay_buffer) + elif self.mode == 'train_scenario' and self.scenario_policy.type in ['init_state', 'onpolicy']: + self.scenario_policy.train(replay_buffer) + + # eval during training + if (self.current_episode+1) % self.eval_in_train_freq == 0: + #self.eval(env, data_loader) + pass + + # save checkpoints + if (self.current_episode+1) % self.save_freq == 0: + if self.mode == 'train_agent': + self.agent_policy.save_model(self.current_episode, replay_buffer) + if self.mode == 'train_scenario': + self.scenario_policy.save_model(self.current_episode) + + self.scenic.destroy() + + def eval(self, data_loader, select = False): + num_finished_scenario = 0 + data_loader.reset_idx_counter() + # recording the score and the id of corresponding selected scenes + map_id_score = {} + behavior_name = data_loader.behavior + opt_step = data_loader.opt_step + opt_time = 0 + + log_name = f'OPT_{behavior_name}' + + if select: + self.scene_map[log_name] = {} + self.scene_map[log_name][f'opt_time_{opt_time}'] = self.scenic.save_params() + + while len(data_loader) > 0: + # sample scenarios + sampled_scenario_configs, num_sampled_scenario = data_loader.sampler() + num_finished_scenario += num_sampled_scenario + assert num_sampled_scenario == 1, 'scenic can only run one scene at one time' + + scenes = [config.scene for config in sampled_scenario_configs] + # begin to run the scene + self.run_scenes(scenes) + + # reset envs with new config, get init action from scenario policy, and run scenario + static_obs = self.env.get_static_obs(sampled_scenario_configs) + self.scenario_policy.load_model(sampled_scenario_configs) + scenario_init_action, _ = self.scenario_policy.get_init_action(static_obs, deterministic=True) + obs, infos = self.env.reset(sampled_scenario_configs, scenario_init_action) + + # get ego vehicle from scenario + self.agent_policy.set_ego_and_route(self.env.get_ego_vehicles(), infos) + + score_list = {s_i: [] for s_i in range(num_sampled_scenario)} + while not self.env.all_scenario_done(): + # get action from agent policy and scenario policy (assume using one batch) + ego_actions = self.agent_policy.get_action(obs, infos, deterministic=True) + scenario_actions = self.scenario_policy.get_action(obs, infos, deterministic=True) + + # apply action to env and get obs + obs, rewards, _, infos = self.env.step(ego_actions=ego_actions, scenario_actions=scenario_actions) + + # save video + if self.save_video: + self.logger.add_frame(pygame.surfarray.array3d(self.display).transpose(1, 0, 2)) + + # accumulate scores of corresponding scenario + reward_idx = 0 + for s_i in infos: + score = rewards[reward_idx] if self.scenario_category in ['planning', 'scenic'] else 1-infos[reward_idx]['iou_loss'] + score_list[s_i['scenario_id']].append(score) + reward_idx += 1 + + # clean up all things + self.logger.log(">> All scenarios are completed. 
Cleaning up all actors")
+            self.env.clean_up()
+
+            # save video
+            if self.save_video:
+                data_ids = [config.data_id for config in sampled_scenario_configs]
+                self.logger.save_video(data_ids=data_ids, log_name=log_name)
+
+            # print score for ranking
+            self.logger.log(f'[{num_finished_scenario}/{data_loader.num_total_scenario}] Ranking scores for batch scenario:', color='yellow')
+            for s_i in score_list.keys():
+                self.logger.log('\t Env id ' + str(s_i) + ': ' + str(np.mean(score_list[s_i])), color='yellow')
+
+            # calculate evaluation results
+            score_function = get_route_scores if self.scenario_category in ['planning', 'scenic'] else get_perception_scores
+            all_running_results = self.logger.add_eval_results(records=self.env.running_results)
+            all_scores = score_function(all_running_results)
+            self.logger.add_eval_results(scores=all_scores)
+            self.logger.print_eval_results()
+            if len(self.env.running_results) % self.save_freq == 0:
+                self.logger.save_eval_results(log_name)
+
+            if infos[0]['collision']:
+                self.scenic.record_params()
+
+            if select and (num_finished_scenario % opt_step == 0):
+                opt_time += 1
+                self.scenic.update_params()
+                self.scene_map[log_name][f'opt_time_{opt_time}'] = self.scenic.save_params()
+                data_loader.train_scene(opt_time)
+
+        self.logger.save_eval_results(log_name)
+
+        if select:
+            self.scene_map[log_name]['select_id'] = self.select_adv_scene(self.logger.eval_records, score_function, data_loader.select_num)
+            self.dump_scene_map()
+
+        self.logger.clear()
+        self.scenic.destroy()
+
+    def select_adv_scene(self, results, score_function, select_num):
+        # define your own selection mechanism here:
+        # prefer colliding scenes with the lowest final score, then pad with the
+        # lowest-scoring non-colliding scenes if fewer than select_num collide
+        map_id_score_collision = {}
+        map_id_score_non_collision = {}
+        for i in results.keys():
+            score = score_function({i: results[i]})
+            if score['collision_rate'] == 1:
+                map_id_score_collision[i] = score['final_score']
+            else:
+                map_id_score_non_collision[i] = score['final_score']
+
+        # Sort the collision scenes by their scores
+        collision_scenes_sorted = sorted(map_id_score_collision.items(), key=lambda x: x[1])
+
+        # Get the number of scenes to select from the collision cases
+        num_collision_selected = min(select_num, len(collision_scenes_sorted))
+
+        # Select the lowest scored scenes with collision
+        selected_scene_id = [scene[0] for scene in collision_scenes_sorted[:num_collision_selected]]
+
+        # If not enough collision scenes, select remaining scenes
+        num_non_collision_selected = select_num - num_collision_selected
+        if num_non_collision_selected > 0:
+            # Sort the non-collision scenes by their scores
+            non_collision_scenes_sorted = sorted(map_id_score_non_collision.items(), key=lambda x: x[1])
+            # Select the lowest scored scenes from the non-collision cases
+            selected_scene_id.extend([scene[0] for scene in non_collision_scenes_sorted[:num_non_collision_selected]])
+        return sorted(selected_scene_id)
+
+    def run(self, test_epoch=None):
+        # get scenario data of different maps
+        # NOTE: dynamic_scenic_parse is not imported above; it presumably lives
+        # alongside scenario_parse in safebench.scenario.tools.scenario_utils
+        config_list = dynamic_scenic_parse(self.scenario_config, self.logger)
+
+        ### load rl model ###
+        if self.mode == 'train_scenario':
+            ## we only need the pretrained surrogate model here ##
+            pass
+        elif self.mode == 'train_agent':
+            ## initialize buffer ##
+            Buffer = RouteReplayBuffer if self.scenario_category in ['scenic', 'planning'] else PerceptionReplayBuffer
+            replay_buffer = Buffer(self.num_scenario, self.mode, self.buffer_capacity)
+
+            ### repeat the training; 20 is an arbitrary repetition factor so the loader never runs dry ###
+            config_list = config_list * 20
+
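+            # Illustrative budget (assumed numbers): with train_episode = 100 and
+            # select_num = 5 scenes per config, train() consumes about 20 configs
+            # before reaching the episode limit; the x20 repetition above only
+            # guarantees config_list is long enough.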
+            ### check if resume ###
+            if self.continue_agent_training:
+                self.logger.load_training_results()
+                start_episode = self.check_continue_training(self.agent_policy, replay_buffer) + 1
+                if start_episode >= self.train_episode:
+                    return
+            else:
+                self.clean_cache(self.agent_policy.model_path)
+                start_episode = -1
+
+        elif self.mode == 'eval' and test_epoch:
+            ### load trained model for evaluation ###
+            self.agent_policy.load_model(episode=test_epoch)
+
+        last_town = None
+        for config in config_list:
+
+            ## set log name ##
+            log_name = f'OPT_{config.behavior}'
+
+            ## check if all done ##
+            if self.mode == 'eval':
+                if self.logger.check_eval_dir(log_name) == config.select_num:
+                    self.logger.log(">> This scenario and route have already been completed.")
+                    continue
+            elif self.mode == 'train_agent':
+                if self.current_episode >= self.train_episode - 1:
+                    return
+
+                if self.current_episode + config.select_num < start_episode:
+                    self.current_episode += config.select_num
+                    continue
+
+            # initialize scenic
+            self._init_scenic(config)
+
+            # initialize map and render
+            self._init_world()
+            self._init_renderer()
+            self.world.scenic = self.scenic
+
+            # create scenarios within the vectorized wrapper
+            self.env = VectorWrapper(
+                self.env_params,
+                self.scenario_config,
+                self.world,
+                self.birdeye_render,
+                self.display,
+                self.logger
+            )
+
+            # prepare data loader and buffer
+            # NOTE: ScenicDataLoader is not imported above (only ScenarioDataLoader is);
+            # it presumably comes from the same safebench.scenario.scenario_data_loader module
+            data_loader = ScenicDataLoader(self.scenic, config, self.num_scenario)
+
+            # run with different modes
+            if self.mode == 'train_scenario':
+                ### select hard scenic scenario config on the surrogate model ###
+                self.scene_map = self.load_scene_map()
+                self.agent_policy.set_mode('eval')
+                self.scenario_policy.set_mode('eval')
+                self.eval(data_loader, select=True)
+            elif self.mode == 'train_agent':
+                ### train the surrogate model on the selected hard scenarios ###
+                self.agent_policy.set_mode('train')
+                self.scenario_policy.set_mode('eval')
+                self.train(data_loader, start_episode, replay_buffer)
+            elif self.mode == 'eval':
+                ### evaluate the trained agent on different test models ###
+                self.agent_policy.set_mode('eval')
+                self.scenario_policy.set_mode('eval')
+                self.eval(data_loader)
+            else:
+                raise NotImplementedError(f"Unsupported mode: {self.mode}.")
+
+    def check_continue_training(self, policy, replay_buffer):
+        # load previous checkpoint
+        policy.load_model(replay_buffer=replay_buffer)
+        if policy.continue_episode == 0:
+            start_episode = 0
+            self.logger.log('>> Previous checkpoint not found. Training from scratch.')
+        else:
+            start_episode = policy.continue_episode
+            self.logger.log(f'>> Continue training from previous checkpoint, epoch: {start_episode}.')
+        return start_episode
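+
+    # Example of the resume flow above (hypothetical checkpoint naming): a checkpoint
+    # saved at episode 49 yields policy.continue_episode == 49 after load_model(), so
+    # run() resumes from start_episode = 49 + 1 = 50 and skips already-finished configs.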
+
+    def dump_scene_map(self):
+        # dump the selected scene parameters to a json file
+        import json  # json is not imported at the top of this file
+        scenic_dir = self.scenario_config['scenic_dir']
+        with open(os.path.join(scenic_dir, "dynamic_scenario.json"), 'w') as f:
+            print(json.dumps(self.scene_map, indent=4), file=f)
+
+    def load_scene_map(self):
+        # load the previously saved scene parameters, if any
+        import json  # json is not imported at the top of this file
+        scenic_dir = self.scenario_config['scenic_dir']
+        try:
+            with open(os.path.join(scenic_dir, "dynamic_scenario.json"), 'r') as f:
+                data = json.loads(f.read())
+        except (OSError, json.JSONDecodeError):
+            data = {}
+        return data
+
+    def clean_cache(self, path):
+        # Get a list of all files in directory
+        all_files = glob.glob(os.path.join(path, '*'))
+
+        # Specify the file to keep
+        file_to_keep = os.path.join(path, 'model.sac.-001.torch')
+
+        # Remove all files except the one to keep
+        for file in all_files:
+            if file != file_to_keep:
+                os.remove(file)
+
+    def close(self):
+        pygame.quit()  # close pygame renderer
+        if self.env:
+            self.env.clean_up()
diff --git a/scene/safebench/util/__pycache__/od_util.cpython-37.pyc b/scene/safebench/util/__pycache__/od_util.cpython-37.pyc
new file mode 100644
index 00000000..3373c9c2
Binary files /dev/null and b/scene/safebench/util/__pycache__/od_util.cpython-37.pyc differ
diff --git a/scene/safebench/util/__pycache__/run_util.cpython-37.pyc b/scene/safebench/util/__pycache__/run_util.cpython-37.pyc
new file mode 100644
index 00000000..9dac8106
Binary files /dev/null and b/scene/safebench/util/__pycache__/run_util.cpython-37.pyc differ
diff --git a/scene/safebench/util/__pycache__/scenic_utils.cpython-38.pyc b/scene/safebench/util/__pycache__/scenic_utils.cpython-38.pyc
new file mode 100644
index 00000000..cf5114e8
Binary files /dev/null and b/scene/safebench/util/__pycache__/scenic_utils.cpython-38.pyc differ
diff --git a/scene/safebench/util/__pycache__/torch_util.cpython-37.pyc b/scene/safebench/util/__pycache__/torch_util.cpython-37.pyc
new file mode 100644
index 00000000..a289044d
Binary files /dev/null and b/scene/safebench/util/__pycache__/torch_util.cpython-37.pyc differ
diff --git a/scene/safebench/util/logger.py b/scene/safebench/util/logger.py
new file mode 100644
index 00000000..7f0bc293
--- /dev/null
+++ b/scene/safebench/util/logger.py
@@ -0,0 +1,370 @@
+'''
+Date: 2023-01-31 22:23:17
+LastEditTime: 2023-04-01 16:02:49
+Description:
+    Copyright (c) 2022-2023 Safebench Team
+
+    This work is licensed under the terms of the MIT license.
+    For a copy, see
+'''
+
+import atexit
+import json
+import os
+import os.path as osp
+import time
+import pickle
+
+import joblib
+import numpy as np
+import yaml
+
+from safebench.util.run_util import VideoRecorder, VideoRecorder_Perception
+
+
+# Where experiment outputs are saved by default:
+DEFAULT_DATA_DIR = osp.abspath(osp.dirname(osp.dirname(osp.dirname(__file__))))
+
+# Whether to automatically insert a date and time stamp into the names of
+# save directories:
+FORCE_DATESTAMP = False
+
+
+def setup_logger_kwargs(exp_name, output_dir, seed, datestamp=False, agent=None, scenario=None, scenario_category='planning'):
+    # Datestamp forcing
+    datestamp = datestamp or FORCE_DATESTAMP
+
+    # Make base path
+    ymd_time = time.strftime("%Y-%m-%d_") if datestamp else ''
+    relpath = ''.join([ymd_time, exp_name])
+
+    # specify agent policy and scenario policy in the experiment directory.
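+    # Illustrative result (assumed arguments): exp_name='chatscene', agent='sac',
+    # scenario='scenic', seed=0 and datestamp=False resolve to
+    #   <DEFAULT_DATA_DIR>/<output_dir>/chatscene/chatscene_sac_scenic_seed_0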
+    agent_scenario_exp_name = exp_name
+    if agent is not None:
+        agent_scenario_exp_name = agent_scenario_exp_name + '_' + agent
+    if scenario is not None:
+        agent_scenario_exp_name = agent_scenario_exp_name + '_' + scenario
+
+    # Make a seed-specific subfolder in the experiment directory.
+    if datestamp:
+        hms_time = time.strftime("%Y-%m-%d_%H-%M-%S")
+        subfolder = ''.join([hms_time, '-', agent_scenario_exp_name, '_s', str(seed)])
+    else:
+        subfolder = ''.join([agent_scenario_exp_name, '_seed_', str(seed)])
+    relpath = osp.join(relpath, subfolder)
+
+    data_dir = os.path.join(DEFAULT_DATA_DIR, output_dir)
+    logger_kwargs = dict(
+        output_dir=osp.join(data_dir, relpath),
+        exp_name=exp_name,
+        scenario_category=scenario_category,
+    )
+    return logger_kwargs
+
+
+def is_json_serializable(v):
+    try:
+        json.dumps(v)
+        return True
+    except Exception:
+        return False
+
+
+def convert_json(obj):
+    """ Convert obj to a version which can be serialized with JSON. """
+    if is_json_serializable(obj):
+        return obj
+    else:
+        if isinstance(obj, dict):
+            return {convert_json(k): convert_json(v) for k, v in obj.items()}
+        elif isinstance(obj, tuple):
+            # materialize the tuple; a bare generator expression is not JSON serializable
+            return tuple(convert_json(x) for x in obj)
+        elif isinstance(obj, list):
+            return [convert_json(x) for x in obj]
+        elif hasattr(obj, '__name__') and not ('lambda' in obj.__name__):
+            return convert_json(obj.__name__)
+        elif hasattr(obj, '__dict__') and obj.__dict__:
+            obj_dict = {convert_json(k): convert_json(v) for k, v in obj.__dict__.items()}
+            return {str(obj): obj_dict}
+
+        return str(obj)
+
+
+def statistics_scalar(x, with_min_and_max=False):
+    """
+    Get mean/std and optional min/max of scalar x.
+    Args:
+        x: An array containing samples of the scalar to produce statistics for.
+        with_min_and_max (bool): If true, return min and max of x in
+            addition to mean and std.
+    """
+    x = np.array(x, dtype=np.float32)
+    mean = np.mean(x)
+    std = np.std(x)
+    if with_min_and_max:
+        return mean, std, np.min(x), np.max(x)
+    return mean, std
+
+
+color2num = dict(
+    gray=30,
+    red=31,
+    green=32,
+    yellow=33,
+    blue=34,
+    magenta=35,
+    cyan=36,
+    white=37,
+    crimson=38
+)
+
+
+def colorize(string, color, bold=False, highlight=False):
+    """
+    Colorize a string.
+
+    This function was originally written by John Schulman.
+    """
+    attr = []
+    num = color2num[color]
+    if highlight: num += 10
+    attr.append(str(num))
+    if bold: attr.append('1')
+    return '\x1b[%sm%s\x1b[0m' % (';'.join(attr), string)
+
+
+class Logger:
+    """
+    A general-purpose logger.
+    Makes it easy to save diagnostics, hyperparameter configurations, the state of a training run, and the trained model.
+    """
+    def __init__(self, output_dir=None, output_fname='progress.txt', exp_name=None, scenario_category='planning'):
+        """
+        Initialize a Logger.
+
+        Args:
+            output_dir (string): A directory for saving results to.
+                If ``None``, defaults to a temp directory of the form ``/tmp/experiments/somerandomnumber``.
+
+            output_fname (string): Name for the tab-separated-value file
+                containing metrics logged throughout a training run. Defaults to ``progress.txt``.
+
+            exp_name (string): Experiment name. If you run multiple training
+                runs and give them all the same ``exp_name``, the plotter will know to group them. (Use case: if you run the same
+                hyperparameter configuration with multiple random seeds, you should give them all the same ``exp_name``.)
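+
+            scenario_category (string): One of ``planning``, ``scenic`` or ``perception``;
+                determines which video recorder ``init_video_recorder`` creates.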
+ """ + self.epoch = 0 + self.first_row = True + self.log_headers = [] + self.log_current_row = {} + self.exp_name = exp_name + self.log_print_history = [] + self.video_recorder = None + self.scenario_category = scenario_category + + self.output_dir = output_dir or "/tmp/experiments/%i" % int(time.time()) + self.log('>> ' + '-' * 40) + if osp.exists(self.output_dir): + self.log(">> Log path %s already exists! Storing info there anyway." % self.output_dir, 'yellow') + else: + os.makedirs(self.output_dir) + self.output_file = open(osp.join(self.output_dir, output_fname), 'a') + atexit.register(self.output_file.close) + self.log(">> Logging data to %s" % self.output_file.name, 'green') + + self.eval_results = {} + self.eval_records = {} + self.training_results = {} + + def create_training_dir(self): + result_dir = os.path.join(self.output_dir, 'training_results') + os.makedirs(result_dir, exist_ok=True) + self.result_file = os.path.join(result_dir, 'results.pkl') + + def load_training_results(self): + if os.path.exists(self.result_file): + with open(self.result_file, 'rb') as f: + self.training_results = pickle.load(f) + else: + self.training_results = {} + + def add_training_results(self, name=None, value=None): + if name is not None: + if name not in self.training_results: + self.training_results[name] = [] + self.training_results[name].append(value) + + def save_training_results(self): + self.log(f'>> Saving training results to {self.result_file}') + joblib.dump(self.training_results, self.result_file) + + def print_training_results(self): + self.log("Training results:") + for key, value in self.eval_results.items(): + self.log(f"\t {key: <25}{value}") + + def create_eval_dir(self, load_existing_results=True): + result_dir = os.path.join(self.output_dir, 'eval_results') + os.makedirs(result_dir, exist_ok=True) + self.result_file = os.path.join(result_dir, 'results.pkl') + self.record_file = os.path.join(result_dir, 'records.pkl') + if load_existing_results: + if os.path.exists(self.record_file): + self.log(f'>> Loading existing evaluation records from {self.record_file}, ') + self.eval_records = joblib.load(self.record_file) + else: + self.log(f'>> Loading existing record fail because no records.pkl is found.') + self.eval_records = {} + + def add_eval_results(self, scores=None, records=None): + if scores is not None: + self.eval_results.update(scores) + if records is not None: + self.eval_records.update(records) + return self.eval_records + +# def save_eval_results(self): +# self.log(f'>> Saving evaluation results to {self.result_file}') +# joblib.dump(self.eval_results, self.result_file) +# self.log(f'>> Saving evaluation records to {self.record_file}, length: {len(self.eval_records)}') +# joblib.dump(self.eval_records, self.record_file) + + def save_eval_results(self, name = None): + if name == None: + self.log(f'>> Saving evaluation results to {self.result_file}') + joblib.dump(self.eval_results, self.result_file) + self.log(f'>> Saving evaluation records to {self.record_file}, length: {len(self.eval_records)}') + joblib.dump(self.eval_records, self.record_file) + else: + result_dir = os.path.join(self.output_dir, 'eval_results') + os.makedirs(result_dir, exist_ok=True) + self.result_file = os.path.join(result_dir, name + '_results.pkl') + self.record_file = os.path.join(result_dir, name + '_records.pkl') + self.log(f'>> Saving evaluation results to {self.result_file}') + joblib.dump(self.eval_results, self.result_file) + self.log(f'>> Saving evaluation records to {self.record_file}') 
+ joblib.dump(self.eval_records, self.record_file) + + def check_eval_dir(self, name): + result_dir = os.path.join(self.output_dir, 'eval_results') + os.makedirs(result_dir, exist_ok=True) + self.result_file = os.path.join(result_dir, name + '_results.pkl') + self.record_file = os.path.join(result_dir, name + '_records.pkl') + try: + return len(joblib.load(self.record_file)) + except: + return 0 + + def clear(self): + self.eval_results = {} + self.eval_records = {} + + def print_eval_results(self): + self.log("Evaluation results:") + for key, value in self.eval_results.items(): + self.log(f"\t {key: <25}{value}") + + def log(self, msg, color='green'): + # print with color + print(colorize(msg, color, bold=True)) + # save print message to log file + self.log_print_history.append(msg) + + def log_dict(self, dict_msg, color='green'): + for key, value in dict_msg.items(): + self.log("{}: {}".format(key, value), color) + + def log_tabular(self, key, val): + """ + Log a value of some diagnostic. + """ + if self.first_row: + self.log_headers.append(key) + else: + assert key in self.log_headers, "Trying to introduce a new key %s that you didn't include in the first iteration" % key + assert key not in self.log_current_row, "You already set %s this iteration. Maybe you forgot to call dump_tabular()" % key + self.log_current_row[key] = val + + def save_config(self, config): + """ + Log an experiment configuration. + """ + if self.exp_name is not None: + config['exp_name'] = self.exp_name + config_json = convert_json(config) + output = json.dumps(config_json, separators=(',', ':\t'), indent=4, sort_keys=True) + # print(colorize('Saving config:\n', color='cyan', bold=True)) + # print(output) + with open(osp.join(self.output_dir, "config.json"), 'w') as out: + out.write(output) + + with open(osp.join(self.output_dir, "config.yaml"), 'w') as out: + yaml.dump(config, out, default_flow_style=False, indent=4, sort_keys=False) + + def save_state(self, state_dict, itr=None): + """ + Saves the state of an experiment. + + Args: + state_dict (dict): Dictionary containing essential elements to + describe the current state of training. + + itr: An int, or None. Current iteration of training. + """ + fname = 'vars.pkl' if itr is None else 'vars%d.pkl' % itr + try: + joblib.dump(state_dict, osp.join(self.output_dir, fname)) + except: + self.log('Warning: could not pickle state_dict.', color='red') + + def dump_tabular(self, x_axis="Epoch", verbose=True, env=None): + """ + Write all of the diagnostics from the current iteration. Writes both to stdout, and to the output file. 
+ x_axis: "Epoch" or "TotalEnvInteracts" + """ + data_dict = {} + self.epoch += 1 + vals = [] + key_lens = [len(key) for key in self.log_headers] + max_key_len = max(15, max(key_lens)) + keystr = '%' + '%d' % max_key_len + fmt = "| " + keystr + "s | %15s |" + n_slashes = 22 + max_key_len + if verbose: + print("-" * n_slashes) + if env is not None: + print("Env: ", env) + print("-" * n_slashes) + for key in self.log_headers: + val = self.log_current_row.get(key, "") + valstr = "%8.3g" % val if hasattr(val, "__float__") else val + if verbose: + print(fmt % (key, valstr)) + vals.append(val) + + if key == x_axis: + self.steps = val + if verbose: + print("-" * n_slashes, flush=True) + if self.output_file is not None: + if self.first_row: + self.output_file.write("\t".join(self.log_headers) + "\n") + self.output_file.write("\t".join(map(str, vals)) + "\n") + self.output_file.flush() + + self.log_current_row.clear() + self.first_row = False + return data_dict + + def init_video_recorder(self): + if self.scenario_category in ['planning', 'scenic']: + self.video_recorder = VideoRecorder(self.output_dir, logger=self) + elif self.scenario_category == 'perception': + self.video_recorder = VideoRecorder_Perception(self.output_dir, logger=self) + + def add_frame(self, frame): + self.video_recorder.add_frame(frame) + + def save_video(self, data_ids): + self.video_recorder.save(data_ids=data_ids) diff --git a/scene/safebench/util/metric_util.py b/scene/safebench/util/metric_util.py new file mode 100644 index 00000000..12bf0f90 --- /dev/null +++ b/scene/safebench/util/metric_util.py @@ -0,0 +1,317 @@ +import joblib +import math +import numpy as np + +from copy import deepcopy +import argparse + +import torch +from safebench.scenario.scenario_definition.atomic_criteria import Status + + +def cal_out_of_road_length(sequence): + out_of_road_raw = [i['off_road'] for i in sequence] + out_of_road = deepcopy(out_of_road_raw) + for i, out in enumerate(out_of_road_raw): + if out and i + 1 < len(out_of_road_raw): + out_of_road[i + 1] = True + + total_length = 0 + for i, out in enumerate(out_of_road): + if i == 0: + continue + if out: + total_length += sequence[i]['driven_distance'] - sequence[i - 1]['driven_distance'] + + return total_length + + +def cal_avg_yaw_velocity(sequence): + total_yaw_change = 0 + for i, time_stamp in enumerate(sequence): + if i == 0: + continue + total_yaw_change += abs(sequence[i]['ego_yaw'] - sequence[i - 1]['ego_yaw']) + total_yaw_change = total_yaw_change / 180 * math.pi + try: + avg_yaw_velocity = total_yaw_change / (sequence[-1]['current_game_time'] - sequence[0]['current_game_time']) + except: + avg_yaw_velocity = 0 + return avg_yaw_velocity + +def get_route_scores(record_dict): + # safety level + num_collision = 0 + num_run_red_light = 0 + num_run_stop_sign = 0 + sum_out_of_road_length = 0 + for data_id, sequence in record_dict.items(): + if sequence[-1]['collision'] == Status.FAILURE: + num_collision += 1 + num_run_red_light += sequence[-1]['run_red_light'] + num_run_stop_sign += sequence[-1]['run_stop'] + sum_out_of_road_length += cal_out_of_road_length(sequence) + + collision_rate = num_collision / len(record_dict) + avg_red_light_freq = num_run_red_light / len(record_dict) + avg_stop_sign_freq = num_run_stop_sign / len(record_dict) + out_of_road_length = sum_out_of_road_length / len(record_dict) + + # task performance level + total_route_completion = 0 + total_time_spent = 0 + success_data_cnt = 0 + total_distance_to_route = 0 + for data_id, sequence in record_dict.items(): + 
total_route_completion += sequence[-1]['route_complete'] / 100 + if sequence[-1]['route_complete'] == 100: + success_data_cnt += 1 + total_time_spent += sequence[-1]['current_game_time'] - sequence[0]['current_game_time'] + avg_distance_to_route = 0 + for time_stamp in sequence: + avg_distance_to_route += time_stamp['distance_to_route'] + total_distance_to_route += avg_distance_to_route / len(sequence) + + avg_distance_to_route = total_distance_to_route / len(record_dict) + route_following_stability = max(1 - avg_distance_to_route / 5, 0) + route_completion = total_route_completion / len(record_dict) + avg_time_spent = 0 if success_data_cnt == 0 else total_time_spent / success_data_cnt + + # comfort level + num_lane_invasion = 0 + total_acc = 0 + total_yaw_velocity = 0 + for data_id, sequence in record_dict.items(): + num_lane_invasion += sequence[-1]['lane_invasion'] + avg_acc = 0 + for time_stamp in sequence: + avg_acc += math.sqrt(time_stamp['ego_acceleration_x'] ** 2 + time_stamp['ego_acceleration_y'] ** 2 + time_stamp['ego_acceleration_z'] ** 2) + total_acc += avg_acc / len(sequence) + total_yaw_velocity += cal_avg_yaw_velocity(sequence) + + avg_lane_invasion_freq = num_lane_invasion / len(record_dict) + avg_acceleration = total_acc / len(record_dict) + avg_yaw_velocity = total_yaw_velocity / len(record_dict) + + predefined_max_values = { + # safety level + 'collision_rate': 1, + 'avg_red_light_freq': 1, + 'avg_stop_sign_freq': 1, + 'out_of_road_length': 50, + + # task performance level + 'route_following_stability': 1, + 'route_completion': 1, + 'avg_time_spent': 60, + + # comfort level + 'avg_acceleration': 8, + 'avg_yaw_velocity': 3, + 'avg_lane_invasion_freq': 20, + } + + scores = { + # safety level + 'collision_rate': collision_rate, + 'avg_red_light_freq': avg_red_light_freq, + 'avg_stop_sign_freq': avg_stop_sign_freq, + 'out_of_road_length': out_of_road_length, + + # task performance level + 'route_following_stability': route_following_stability, + 'route_completion': route_completion, + 'avg_time_spent': avg_time_spent, + + # comfort level + 'avg_acceleration': avg_acceleration, + 'avg_yaw_velocity': avg_yaw_velocity, + 'avg_lane_invasion_freq': avg_lane_invasion_freq, + + } + + # normalized_scores + ns = {metric: score if metric not in predefined_max_values else score / predefined_max_values[metric] for metric, score in scores.items()} + + final_score = ((1 - ns['collision_rate']) * 5 + + (3 - ns['avg_red_light_freq'] - ns['avg_stop_sign_freq'] - ns['out_of_road_length']) * 1 + + (ns['route_following_stability'] + ns['route_completion'] + 1 - ns['avg_time_spent']) * 0.5 + + (3 - ns['avg_acceleration'] - ns['avg_yaw_velocity'] - ns['avg_lane_invasion_freq']) * 0.2) / 10.1 + + ns['safety_os'] = ((1 - ns['collision_rate']) * 5 + (3 - ns['avg_red_light_freq'] - ns['avg_stop_sign_freq'] - ns['out_of_road_length']) * 1) / 8 + ns['task_os'] = (ns['route_following_stability'] + ns['route_completion'] + 1 - ns['avg_time_spent']) * 0.5 / 1.5 + ns['comfort_os'] = (3 - ns['avg_acceleration'] - ns['avg_yaw_velocity'] - ns['avg_lane_invasion_freq']) * 0.2 / 0.6 + ns['final_score'] = final_score + return ns + +# def get_route_scores(record_dict, time_out=30): +# # safety level +# num_collision = 0 +# sum_out_of_road_length = 0 +# for data_id, sequence in record_dict.items(): +# if sequence[-1]['collision'] == Status.FAILURE: +# num_collision += 1 +# sum_out_of_road_length += cal_out_of_road_length(sequence) + +# collision_rate = num_collision / len(record_dict) +# out_of_road_length = 
sum_out_of_road_length / len(record_dict) + +# # task performance level +# total_route_completion = 0 +# total_time_spent = 0 +# total_distance_to_route = 0 +# for data_id, sequence in record_dict.items(): +# total_route_completion += sequence[-1]['route_complete'] / 100 +# total_time_spent += sequence[-1]['current_game_time'] - sequence[0]['current_game_time'] +# avg_distance_to_route = 0 +# for time_stamp in sequence: +# avg_distance_to_route += time_stamp['distance_to_route'] +# total_distance_to_route += avg_distance_to_route / len(sequence) + +# avg_distance_to_route = total_distance_to_route / len(record_dict) +# route_completion = total_route_completion / len(record_dict) +# avg_time_spent = total_time_spent / len(record_dict) + +# # comfort level +# num_lane_invasion = 0 +# total_acc = 0 +# total_yaw_velocity = 0 +# for data_id, sequence in record_dict.items(): +# num_lane_invasion += sequence[-1]['lane_invasion'] +# avg_acc = 0 +# for time_stamp in sequence: +# avg_acc += math.sqrt(time_stamp['ego_acceleration_x'] ** 2 + time_stamp['ego_acceleration_y'] ** 2 + time_stamp['ego_acceleration_z'] ** 2) +# total_acc += avg_acc / len(sequence) +# total_yaw_velocity += cal_avg_yaw_velocity(sequence) + +# predefined_max_values = { +# # safety level +# 'collision_rate': 1, +# 'out_of_road_length': 10, + +# # task performance level +# 'distance_to_route': 5, +# 'incomplete_route': 1, +# 'running_time': time_out, +# } + +# weights = { +# # safety level +# 'collision_rate': 0.4, +# 'out_of_road_length': 0.1, + +# # task performance level +# 'distance_to_route': 0.1, +# 'incomplete_route': 0.3, +# 'running_time': 0.1, +# } + +# scores = { +# # safety level +# 'collision_rate': collision_rate, +# 'out_of_road_length': out_of_road_length, + +# # task performance level +# 'distance_to_route': avg_distance_to_route, +# 'incomplete_route': 1 - route_completion, +# 'running_time': avg_time_spent, +# } + +# all_scores = {key: round(value/predefined_max_values[key], 2) for key, value in scores.items()} +# final_score = 0 +# for key, score in all_scores.items(): +# final_score += score * weights[key] +# all_scores['final_score'] = final_score + +# return all_scores + +def compute_ap(recall, precision, method='interp'): + """ Compute the average precision, given the recall and precision curves + # Arguments + recall: The recall curve (list) + precision: The precision curve (list) + # Returns + Average precision, precision curve, recall curve + """ + + # Append sentinel values to beginning and end + mrec = np.concatenate(([0.0], recall, [1.0])) + mpre_input = np.concatenate(([1.0], precision, [0.0])) + + # Compute the precision envelope + mpre = np.flip(np.maximum.accumulate(np.flip(mpre_input))) + + # Integrate area under curve + # methods: 'continuous', 'interp' + if method == 'interp': + x = np.linspace(0, 1, 101) # 101-point interp (COCO) + ap = np.trapz(np.interp(x, mrec, mpre), x) # integrate + else: # 'continuous' + i = np.where(mrec[1:] != mrec[:-1])[0] # points where x axis (recall) changes + ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) # area under curve + + return ap, mpre_input, mpre, mrec + + +def _get_pr_curve(conf_scores, logits, num_gt, data_id, iou_thres=0.5): + eps = 1e-8 + idx = torch.argsort(conf_scores, descending=True) + logits = logits[idx] + tp = torch.cumsum(logits >= iou_thres, dim=0) + tp_fp = torch.cumsum(logits >= -0., dim=0) + precision = (tp / tp_fp).numpy() + recall = (tp / (num_gt + eps)).numpy() + + ap, mpre_input, mpre, mrec = compute_ap(recall, precision, 
method='continuous')
+    return ap
+
+
+def get_perception_scores(record_dict):
+    mAP = []
+    IoU_list = []
+    pred = []
+    gt = []
+    cls = []
+    scores = []
+    for data_id in record_dict.keys():
+        IoU_list.append([rec['iou'] for rec in record_dict[data_id]])
+        conf_scores = torch.cat([rec['scores'] for rec in record_dict[data_id]])
+        logits = torch.cat([rec['logits'] for rec in record_dict[data_id]])
+        num_gt = len(record_dict[data_id])
+
+        # COCO-style mAP: average the AP over IoU thresholds 0.50:0.05:0.95
+        map_iou = []
+        for th in np.arange(0.5, 1.0, 0.05):
+            map_iou.append(_get_pr_curve(conf_scores, logits, num_gt, data_id, iou_thres=th))
+
+        mAP.append(np.mean(map_iou))
+        pred.append([rec['pred'] for rec in record_dict[data_id]])
+        gt.append([rec['gt'] for rec in record_dict[data_id]])
+        cls.append([rec['class'] for rec in record_dict[data_id]])
+        scores.append([rec['scores'] for rec in record_dict[data_id]])
+
+    IoU_mean = [np.mean(iou) for iou in IoU_list]
+
+    return {
+        # 'scores': scores,
+        # 'pred': pred,
+        # 'gt': gt,
+        # 'class': cls,
+        'mean_iou': IoU_mean,
+        'mAP_evaluate': mAP,
+    }
+
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--record_file', default='/home/carla/output/testing_records/record.pkl')
+    arguments = parser.parse_args()
+    return arguments
+
+
+if __name__ == '__main__':
+    args = parse_args()
+    record = joblib.load(args.record_file)
+    # all_scores, normalized_scores, final_score = get_scores(record)
+    # print('overall score:', final_score)
diff --git a/scene/safebench/util/od_util.py b/scene/safebench/util/od_util.py
new file mode 100644
index 00000000..2fd0ce35
--- /dev/null
+++ b/scene/safebench/util/od_util.py
@@ -0,0 +1,130 @@
+'''
+Date: 2023-01-31 22:23:17
+LastEditTime: 2023-03-04 14:40:44
+Description:
+    Copyright (c) 2022-2023 Safebench Team
+
+    This work is licensed under the terms of the MIT license.
+    For a copy, see
+'''
+
+import torch
+import numpy as np
+import cv2
+from moviepy.video.io.ffmpeg_writer import FFMPEG_VideoWriter
+
+
+def CPU(x):
+    return x.detach().cpu().numpy()
+
+
+def CUDA(x):
+    if isinstance(x, np.ndarray):
+        x = torch.from_numpy(x)
+    return x.cuda()
+
+
+def save_image(fp, img):
+    # cv2.cvtColor returns a new array; the result must be kept
+    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+    cv2.imwrite(fp, img)
+
+
+def build_projection_matrix(w, h, fov):
+    focal = w / (2.0 * np.tan(fov * np.pi / 360.0))
+    K = np.identity(3)
+    K[0, 0] = K[1, 1] = focal
+    K[0, 2] = w / 2.0
+    K[1, 2] = h / 2.0
+    return K
+
+
+def box_area(box):
+    # box = xyxy(4,n)
+    return (box[2] - box[0]) * (box[3] - box[1])
+
+
+def box_iou(box1, box2, eps=1e-7):
+    # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
+    """
+    Return intersection-over-union (Jaccard index) of boxes.
+    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
+ Arguments: + box1 (Tensor[N, 4]) + box2 (Tensor[M, 4]) + Returns: + iou (Tensor[N, M]): the NxM matrix containing the pairwise + IoU values for every element in boxes1 and boxes2 + """ + + # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2) + # diff = box + (a1, a2), (b1, b2) = box1[:, None].chunk(2, 2), box2.chunk(2, 1) + + inter = (torch.min(a2, b2) - torch.max(a1, b1)).clamp(0).prod(2) + + # IoU = inter / (area1 + area2 - inter) + return inter / (box_area(box1.T)[:, None] + box_area(box2.T) - inter + eps) + + +def xywh2xyxy(x): + # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right + y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x + y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y + y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x + y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y + return y + + +def xyxy2xywh(x): + # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right + y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y[:, 0] = (x[:, 0] + x[:, 2]) / 2 # x center + y[:, 1] = (x[:, 1] + x[:, 3]) / 2 # y center + y[:, 2] = x[:, 2] - x[:, 0] # width + y[:, 3] = x[:, 3] - x[:, 1] # height + return y + + +def xyxy2xywhn(x, w=1024, h=1024, clip=False, eps=0.0): + # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] normalized where xy1=top-left, xy2=bottom-right + + y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y[:, 0] = ((x[:, 0] + x[:, 2]) / 2) / w # x center + y[:, 1] = ((x[:, 1] + x[:, 3]) / 2) / h # y center + y[:, 2] = (x[:, 2] - x[:, 0]) / w # width + y[:, 3] = (x[:, 3] - x[:, 1]) / h # height + return y + + +def get_xyxy(x): + return torch.tensor([np.min(x[:, 0]), np.min(x[:, 1]), np.max(x[:, 0]), np.max(x[:, 1])]) + + +# coco_name +names_coco128 = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', + 'fire hydrant', 'stopsign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', + 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', + 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', + 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', + 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', + 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', + 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', + 'hair drier', 'toothbrush'] + + +names_coco_paper = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', + 'street sign', 'stopsign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', + 'bear', 'zebra', 'giraffe', 'hat', 'backpack', 'umbrella', 'shoe', 'eye glasses', 'handbag', 'tie', 'suitcase', + 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', + 'tennis racket', 'bottle', 'plate', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', + 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'mirror', + 'dining table', 'window', 'desk', 'toilet', 'door', 'tv', 'laptop', 
'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', + 'oven', 'toaster', 'sink', 'refrigerator', 'blender', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', + 'toothbrush', 'hair brush'] + + +if __name__ == '__main__': + a = torch.rand(5, 4) + b = torch.rand(4, 4) + print(box_iou(a, b)) \ No newline at end of file diff --git a/scene/safebench/util/pid_controller.py b/scene/safebench/util/pid_controller.py new file mode 100644 index 00000000..ae40f459 --- /dev/null +++ b/scene/safebench/util/pid_controller.py @@ -0,0 +1,239 @@ +''' +Date: 2023-01-31 22:23:17 +LastEditTime: 2023-04-03 19:00:37 +Description: + Copyright (c) 2022-2023 Safebench Team + + This file is modified from + Copyright (c) 2018-2020 Intel Corporation + + This work is licensed under the terms of the MIT license. + For a copy, see +''' + +from collections import deque +import math +import numpy as np +import carla +from agents.tools.misc import get_speed + + +class VehiclePIDController(): + """ + VehiclePIDController is the combination of two PID controllers + (lateral and longitudinal) to perform the low level control a vehicle from client side + """ + + def __init__(self, vehicle, args_lateral, args_longitudinal, offset=0, max_throttle=0.75, max_brake=0.3, max_steering=0.8): + """ + :param vehicle: actor to apply to local planner logic onto + :param args_lateral: dictionary of arguments to set the lateral PID controller using the following semantics: + K_P -- Proportional term + K_D -- Differential term + K_I -- Integral term + :param args_longitudinal: dictionary of arguments to set the longitudinal PID controller using the following semantics: + K_P -- Proportional term + K_D -- Differential term + K_I -- Integral term + :param offset: If different than zero, the vehicle will drive displaced from the center line. + Positive values imply a right offset while negative ones mean a left one. Numbers high enough + to cause the vehicle to drive through other lanes might break the controller. + """ + + self.max_brake = max_brake + self.max_throt = max_throttle + self.max_steer = max_steering + + self._vehicle = vehicle + self._world = self._vehicle.get_world() + self.past_steering = self._vehicle.get_control().steer + self._lon_controller = PIDLongitudinalController(self._vehicle, **args_longitudinal) + self._lat_controller = PIDLateralController(self._vehicle, offset, **args_lateral) + + def run_step(self, target_speed, transform): + """ + Execute one step of control invoking both lateral and longitudinal PID controllers to reach a target waypoint at a given target_speed. + :param target_speed: desired vehicle speed + :param waypoint: target location encoded as a waypoint + :return: distance (in meters) to the waypoint + """ + + acceleration = self._lon_controller.run_step(target_speed) + current_steering = self._lat_controller.run_step(transform) + control = carla.VehicleControl() + if acceleration >= 0.0: + control.throttle = min(acceleration, self.max_throt) + control.brake = 0.0 + else: + control.throttle = 0.0 + control.brake = min(abs(acceleration), self.max_brake) + + # Steering regulation: changes cannot happen abruptly, can't steer too much. 
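+        # e.g. (illustrative): with the 0.1-per-step cap below and max_steering = 0.8,
+        # swinging the wheel from 0 to full lock takes at least 8 control steps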
+        if current_steering > self.past_steering + 0.1:
+            current_steering = self.past_steering + 0.1
+        elif current_steering < self.past_steering - 0.1:
+            current_steering = self.past_steering - 0.1
+
+        if current_steering >= 0:
+            steering = min(self.max_steer, current_steering)
+        else:
+            steering = max(-self.max_steer, current_steering)
+
+        control.steer = steering
+        control.hand_brake = False
+        control.manual_gear_shift = False
+        self.past_steering = steering
+
+        return control
+
+    def change_longitudinal_PID(self, args_longitudinal):
+        """Changes the parameters of the PIDLongitudinalController"""
+        self._lon_controller.change_parameters(**args_longitudinal)
+
+    def change_lateral_PID(self, args_lateral):
+        """Changes the parameters of the PIDLateralController"""
+        self._lat_controller.change_parameters(**args_lateral)
+
+
+class PIDLongitudinalController():
+    """
+    PIDLongitudinalController implements longitudinal control using a PID.
+    """
+
+    def __init__(self, vehicle, K_P=1.0, K_I=0.0, K_D=0.0, dt=0.03):
+        """
+        Constructor method.
+        :param vehicle: actor to apply to local planner logic onto
+        :param K_P: Proportional term
+        :param K_D: Differential term
+        :param K_I: Integral term
+        :param dt: time differential in seconds
+        """
+        self._vehicle = vehicle
+        self._k_p = K_P
+        self._k_i = K_I
+        self._k_d = K_D
+        self._dt = dt
+        self._error_buffer = deque(maxlen=10)
+
+    def run_step(self, target_speed):
+        """
+        Execute one step of longitudinal control to reach a given target speed.
+        :param target_speed: target speed in Km/h
+        :return: throttle control
+        """
+        current_speed = get_speed(self._vehicle)
+        return self._pid_control(target_speed, current_speed)
+
+    def _pid_control(self, target_speed, current_speed):
+        """
+        Estimate the throttle/brake of the vehicle based on the PID equations
+
+        :param target_speed: target speed in Km/h
+        :param current_speed: current speed of the vehicle in Km/h
+        :return: throttle/brake control
+        """
+        error = target_speed - current_speed
+        self._error_buffer.append(error)
+
+        if len(self._error_buffer) >= 2:
+            _de = (self._error_buffer[-1] - self._error_buffer[-2]) / self._dt
+            _ie = sum(self._error_buffer) * self._dt
+        else:
+            _de = 0.0
+            _ie = 0.0
+
+        return np.clip((self._k_p * error) + (self._k_d * _de) + (self._k_i * _ie), -1.0, 1.0)
+
+    def change_parameters(self, K_P, K_I, K_D, dt):
+        self._k_p = K_P
+        self._k_i = K_I
+        self._k_d = K_D
+        self._dt = dt
+
+
+class PIDLateralController():
+    """
+    PIDLateralController implements lateral control using a PID.
+    """
+
+    def __init__(self, vehicle, offset=0, K_P=1.0, K_I=0.0, K_D=0.0, dt=0.03):
+        """
+        Constructor method.
+
+        :param vehicle: actor to apply to local planner logic onto
+        :param offset: distance to the center line. It might cause issues if the value
+            is large enough to make the vehicle invade other lanes.
+        :param K_P: Proportional term
+        :param K_D: Differential term
+        :param K_I: Integral term
+        :param dt: time differential in seconds
+        """
+        self._vehicle = vehicle
+        self._k_p = K_P
+        self._k_i = K_I
+        self._k_d = K_D
+        self._dt = dt
+        self._offset = offset
+        self._e_buffer = deque(maxlen=10)
+
+    def run_step(self, transform):
+        """
+        Execute one step of lateral control to steer
+        the vehicle towards a certain waypoint.
+ + :param transform: target waypoint + :return: steering control in the range [-1, 1] where: + -1 maximum steering to left + +1 maximum steering to right + """ + return self._pid_control(transform, self._vehicle.get_transform()) + + def _pid_control(self, transform, vehicle_transform): + """ + Estimate the steering angle of the vehicle based on the PID equations + + :param transform: target waypoint + :param vehicle_transform: current transform of the vehicle + :return: steering control in the range [-1, 1] + """ + # Get the ego's location and forward vector + ego_loc = vehicle_transform.location + v_vec = vehicle_transform.get_forward_vector() + v_vec = np.array([v_vec.x, v_vec.y, 0.0]) + + # Get the vector vehicle-target_wp + if self._offset != 0: + # Displace the wp to the side + w_tran = transform + r_vec = w_tran.get_right_vector() + w_loc = w_tran.location + carla.Location(x=self._offset*r_vec.x, y=self._offset*r_vec.y) + else: + w_loc = transform.location + + w_vec = np.array([w_loc.x - ego_loc.x, w_loc.y - ego_loc.y, 0.0]) + wv_linalg = np.linalg.norm(w_vec) * np.linalg.norm(v_vec) + if wv_linalg == 0: + _dot = 1 + else: + _dot = math.acos(np.clip(np.dot(w_vec, v_vec) / (wv_linalg), -1.0, 1.0)) + _cross = np.cross(v_vec, w_vec) + if _cross[2] < 0: + _dot *= -1.0 + + self._e_buffer.append(_dot) + if len(self._e_buffer) >= 2: + _de = (self._e_buffer[-1] - self._e_buffer[-2]) / self._dt + _ie = sum(self._e_buffer) * self._dt + else: + _de = 0.0 + _ie = 0.0 + + return np.clip((self._k_p * _dot) + (self._k_d * _de) + (self._k_i * _ie), -1.0, 1.0) + + def change_parameters(self, K_P, K_I, K_D, dt): + self._k_p = K_P + self._k_i = K_I + self._k_d = K_D + self._dt = dt diff --git a/scene/safebench/util/run_util.py b/scene/safebench/util/run_util.py new file mode 100644 index 00000000..52e039b9 --- /dev/null +++ b/scene/safebench/util/run_util.py @@ -0,0 +1,184 @@ +''' +Date: 2023-01-31 22:23:17 +LastEditTime: 2023-04-01 15:15:03 +Description: + Copyright (c) 2022-2023 Safebench Team + + This work is licensed under the terms of the MIT license. 
+    For a copy, see
+'''
+
+import os
+import os.path as osp
+import time
+import numpy as np
+from fnmatch import fnmatch
+
+import yaml
+import importlib
+
+from moviepy.video.io.ffmpeg_writer import FFMPEG_VideoWriter
+
+
+class VideoWriter:
+    def __init__(self, filename='_autoplay.mp4', fps=10.0, **kw):
+        self.writer = None
+        self.params = dict(filename=filename, fps=fps, **kw)
+
+    def add(self, img):
+        img = np.asarray(img)
+        if self.writer is None:
+            h, w = img.shape[:2]
+            self.writer = FFMPEG_VideoWriter(size=(w, h), **self.params)
+        if img.dtype in [np.float32, np.float64]:
+            # float frames are assumed to be in [0, 1]; convert to 8-bit pixels
+            img = np.uint8(img.clip(0, 1) * 255)
+        if len(img.shape) == 2:
+            img = np.repeat(img[..., None], 3, -1)
+        try:
+            self.writer.write_frame(img)
+        except Exception:
+            # skip frames the encoder rejects instead of aborting the run
+            pass
+
+    def close(self):
+        if self.writer is not None:
+            self.writer.close()
+            self.writer = None
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, *kw):
+        self.close()
+
+
+class VideoRecorder(object):
+    def __init__(self, output_dir, logger):
+        self.logger = logger
+        self.output_dir = output_dir
+        self.video_count = 0
+        self.fps = 20
+        self.frame_list = []
+        self.video_dir = os.path.join(self.output_dir, 'video')
+        self.original_video_dir = os.path.join(self.output_dir, 'video')
+
+    def add_frame(self, frame):
+        self.frame_list.append(frame)
+
+    def save(self, data_ids, log_name):
+        self.video_dir = os.path.join(self.original_video_dir, log_name)
+        data_ids = ['{:04d}'.format(data_id) for data_id in data_ids]
+        video_name = f'video_{"{:04d}".format(self.video_count)}_id_{"_".join(data_ids)}.mp4'
+        os.makedirs(self.video_dir, exist_ok=True)
+        video_file = os.path.join(self.video_dir, video_name)
+        self.logger.log(f'>> Saving video to {video_file}')
+
+        # define video writer
+        video_writer = VideoWriter(filename=video_file, fps=self.fps)
+        for f in self.frame_list:
+            video_writer.add(f)
+        video_writer.close()
+
+        # reset frame list
+        self.frame_list = []
+        self.video_count += 1
+
+
+class VideoRecorder_Perception(object):
+    def __init__(self, output_dir, logger, width=1024, height=1024):
+        self.logger = logger
+        self.output_dir = output_dir
+        self.video_dir = os.path.join(self.output_dir, 'video')
+        self.video_count = 0
+
+        self.frame_list = []
+        # TODO: parse observation size
+        self.width, self.height = width, height
+
+    def add_frame(self, frame):
+        self.frame_list.append(frame)
+
+    def save(self, data_ids):
+        num_episodes = len(data_ids)
+        os.makedirs(self.video_dir, exist_ok=True)
+
+        video_name = [f'video_{"{:04d}".format(self.video_count)}_id_{"_{:04d}".format(data)}.mp4' for data in data_ids]
+        video_file = [os.path.join(self.video_dir, v) for v in video_name]
+        self.logger.log(f'>> Saving video to {self.video_dir}')
+        self.writer_list = [VideoWriter(filename=v, fps=20.0) for v in video_file]
+        for f in self.frame_list:
+            for n_i in range(num_episodes):
+                try:
+                    self.writer_list[n_i].add(f[n_i])
+                except Exception:
+                    # a scenario may provide fewer frames than the batch size
+                    pass
+        for n_i in range(num_episodes):
+            self.writer_list[n_i].close()
+
+        self.logger.log('>> Saving video done.')
+        self.frame_list = []
+        self.video_count += 1
+
+
+def print_dict(d):
+    print(yaml.dump(d, sort_keys=False, default_flow_style=False))
+
+
+def load_config(config_path="default_config.yaml") -> dict:
+    with open(config_path, 'r') as f:
+        return yaml.safe_load(f)
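+
+# Illustrative usage (hypothetical path):
+#   cfg = load_config('safebench/agent/config/sac.yaml')
+#   print_dict(cfg)  # pretty-print the nested config as YAML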
"config.yaml": + return path, name + # if we can not find the config file from the current dir, we search for the parent dir: + if depth > 2: + return None + return find_config_dir(osp.dirname(dir), depth + 1) + + +def find_model_path(dir, itr=None): + # if itr is specified, return model with the itr number + if itr is not None: + model_path = osp.join(dir, "model_" + str(itr) + ".pt") + if not osp.exists(model_path): + return None + # raise ValueError("Model doesn't exist: " + model_path) + return model_path + # if itr is not specified, return model.pt or the one with the largest itr number + pattern = "*pt" + model = "model.pt" + max_itr = -1 + for _, _, files in os.walk(dir): + for name in files: + if fnmatch(name, pattern): + name = name.split(".pt")[0].split("_") + if len(name) > 1: + itr = int(name[1]) + if itr > max_itr: + max_itr = itr + model = "model_" + str(itr) + ".pt" + model_path = osp.join(dir, model) + if not osp.exists(model_path): + return None + # raise ValueError("Model doesn't exist: " + model_path) + return model_path, max_itr + + +def setup_eval_configs(dir, itr=None): + path, config_name = find_config_dir(dir) + model_path, load_itr = find_model_path(osp.join(path, "model_save"), itr=itr) + config_path = osp.join(path, config_name) + configs = load_config(config_path) + return model_path, load_itr, configs["policy"], configs["timeout_steps"], configs[configs["policy"]] + + +def class_from_path(path): + module_name, class_name = path.rsplit(".", 1) + class_object = getattr(importlib.import_module(module_name), class_name) + return class_object diff --git a/scene/safebench/util/scenic_utils.py b/scene/safebench/util/scenic_utils.py new file mode 100644 index 00000000..f0ec8105 --- /dev/null +++ b/scene/safebench/util/scenic_utils.py @@ -0,0 +1,414 @@ + +### Top-level functionality of the scenic package as a script: +### load a scenario and generate scenes in an infinite loop. 
+### modified from https://github.com/BerkeleyLearnVerify/Scenic/blob/main/src/scenic/__main__.py +### & https://github.com/BerkeleyLearnVerify/Scenic/blob/main/src/scenic/core/simulators.py + +import os +import random +import numpy as np +import torch + +import enum +import sys +import time +import argparse +import pygame +from collections import OrderedDict, defaultdict + +if sys.version_info >= (3, 8): + from importlib import metadata +else: + import importlib_metadata as metadata + +import scenic.syntax.translator as translator +import scenic.core.errors as errors +from scenic.core.simulators import SimulationCreationError +from scenic.core.object_types import (enableDynamicProxyFor, setDynamicProxyFor, + disableDynamicProxyFor) +from scenic.core.distributions import RejectionException +import scenic.core.dynamics as dynamics +from scenic.core.errors import RuntimeParseError, InvalidScenarioError, optionallyDebugRejection +from scenic.core.requirements import RequirementType +from scenic.core.vectors import Vector + +def get_parser(scenicFile): + parser = argparse.ArgumentParser(prog='scenic', add_help=False, + usage='scenic [-h | --help] [options] FILE [options]', + description='Sample from a Scenic scenario, optionally ' + 'running dynamic simulations.') + + mainOptions = parser.add_argument_group('main options') + mainOptions.add_argument('-S', '--simulate', default=True, + help='run dynamic simulations from scenes ' + 'instead of simply showing diagrams of scenes') + mainOptions.add_argument('-s', '--seed', help='random seed', default=0, type=int) + mainOptions.add_argument('-v', '--verbosity', help='verbosity level (default 1)', + type=int, choices=(0, 1, 2, 3), default=1) + mainOptions.add_argument('-p', '--param', help='override a global parameter', + nargs=2, default=[], action='append', metavar=('PARAM', 'VALUE')) + mainOptions.add_argument('-m', '--model', help='specify a Scenic world model', default='scenic.simulators.carla.model') + mainOptions.add_argument('--scenario', default=None, + help='name of scenario to run (if file contains multiple)') + + # Simulation options + simOpts = parser.add_argument_group('dynamic simulation options') + simOpts.add_argument('--time', help='time bound for simulations (default none)', + type=int, default=10000) + simOpts.add_argument('--count', help='number of successful simulations to run (default infinity)', + type=int, default=0) + simOpts.add_argument('--max-sims-per-scene', type=int, default=1, metavar='N', + help='max # of rejected simulations before sampling a new scene (default 1)') + + # Interactive rendering options + intOptions = parser.add_argument_group('static scene diagramming options') + intOptions.add_argument('-d', '--delay', type=float, + help='loop automatically with this delay (in seconds) ' + 'instead of waiting for the user to close the diagram') + intOptions.add_argument('-z', '--zoom', type=float, default=1, + help='zoom expansion factor, or 0 to show the whole workspace (default 1)') + + # Debugging options + debugOpts = parser.add_argument_group('debugging options') + debugOpts.add_argument('--show-params', help='show values of global parameters', + action='store_true') + debugOpts.add_argument('--show-records', help='show values of recorded expressions', + action='store_true') + debugOpts.add_argument('-b', '--full-backtrace', help='show full internal backtraces', + action='store_true') + debugOpts.add_argument('--pdb', action='store_true', + help='enter interactive debugger on errors (implies "-b")') + 
debugOpts.add_argument('--pdb-on-reject', action='store_true', + help='enter interactive debugger on rejections (implies "-b")') + ver = metadata.version('scenic') + debugOpts.add_argument('--version', action='version', version=f'Scenic {ver}', + help='print Scenic version information and exit') + debugOpts.add_argument('--dump-initial-python', help='dump initial translated Python', + action='store_true') + debugOpts.add_argument('--dump-ast', help='dump final AST', action='store_true') + debugOpts.add_argument('--dump-python', help='dump Python equivalent of final AST', + action='store_true') + debugOpts.add_argument('--no-pruning', help='disable pruning', action='store_true') + debugOpts.add_argument('--gather-stats', type=int, metavar='N', + help='collect timing statistics over this many scenes') + + parser.add_argument('--scenicFile', help='a Scenic file to run', default = scenicFile, metavar='FILE') + + parser.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS, + help=argparse.SUPPRESS) + + # Parse arguments and set up configuration + args = parser.parse_args(args=[]) + return args + +class ScenicSimulator: + def __init__(self, scenicFile, params): + self.args = get_parser(scenicFile) + delay = self.args.delay + errors.showInternalBacktrace = self.args.full_backtrace + if self.args.pdb: + errors.postMortemDebugging = True + errors.showInternalBacktrace = True + if self.args.pdb_on_reject: + errors.postMortemRejections = True + errors.showInternalBacktrace = True + translator.dumpTranslatedPython = self.args.dump_initial_python + translator.dumpFinalAST = self.args.dump_ast + translator.dumpASTPython = self.args.dump_python + translator.verbosity = self.args.verbosity + translator.usePruning = not self.args.no_pruning +# if self.args.seed is not None and self.args.verbosity >= 1: +# print(f'Using random seed = {self.args.seed}') +# random.seed(self.args.seed) + # Load scenario from file + if self.args.verbosity >= 1: + print('Beginning scenario construction...') + startTime = time.time() + self.scenario = errors.callBeginningScenicTrace( + lambda: translator.scenarioFromFile(self.args.scenicFile, + params=params, + model=self.args.model, + scenario=self.args.scenario) + ) + self.opt_params, self.opt_record = self.get_params() + + totalTime = time.time() - startTime + if self.args.verbosity >= 1: + print(f'Scenario constructed in {totalTime:.2f} seconds.') + self.simulator = errors.callBeginningScenicTrace(self.scenario.getSimulator) + self.simulator.render = False + + def get_params(self): + all_params = self.scenario.params + opt_record = {} + opt_params = {} + for param in all_params.keys(): + if param.startswith('OPT'): + opt_record[param] = [] + opt_params[param] = all_params[param] + opt_params[param].min = opt_params[param].low + opt_params[param].max = opt_params[param].high + return opt_params, opt_record + + def record_params(self): + all_params = self.scene.params + for param in self.opt_record.keys(): + self.opt_record[param].append(all_params[param]) + print("Recording params...") + + def update_params(self): + print("Updating params...") + for param in self.opt_params.keys(): + if len(self.opt_record[param]) > 1: + mean = np.mean(self.opt_record[param]) + std = np.std(self.opt_record[param]) + self.opt_params[param].low = round(max(mean - std, self.opt_params[param].min), 2) + self.opt_params[param].high = round(min(mean + std, self.opt_params[param].max), 2) +# self.opt_record[param] = [] + self.scenario.params.update(self.opt_params) + 
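+        # Worked example (illustrative): recorded values [4.0, 6.0, 8.0] give
+        # mean 6.0 and std ~1.63, so the range tightens to [4.37, 7.63]
+        # (rounded, and clipped to the parameter's original [min, max])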
print(self.save_params()) + + def load_params(self, params): + print("Loading params...") + for param in params.keys(): + self.opt_params[param].low = params[param]['low'] + self.opt_params[param].high = params[param]['high'] + self.scenario.params.update(self.opt_params) + + def save_params(self): + print("Saving params...") + save_params = {} + for param in self.opt_params.keys(): + cur_param = self.opt_params[param] + save_params[param] = {'low': cur_param.low, 'high': cur_param.high} + return save_params + + def generateScene(self): + scene, iterations = errors.callBeginningScenicTrace( + lambda: self.scenario.generate(verbosity=self.args.verbosity) + ) + return scene, iterations + + def setSimulation(self, scene): + if self.args.verbosity >= 1: + print(f' Beginning simulation of {scene.dynamicScenario}...') + try: + self.scene = scene + self.simulation = self.simulator.createSimulation(scene, verbosity=self.args.verbosity) + except SimulationCreationError as e: + if self.args.verbosity >= 1: + print(f' Failed to create simulation: {e}') + return False + return True + + def runSimulation(self): + """Run the simulation. + Throws a RejectSimulationException if a requirement is violated. + """ + maxSteps = self.args.time + trajectory = self.simulation.trajectory + if self.simulation.currentTime > 0: + raise RuntimeError('tried to run a Simulation which has already run') + assert len(trajectory) == 0 + actionSequence = [] + + import scenic.syntax.veneer as veneer + veneer.beginSimulation(self.simulation) + dynamicScenario = self.simulation.scene.dynamicScenario + + # Initialize dynamic scenario + dynamicScenario._start() + + # Give objects a chance to do any simulator-specific setup + for obj in self.simulation.objects: + if obj is self.simulation.objects[0]: + continue + obj.startDynamicSimulation() + + # Update all objects in case the simulator has adjusted any dynamic + # properties during setup + self.simulation.updateObjects() + + # Run simulation + assert self.simulation.currentTime == 0 + terminationReason = None + terminationType = None + while True: + yield self.simulation.currentTime + if self.simulation.verbosity >= 3: + print(f' Time step {self.simulation.currentTime}:') + + # Run compose blocks of compositional scenarios + # (and check if any requirements defined therein fail) + terminationReason = dynamicScenario._step() + terminationType = TerminationType.scenarioComplete + + # Record current state of the simulation + self.simulation.recordCurrentState() + + # Run monitors + newReason = dynamicScenario._runMonitors() + if newReason is not None: + terminationReason = newReason + terminationType = TerminationType.terminatedByMonitor + + # "Always" and scenario-level requirements have been checked; + # now safe to terminate if the top-level scenario has finished, + # a monitor requested termination, or we've hit the timeout + if terminationReason is not None: + pass + terminationReason = dynamicScenario._checkSimulationTerminationConditions() + if terminationReason is not None: + terminationType = TerminationType.simulationTerminationCondition + pass + if maxSteps and self.simulation.currentTime >= maxSteps: + terminationReason = f'reached time limit ({maxSteps} steps)' + terminationType = TerminationType.timeLimit + pass + + # Compute the actions of the agents in this time step + allActions = OrderedDict() + schedule = self.simulation.scheduleForAgents() + for agent in schedule: + if agent is self.simulation.objects[0]: + continue + + behavior = agent.behavior + if not 
behavior._runningIterator: # TODO remove hack + behavior._start(agent) + actions = behavior._step() + if isinstance(actions, EndSimulationAction): + terminationReason = str(actions) + terminationType = TerminationType.terminatedByBehavior + break + assert isinstance(actions, tuple) + if len(actions) == 1 and isinstance(actions[0], (list, tuple)): + actions = tuple(actions[0]) + +# if not self.simulation.actionsAreCompatible(agent, actions): +# raise InvalidScenarioError(f'agent {agent} tried incompatible ' +# f' action(s) {actions}') + allActions[agent] = actions + if terminationReason is not None: + break + + # Execute the actions + if self.simulation.verbosity >= 3: + for agent, actions in allActions.items(): + print(f' Agent {agent} takes action(s) {actions}') + actionSequence.append(allActions) + self.simulation.executeActions(allActions) + + # Run the simulation for a single step and read its state back into Scenic + # the step is controlled by safebench instead # +# self.simulation.step() + self.simulation.updateObjects() + self.simulation.currentTime += 1 + + # Package up simulation results into a compact object + # update for every step # + result = SimulationResult(trajectory, actionSequence, terminationType, + terminationReason, self.simulation.records) + self.simulation.result = result + + def endSimulation(self): + # Stop all remaining scenarios + # (and reject if some 'require eventually' condition was never satisfied) + import scenic.syntax.veneer as veneer + for scenario in tuple(veneer.runningScenarios): + scenario._stop('simulation terminated') + + # If the simulation was terminated by an exception (including rejections), + # some scenarios may still be running; we need to clean them up without + # checking their requirements, which could raise rejection exceptions. + + for scenario in tuple(veneer.runningScenarios): + scenario._stop('exception', quiet=True) + + if not hasattr(self, "simulation"): + return + + dynamicScenario = self.simulation.scene.dynamicScenario + values = dynamicScenario._evaluateRecordedExprs(RequirementType.recordFinal) + for name, val in values.items(): + self.simulation.records[name] = val + + ### destroy ### + self.simulation.destroy() + for obj in self.simulation.scene.objects: + disableDynamicProxyFor(obj) + for agent in self.simulation.agents: + if agent.behavior._isRunning: + agent.behavior._stop() + for monitor in self.simulation.scene.monitors: + if monitor._isRunning: + monitor._stop() + veneer.endSimulation(self.simulation) + + def destroy(self): + self.simulator.destroy() + +class Action: + """An :term:`action` which can be taken by an agent for one step of a simulation.""" + def canBeTakenBy(self, agent): + return True + + def applyTo(self, agent, simulation): + raise NotImplementedError + +class EndSimulationAction(Action): + """Special action indicating it is time to end the simulation. + Only for internal use. + """ + def __init__(self, line): + self.line = line + + def __str__(self): + return f'"terminate" executed on line {self.line}' + +class EndScenarioAction(Action): + """Special action indicating it is time to end the current scenario. + Only for internal use. + """ + def __init__(self, line): + self.line = line + + def __str__(self): + return f'"terminate scenario" executed on line {self.line}' + +@enum.unique +class TerminationType(enum.Enum): + """Enum describing the possible ways a simulation can end.""" + #: Simulation reached the specified time limit. 
+ timeLimit = 'reached simulation time limit' + #: The top-level scenario's :keyword:`compose` block finished executing. + scenarioComplete = 'the top-level scenario finished' + #: A user-specified termination condition was met. + simulationTerminationCondition = 'a simulation termination condition was met' + #: A :term:`monitor` used :keyword:`terminate` to end the simulation. + terminatedByMonitor = 'a monitor terminated the simulation' + #: A :term:`dynamic behavior` used :keyword:`terminate` to end the simulation. + terminatedByBehavior = 'a behavior terminated the simulation' + +class SimulationResult: + """Result of running a simulation. + Attributes: + trajectory: A tuple giving for each time step the simulation's 'state': by + default the positions of every object. See `Simulation.currentState`. + finalState: The last 'state' of the simulation, as above. + actions: A tuple giving for each time step a dict specifying for each agent the + (possibly-empty) tuple of actions it took at that time step. + terminationType (`TerminationType`): The way the simulation ended. + terminationReason (str): A human-readable string giving the reason why the + simulation ended, possibly including debugging info. + records (dict): For each :keyword:`record` statement, the value or time series of + values its expression took during the simulation. + """ + def __init__(self, trajectory, actions, terminationType, terminationReason, records): + self.trajectory = tuple(trajectory) + assert self.trajectory + self.finalState = self.trajectory[-1] + self.actions = tuple(actions) + self.terminationType = terminationType + self.terminationReason = str(terminationReason) + self.records = dict(records) diff --git a/scene/safebench/util/torch_util.py b/scene/safebench/util/torch_util.py new file mode 100644 index 00000000..8b0cde9e --- /dev/null +++ b/scene/safebench/util/torch_util.py @@ -0,0 +1,236 @@ +''' +Date: 2023-01-31 22:23:17 +LastEditTime: 2023-03-01 16:56:21 +Description: + Copyright (c) 2022-2023 Safebench Team + + This work is licensed under the terms of the MIT license. + For a copy, see +''' + +import os +from typing import Any, Iterable, Optional + +import numpy as np +import random +import scipy.signal +import torch + + +def combined_shape(length, shape=None): + if shape is None: + return (length, ) + return (length, shape) if np.isscalar(shape) else (length, *shape) + + +def discount_cumsum(x, discount): + r""" + magic from rllab for computing discounted cumulative sums of vectors. + + input: + numpy 1d vector x, [x0, x1, x2] + + output: + [x0 + discount * x1 + discount^2 * x2, x1 + discount * x2, x2] + """ + return scipy.signal.lfilter([1], [1, float(-discount)], x[::-1], axis=0)[::-1] + + +def set_seed(seed=1029): + random.seed(seed) + os.environ['PYTHONHASHSEED'] = str(seed) + np.random.seed(seed) + # torch.use_deterministic_algorithms(True) + torch.manual_seed(seed) + torch.cuda.manual_seed(seed) + torch.cuda.manual_seed_all(seed) # if you are using multi-GPU. 
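+    # the two cuDNN flags below trade speed for reproducibility: disabling
+    # benchmark mode stops cuDNN from auto-tuning kernels per input shape,
+    # and deterministic mode forces repeatable convolution algorithms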
+    torch.backends.cudnn.benchmark = False
+    torch.backends.cudnn.deterministic = True
+
+
+def set_torch_variable(device):
+    device = device.lower()
+    use_cpu = device == 'cpu'
+    use_gpu = device.split(':')[0] == 'cuda'
+    assert use_cpu or use_gpu, 'device must be either "cpu" or "cuda:{gpu_id}"'
+    # fall back to CPU when CUDA is not available on this host
+    if not torch.cuda.is_available():
+        os.environ["MODEL_DEVICE"] = 'cpu'
+    else:
+        os.environ["MODEL_DEVICE"] = device
+
+
+def get_torch_device():
+    device_name = os.environ.get("MODEL_DEVICE")
+    try:
+        return torch.device(device_name)
+    except Exception:
+        raise ValueError(f"'MODEL_DEVICE' env variable has not been specified or is invalid; its current value is {device_name}")
+
+
+def get_device_name():
+    return os.environ.get("MODEL_DEVICE")
+
+
+def to_tensor(
+        item: Any,
+        dtype: torch.dtype = torch.float32,
+        device: Optional[torch.device] = None,
+        ignore_keys: list = [],
+        transform_scalar: bool = True,
+        squeeze=False
+    ) -> torch.Tensor:
+    device = get_torch_device() if device is None else device
+
+    def squeeze_tensor(d):
+        data = torch.tensor(d, dtype=dtype, device=device)
+        if squeeze:
+            return torch.squeeze(data)
+        return data
+
+    if isinstance(item, dict):
+        new_data = {}
+        for k, v in item.items():
+            if k in ignore_keys:
+                new_data[k] = v
+            else:
+                new_data[k] = to_tensor(v, dtype, device, ignore_keys, transform_scalar, squeeze=squeeze)
+        return new_data
+    elif isinstance(item, list) or isinstance(item, tuple):
+        if len(item) == 0:
+            return None
+        return squeeze_tensor(item)
+    elif isinstance(item, np.ndarray):
+        return squeeze_tensor(item)
+    elif isinstance(item, bool) or isinstance(item, str):
+        return item
+    elif np.isscalar(item):
+        if transform_scalar:
+            return torch.as_tensor(item, device=device).to(dtype)
+        else:
+            return item
+    elif item is None:
+        return None
+    elif isinstance(item, torch.Tensor):
+        # already a tensor: only cast the dtype, keep the device unchanged
+        return item.to(dtype)
+    else:
+        raise TypeError("unsupported item type: {}".format(type(item)))
+
+
+def to_ndarray(item: Any, dtype: np.dtype = None) -> np.ndarray:
+    if isinstance(item, dict):
+        new_data = {}
+        for k, v in item.items():
+            new_data[k] = to_ndarray(v, dtype)
+        return new_data
+    elif isinstance(item, list) or isinstance(item, tuple):
+        if len(item) == 0:
+            return None
+        elif hasattr(item, '_fields'):  # namedtuple
+            return type(item)(*[to_ndarray(t, dtype) for t in item])
+        else:
+            new_data = []
+            for t in item:
+                new_data.append(to_ndarray(t, dtype))
+            return new_data
+    elif isinstance(item, torch.Tensor):
+        if item.device.type != 'cpu':
+            item = item.detach().cpu()
+        if dtype is None:
+            return item.numpy()
+        else:
+            return item.numpy().astype(dtype)
+    elif isinstance(item, np.ndarray):
+        if dtype is None:
+            return item
+        else:
+            return item.astype(dtype)
+    elif isinstance(item, bool) or isinstance(item, str):
+        return item
+    elif np.isscalar(item):
+        return np.array(item)
+    elif item is None:
+        return None
+    else:
+        raise TypeError("unsupported item type: {}".format(type(item)))
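+
+# A minimal round-trip sketch for the converters above (illustrative comment
+# only; assumes set_torch_variable() has been called so MODEL_DEVICE is set):
+#
+#     set_torch_variable('cpu')
+#     batch = {'obs': np.zeros((4, 3)), 'step': 7}
+#     tensors = to_tensor(batch)    # dict of float32 tensors on MODEL_DEVICE
+#     arrays = to_ndarray(tensors)  # back to numpy, structure preserved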
+
+
+def to_device(item: Any, device: str = None, ignore_keys: list = []) -> Any:
+    if device is None:
+        device = get_device_name()
+    if isinstance(item, torch.nn.Module):
+        return item.to(device)
+    elif isinstance(item, torch.Tensor):
+        return item.to(device)
+    elif isinstance(item, dict):
+        new_item = {}
+        for k in item.keys():
+            if k in ignore_keys:
+                new_item[k] = item[k]
+            else:
+                new_item[k] = to_device(item[k], device)
+        return new_item
+    elif isinstance(item, np.ndarray) or isinstance(item, np.bool_):
+        return item
+    elif item is None or isinstance(item, str):
+        return item
+    elif isinstance(item, list):
+        return [to_device(k, device) for k in item]
+    elif isinstance(item, tuple):
+        return tuple([to_device(k, device) for k in item])
+    else:
+        raise TypeError("unsupported item type: {}".format(type(item)))
+
+
+def to_dtype(item: Any, dtype: type) -> Any:
+    if isinstance(item, torch.Tensor):
+        return item.to(dtype=dtype)
+    elif isinstance(item, dict):
+        return {k: to_dtype(item[k], dtype) for k in item.keys()}
+    else:
+        raise TypeError("unsupported item type: {}".format(type(item)))
+
+
+def count_vars(module):
+    return sum([np.prod(p.shape) for p in module.parameters()])
+
+
+def CUDA(var):
+    return var.cuda() if torch.cuda.is_available() else var
+
+
+def CPU(var):
+    return var.cpu().detach().numpy()
+
+
+def kaiming_init(m):
+    # despite the name, Linear layers get Xavier init; conv layers get Kaiming
+    if isinstance(m, torch.nn.Linear):
+        torch.nn.init.xavier_normal_(m.weight)
+        if m.bias is not None:
+            m.bias.data.fill_(0)
+    elif isinstance(m, torch.nn.Conv2d) or isinstance(m, torch.nn.ConvTranspose2d):
+        torch.nn.init.kaiming_normal_(m.weight)
+        if m.bias is not None:
+            m.bias.data.fill_(0)
+    elif isinstance(m, (torch.nn.BatchNorm1d, torch.nn.BatchNorm2d)):
+        m.weight.data.fill_(1)
+        if m.bias is not None:
+            m.bias.data.fill_(0)
+
+
+def hidden_init(layer):
+    fan_in = layer.weight.data.size()[0]
+    lim = 1. / np.sqrt(fan_in)
+    return (-lim, lim)
+
+
+def normal(x, mu, sigma_sq):
+    # elementwise Gaussian density of x under N(mu, sigma_sq)
+    pi = CUDA(torch.FloatTensor([np.pi]))
+    a = (-1 * (CUDA(x) - mu).pow(2) / (2 * sigma_sq)).exp()
+    b = 1 / (2 * sigma_sq * pi.expand_as(sigma_sq)).sqrt()
+    return a * b
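+
+
+# Minimal smoke test for the helpers above -- a sketch added for illustration,
+# not part of the original SafeBench API; run this file directly to execute it.
+if __name__ == '__main__':
+    set_seed(0)
+    set_torch_variable('cuda:0' if torch.cuda.is_available() else 'cpu')
+    x = to_tensor(np.ones((2, 3)))                # float32 tensor on MODEL_DEVICE
+    assert tuple(x.shape) == (2, 3) and x.dtype == torch.float32
+    mu, sigma_sq = CUDA(torch.zeros(2, 3)), CUDA(torch.ones(2, 3))
+    print(to_ndarray(normal(x, mu, sigma_sq)))    # elementwise N(0, 1) density at 1.0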