diff --git a/scene/.gitignore b/scene/.gitignore
new file mode 100644
index 00000000..a5c0b714
--- /dev/null
+++ b/scene/.gitignore
@@ -0,0 +1,156 @@
+# Cache files
+*.cache
+*.tmp
+*.log
+*.swp
+*.swo
+__pycache__/
+*.pyc
+.DS_Store
+Thumbs.db
+
+# Output files
+*.out
+*.o
+*.obj
+*.class
+*.bin
+*.dll
+*.so
+*.a
+*.lib
+*.exe
+*.msi
+*.msp
+*.pdb
+*.ilk
+
+# Compiler-generated files
+*.sln
+*.suo
+*.vcxproj
+*.vcxproj.filters
+*.user
+*.vs
+*.vspscc
+*.vssscc
+*.csproj
+*.csproj.user
+*.csproj.vspscc
+*.csproj.vssscc
+
+# Binary files
+*.bin
+*.dll
+*.exe
+*.msi
+*.msp
+*.pdb
+*.ilk
+*.so
+*.a
+*.lib
+*.o
+*.obj
+
+# Tool- and framework-specific caches
+node_modules/
+bower_components/
+dist/
+build/
+out/
+target/
+
+# Language-specific caches
+# Python
+__pycache__/
+*.pyc
+*.pyo
+*.pyd
+*.egg
+*.egg-info/
+*.egg-link
+*.pydevproject
+*.pyc
+*.pyo
+*.pyd
+*.sqlite3
+
+# C/C++
+*.o
+*.obj
+*.a
+*.lib
+*.so
+*.dll
+*.exe
+*.pdb
+*.ilk
+
+# Java
+*.class
+*.jar
+*.war
+*.ear
+*.class
+*.java~
+*.class~
+*.jar~
+*.war~
+*.ear~
+
+# JavaScript
+node_modules/
+*.js.map
+*.min.js
+*.min.js.map
+
+# MATLAB
+*.mat
+*.fig
+*.mex*
+*.dll
+*.so
+*.a
+*.lib
+
+# Editor and IDE temporary files
+*.swp
+*.swo
+*.swn
+*.swo~
+*.swp~
+*.swo~
+*.swn~
+.DS_Store
+Thumbs.db
+*.bak
+*.tmp
+*.log
+*.cache
+*.session
+*.sublime-workspace
+*.sublime-project
+*.vscode/
+*.vscode-insiders/
+*.vscode-remote/
+*.vscode-workspace
+*.vscode-insiders-workspace
+*.vscode-remote-workspace
+
+# Miscellaneous
+*.dat
+*.bin
+*.bak
+*.tmp
+*.log
+*.cache
+*.session
+*.sublime-workspace
+*.sublime-project
+*.vscode/
+*.vscode-insiders/
+*.vscode-remote/
+*.vscode-workspace
+*.vscode-insiders-workspace
+*.vscode-remote-workspace
\ No newline at end of file
diff --git a/scene/safebench/README.md b/scene/safebench/README.md
new file mode 100644
index 00000000..6ee3469f
--- /dev/null
+++ b/scene/safebench/README.md
@@ -0,0 +1,87 @@
+# SafeBench (simplified)
+
+This directory keeps only the **SafeBench** modules that this project (ChatScene) actually uses, so the project stays small and clear.
+
+For the full SafeBench experience, see the official repository: [SafeBench GitHub](https://github.com/TRI-ML/safebench).
+
+---
+
+## 🚀 Installation
+
+Make sure the following dependencies are installed:
+
+```bash
+pip install gym pygame
+```
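+
+Before running any of the scripts below, it helps to confirm that a Carla server is actually reachable. A minimal sketch in Python, assuming a server on the default port 2000 (the runners in this directory read the port from their scenario config):
+
+```python
+import carla
+
+# Connect to a locally running Carla server (default port 2000 assumed).
+client = carla.Client('localhost', 2000)
+client.set_timeout(10.0)  # the runners below use the same timeout
+
+world = client.get_world()
+print('Connected. Current map:', world.get_map().name)
+print('Server version:', client.get_server_version())
+```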
+
+---
+
+## Core Modules
+
+- **`carla_runner.py`**
+  Connects directly to the Carla simulator to quickly load a static map, place vehicles, and execute predefined actions.
+  Mainly used to **test simple scenarios directly**, with no Scenic script required.
+
+- **`scenic_runner.py`**
+  Reads a `.scenic` script describing a static scene, compiles it with Scenic, then automatically builds the corresponding scene in Carla and runs the simulation.
+  Suited to **natural-language-to-static-scene** generation experiments.
+
+- **`scenic_runner_dynamic.py`**
+  Supports dynamic scenes (elements that change over time, such as moving pedestrians or turning vehicles); it can load more complex dynamic `.scenic` scripts and generate and control scenes in Carla in real time. Both Scenic runners share the internal flow sketched below.
+  Mainly used to **generate dynamic scenes from natural language** and simulate them.
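+
+Under the hood, the Scenic runners drive the simulator through the `ScenicSimulator` helper in `safebench/util/scenic_utils.py` rather than through the `scenic` command-line tool. A rough sketch of that flow, using only the calls that appear in the runners themselves; the file name and `extra_params` values here are placeholders:
+
+```python
+from safebench.util.scenic_utils import ScenicSimulator
+
+# Placeholders: the real values come from the parsed scenario config
+# (see `_init_scenic` in the runners).
+scenic = ScenicSimulator('./your_scenario.scenic', {'town': 'Town05'})
+
+def run_scenes(scenes):
+    # Mirrors `run_scenes` in the runners: bind each sampled scene to a
+    # simulation, then start its behavior generator.
+    for scene in scenes:
+        if scenic.setSimulation(scene):
+            scenic.update_behavior = scenic.runSimulation()
+            next(scenic.update_behavior)
+```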
+
+---
+
+## Usage
+
+1. **Initialize the Carla environment**
+   - Start a Carla server first, and make sure its version is 0.9.15.
+   - Recommended launch command:
+     ```bash
+     ./CarlaUE4.sh -quality-level=Low
+     ```
+
+2. **Run carla_runner.py**
+   - Example command:
+     ```bash
+     python safebench/carla_runner.py
+     ```
+   - Loads a fixed scenario by default; the map, traffic participants, and similar settings are changed in the runner's internal configuration (see the sketch after this list).
+
+3. **Run scenic_runner.py**
+   - First prepare a static `.scenic` scenario file.
+   - Example:
+     ```bash
+     python safebench/scenic_runner.py --scenario_file ./your_scenario.scenic
+     ```
+   - The program compiles the Scenic script and builds the scene in Carla automatically.
+
+4. **Run scenic_runner_dynamic.py**
+   - Suited to scenarios that need generated dynamic behavior.
+   - Example:
+     ```bash
+     python safebench/scenic_runner_dynamic.py --scenario_file ./your_dynamic_scenario.scenic
+     ```
+   - Simulation time, action scripts, and more can be controlled.
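+
+There is no separate config file for the knobs mentioned in step 2; they live in the `env_params` dictionary built in each runner's `__init__`. A trimmed sketch of the fields most worth tuning, with defaults copied from `carla_runner.py` in this patch:
+
+```python
+# Subset of env_params as defined in CarlaRunner.__init__ below.
+env_params = {
+    'display_size': 128,    # screen size of one bird-eye view window
+    'obs_range': 32,        # observation range (meter)
+    'd_behind': 12,         # distance behind the ego vehicle (meter)
+    'out_lane_thres': 4,    # threshold for out of lane (meter)
+    'desired_speed': 8,     # desired speed (m/s)
+    'disable_lidar': True,  # show bird-eye view lidar or not
+}
+```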
+
+---
+
+## Notes
+
+- Only the modules this project needs are kept, so the full upstream SafeBench feature set (benchmark evaluation, adversarial attacks, etc.) is **not supported**.
+- Only the **Carla 0.9.15 + Python 3.7/3.8** environment is supported.
+- Scenic scripts must follow the correct syntax, otherwise compilation may fail.
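+
+One consequence of how the runners initialize Carla: they switch the server into synchronous mode with a fixed step (see `_init_world` in the code below), so a freshly started server will appear frozen until a runner ticks it. The equivalent standalone setup, assuming port 2000 (the step size here is a placeholder for the configured `fixed_delta_seconds`):
+
+```python
+import carla
+
+client = carla.Client('localhost', 2000)
+client.set_timeout(10.0)
+
+world = client.get_world()
+settings = world.get_settings()
+settings.synchronous_mode = True    # the world advances only on tick()
+settings.fixed_delta_seconds = 0.1  # placeholder for the configured step
+world.apply_settings(settings)
+
+world.tick()  # advance the simulation by exactly one step
+```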
+
+---
+
+## References
+
+- [SafeBench official repository](https://github.com/TRI-ML/safebench)
+- [CARLA Simulator](https://carla.org/)
+- [Scenic language documentation](https://scenic-lang.readthedocs.io/)
+
+---
+
+> This directory serves only as a helper module for the ChatScene project; to extend its testing features further, refer to the original SafeBench framework.
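+
+The scripts can also be embedded in other tooling by constructing a runner directly. A minimal sketch, assuming you have assembled the same `agent_config` and `scenario_config` dictionaries the scripts normally load from their configs; only a few of the keys read in `CarlaRunner.__init__` are shown, and `'sac'` is a placeholder policy name:
+
+```python
+from safebench.carla_runner import CarlaRunner
+
+scenario_config = {
+    'seed': 0, 'exp_name': 'demo', 'output_dir': 'log',
+    'mode': 'eval', 'port': 2000, 'render': False, 'num_scenario': 1,
+    # ... plus the remaining keys read in CarlaRunner.__init__ below
+}
+agent_config = {
+    'policy_type': 'sac',  # placeholder; must be a key of AGENT_POLICY_LIST
+    # ... plus the remaining keys read in CarlaRunner.__init__ below
+}
+
+runner = CarlaRunner(agent_config, scenario_config)
+try:
+    runner.run()
+finally:
+    runner.close()
+```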
Clearning up all actors") + self.env.clean_up() + + # save video + if self.save_video: + data_ids = [config.data_id for config in sampled_scenario_configs] + self.logger.save_video(data_ids=data_ids) + + # print score for ranking + self.logger.log(f'[{num_finished_scenario}/{data_loader.num_total_scenario}] Ranking scores for batch scenario:', 'yellow') + for s_i in score_list.keys(): + self.logger.log('\t Env id ' + str(s_i) + ': ' + str(np.mean(score_list[s_i])), 'yellow') + + + # calculate evaluation results + score_function = get_route_scores if self.scenario_category == 'planning' else get_perception_scores + all_running_results = self.logger.add_eval_results(records=self.env.running_results) + all_scores = score_function(all_running_results) + self.logger.add_eval_results(scores=all_scores) + self.logger.print_eval_results() + if len(self.env.running_results) % self.save_freq == 0: + self.logger.save_eval_results(log_name) + self.logger.save_eval_results(log_name) + + def run(self, test_epoch = None): + # get scenario data of different maps + config_by_map = scenario_parse(self.scenario_config, self.logger) + + map_keys = list(config_by_map.keys()) + if self.mode == 'train_agent': + Buffer = RouteReplayBuffer if self.scenario_category == 'planning' else PerceptionReplayBuffer + replay_buffer = Buffer(self.num_scenario, self.mode, self.buffer_capacity) + map_keys = map_keys * 20 + if self.continue_agent_training: + self.logger.load_training_results() + start_episode = self.check_continue_training(self.agent_policy, replay_buffer) + 1 + if start_episode >= self.train_episode: + return + else: + self.clean_cache(self.agent_policy.model_path) + start_episode = -1 + else: + self.agent_policy.load_model(episode=test_epoch) + + for m_i in map_keys: + if self.mode == 'eval': + log_name = f'ROUTE-{config_by_map[m_i][0].route_id-4}' + if self.logger.check_eval_dir(log_name) == len(config_by_map[m_i]): + self.logger.log(f">> This scenario and route have been done.") + continue + elif self.mode == 'train_agent': + if self.current_episode >= self.train_episode - 1: + return + + if self.current_episode + len(config_by_map[m_i]) < start_episode: + self.current_episode += len(config_by_map[m_i]) + continue + + # initialize map and render + try: + self._init_world(m_i) + self._init_renderer() + except: + continue + + # create scenarios within the vectorized wrapper + self.env = VectorWrapper( + self.env_params, + self.scenario_config, + self.world, + self.birdeye_render, + self.display, + self.logger + ) + + # prepare data loader and buffer + data_loader = ScenarioDataLoader(config_by_map[m_i], self.num_scenario, m_i, self.world) + + # run with different modes + + if self.mode == 'eval': +# self.agent_policy.load_model(episode=test_epoch) + # self.scenario_policy.load_model() + self.agent_policy.set_mode('eval') + self.scenario_policy.set_mode('eval') + self.eval(data_loader) + elif self.mode == 'train_agent': +# start_episode = self.check_continue_training(self.agent_policy) +# self.scenario_policy.load_model() + self.agent_policy.set_mode('train') + self.scenario_policy.set_mode('eval') + self.train(data_loader, start_episode, replay_buffer) + elif self.mode == 'train_scenario': + start_episode = self.check_continue_training(self.scenario_policy) + self.agent_policy.load_model() + self.agent_policy.set_mode('eval') + self.scenario_policy.set_mode('train') + self.train(data_loader, start_episode) + else: + raise NotImplementedError(f"Unsupported mode: {self.mode}.") + + def 
check_continue_training(self, policy, replay_buffer): + # load previous checkpoint + policy.load_model(replay_buffer = replay_buffer) + if policy.continue_episode == 0: + start_episode = 0 + self.logger.log('>> Previous checkpoint not found. Training from scratch.') + else: + start_episode = policy.continue_episode + self.logger.log(f'>> Continue training from previous checkpoint, epoch: {start_episode}.') + return start_episode + + def clean_cache(self, path): + # Get a list of all files in directory + all_files = glob.glob(os.path.join(path, '*')) + + # Specify the file to keep + file_to_keep = os.path.join(path, 'model.sac.-001.torch') + + # Remove all files except the one to keep + for file in all_files: + if file != file_to_keep: + os.remove(file) + + def close(self): + pygame.quit() + if self.env: + self.env.clean_up() diff --git a/scene/safebench/scenic_runner.py b/scene/safebench/scenic_runner.py new file mode 100644 index 00000000..096c1c72 --- /dev/null +++ b/scene/safebench/scenic_runner.py @@ -0,0 +1,552 @@ +''' +Date: 2023-01-31 22:23:17 +LastEditTime: 2023-03-07 12:28:17 +Description: + Copyright (c) 2022-2023 Safebench Team + + This work is licensed under the terms of the MIT license. + For a copy, see +''' + +import copy +import os +import json +import glob +import random + +import numpy as np +import carla +import pygame +from tqdm import tqdm + +from safebench.gym_carla.env_wrapper import VectorWrapper +from safebench.gym_carla.envs.render import BirdeyeRender +from safebench.gym_carla.replay_buffer import RouteReplayBuffer, PerceptionReplayBuffer + +from safebench.agent import AGENT_POLICY_LIST +from safebench.scenario import SCENARIO_POLICY_LIST + +from safebench.scenario.scenario_manager.carla_data_provider import CarlaDataProvider +from safebench.scenario.scenario_data_loader import ScenarioDataLoader, ScenicDataLoader +from safebench.scenario.tools.scenario_utils import scenario_parse, scenic_parse + +from safebench.util.logger import Logger, setup_logger_kwargs +from safebench.util.metric_util import get_route_scores, get_perception_scores +from safebench.util.scenic_utils import ScenicSimulator + +class ScenicRunner: + def __init__(self, agent_config, scenario_config): + self.scenario_config = scenario_config + self.agent_config = agent_config + + self.seed = scenario_config['seed'] + self.exp_name = scenario_config['exp_name'] + self.output_dir = scenario_config['output_dir'] + self.mode = scenario_config['mode'] + self.save_video = scenario_config['save_video'] + + self.render = scenario_config['render'] + self.num_scenario = scenario_config['num_scenario'] + self.fixed_delta_seconds = scenario_config['fixed_delta_seconds'] + self.scenario_category = scenario_config['scenario_category'] + + # continue training flag + self.continue_agent_training = scenario_config['continue_agent_training'] + self.continue_scenario_training = scenario_config['continue_scenario_training'] + + # apply settings to carla + self.client = carla.Client('localhost', scenario_config['port']) + self.client.set_timeout(10.0) + self.world = None + self.env = None + + self.env_params = { + 'auto_ego': scenario_config['auto_ego'], + 'obs_type': agent_config['obs_type'], + 'scenario_category': self.scenario_category, + 'ROOT_DIR': scenario_config['ROOT_DIR'], + 'warm_up_steps': 9, # number of ticks after spawning the vehicles + 'disable_lidar': True, # show bird-eye view lidar or not + 'display_size': 128, # screen size of one bird-eye view window + 'obs_range': 32, # observation range (meter) + 
diff --git a/scene/safebench/scenic_runner.py b/scene/safebench/scenic_runner.py
new file mode 100644
index 00000000..096c1c72
--- /dev/null
+++ b/scene/safebench/scenic_runner.py
@@ -0,0 +1,552 @@
+'''
+Date: 2023-01-31 22:23:17
+LastEditTime: 2023-03-07 12:28:17
+Description:
+    Copyright (c) 2022-2023 Safebench Team
+
+    This work is licensed under the terms of the MIT license.
+    For a copy, see
+'''
+
+import copy
+import os
+import json
+import glob
+import random
+
+import numpy as np
+import carla
+import pygame
+from tqdm import tqdm
+
+from safebench.gym_carla.env_wrapper import VectorWrapper
+from safebench.gym_carla.envs.render import BirdeyeRender
+from safebench.gym_carla.replay_buffer import RouteReplayBuffer, PerceptionReplayBuffer
+
+from safebench.agent import AGENT_POLICY_LIST
+from safebench.scenario import SCENARIO_POLICY_LIST
+
+from safebench.scenario.scenario_manager.carla_data_provider import CarlaDataProvider
+from safebench.scenario.scenario_data_loader import ScenarioDataLoader, ScenicDataLoader
+from safebench.scenario.tools.scenario_utils import scenario_parse, scenic_parse
+
+from safebench.util.logger import Logger, setup_logger_kwargs
+from safebench.util.metric_util import get_route_scores, get_perception_scores
+from safebench.util.scenic_utils import ScenicSimulator
+
+class ScenicRunner:
+    def __init__(self, agent_config, scenario_config):
+        self.scenario_config = scenario_config
+        self.agent_config = agent_config
+
+        self.seed = scenario_config['seed']
+        self.exp_name = scenario_config['exp_name']
+        self.output_dir = scenario_config['output_dir']
+        self.mode = scenario_config['mode']
+        self.save_video = scenario_config['save_video']
+
+        self.render = scenario_config['render']
+        self.num_scenario = scenario_config['num_scenario']
+        self.fixed_delta_seconds = scenario_config['fixed_delta_seconds']
+        self.scenario_category = scenario_config['scenario_category']
+
+        # continue training flag
+        self.continue_agent_training = scenario_config['continue_agent_training']
+        self.continue_scenario_training = scenario_config['continue_scenario_training']
+
+        # apply settings to carla
+        self.client = carla.Client('localhost', scenario_config['port'])
+        self.client.set_timeout(10.0)
+        self.world = None
+        self.env = None
+
+        self.env_params = {
+            'auto_ego': scenario_config['auto_ego'],
+            'obs_type': agent_config['obs_type'],
+            'scenario_category': self.scenario_category,
+            'ROOT_DIR': scenario_config['ROOT_DIR'],
+            'warm_up_steps': 9,  # number of ticks after spawning the vehicles
+            'disable_lidar': True,  # show bird-eye view lidar or not
+            'display_size': 128,  # screen size of one bird-eye view window
+            'obs_range': 32,  # observation range (meter)
+            'd_behind': 12,  # distance behind the ego vehicle (meter)
+            'max_past_step': 1,  # the number of past steps to draw
+            'discrete': False,  # whether to use discrete control space
+            'discrete_acc': [-3.0, 0.0, 3.0],  # discrete value of accelerations
+            'discrete_steer': [-0.2, 0.0, 0.2],  # discrete value of steering angles
+            'continuous_accel_range': [-3.0, 3.0],  # continuous acceleration range
+            'continuous_steer_range': [-0.3, 0.3],  # continuous steering angle range
+            'max_episode_step': scenario_config['max_episode_step'],  # maximum timesteps per episode
+            'max_waypt': 12,  # maximum number of waypoints
+            'lidar_bin': 0.125,  # bin size of lidar sensor (meter)
+            'out_lane_thres': 4,  # threshold for out of lane (meter)
+            'desired_speed': 8,  # desired speed (m/s)
+            'image_sz': 1024,  # TODO: move to config of od scenario
+        }
+
+        # pass config from scenario to agent
+        agent_config['mode'] = scenario_config['mode']
+        agent_config['ego_action_dim'] = scenario_config['ego_action_dim']
+        agent_config['ego_state_dim'] = scenario_config['ego_state_dim']
+        agent_config['ego_action_limit'] = scenario_config['ego_action_limit']
+
+        # define logger
+        logger_kwargs = setup_logger_kwargs(
+            self.exp_name,
+            self.output_dir,
+            self.seed,
+            agent=agent_config['policy_type'],
+            scenario=scenario_config['policy_type'],
+            scenario_category=self.scenario_category
+        )
+        self.logger = Logger(**logger_kwargs)
+
+        # prepare parameters
+        if self.mode == 'train_agent':
+            self.buffer_capacity = agent_config['buffer_capacity']
+            self.eval_in_train_freq = agent_config['eval_in_train_freq']
+            self.save_freq = agent_config['save_freq']
+            self.train_episode = agent_config['train_episode']
+            self.current_episode = -1
+            self.logger.save_config(agent_config)
+            self.logger.create_training_dir()
+        elif self.mode == 'train_scenario':
+            self.save_freq = agent_config['save_freq']
+            self.logger.create_eval_dir(load_existing_results=False)
+        elif self.mode == 'eval':
+            self.save_freq = agent_config['save_freq']
+            self.logger.log('>> Evaluation Mode, skip config saving', 'yellow')
+            self.logger.create_eval_dir(load_existing_results=False)
+        else:
+            raise NotImplementedError(f"Unsupported mode: {self.mode}.")
+
+        # define agent and scenario
+        self.logger.log('>> Agent Policy: ' + agent_config['policy_type'])
+        self.logger.log('>> Scenario Policy: ' + scenario_config['policy_type'])
+
+        if self.scenario_config['auto_ego']:
+            self.logger.log('>> Using auto-pilot for the ego vehicle; actions from the policy will be ignored', 'yellow')
+        if scenario_config['policy_type'] == 'ordinary' and self.mode != 'train_agent':
+            self.logger.log('>> Ordinary scenario can only be used in agent training', 'red')
+            raise Exception()
+        self.logger.log('>> ' + '-' * 40)
+
+        # define agent and scenario policy
+        self.agent_policy = AGENT_POLICY_LIST[agent_config['policy_type']](agent_config, logger=self.logger)
+        self.scenario_policy = SCENARIO_POLICY_LIST[scenario_config['policy_type']](scenario_config, logger=self.logger)
+        if self.save_video:
+            assert self.mode == 'eval', "only allow video saving in eval mode"
+            self.logger.init_video_recorder()
+
+    def _init_world(self):
+        self.logger.log(">> Initializing carla world")
+        self.world = self.client.get_world()
+        settings = self.world.get_settings()
+        settings.synchronous_mode = True
+        settings.fixed_delta_seconds = self.fixed_delta_seconds
+        self.world.apply_settings(settings)
+        CarlaDataProvider.set_client(self.client)
+        CarlaDataProvider.set_world(self.world)
+        CarlaDataProvider.set_traffic_manager_port(self.scenario_config['tm_port'])
+
+    def _init_scenic(self, config):
+        self.logger.log(f">> Initializing scenic simulator: {config.scenic_file}")
+        self.scenic = ScenicSimulator(config.scenic_file, config.extra_params)
+
+    def _init_renderer(self):
+        self.logger.log(">> Initializing pygame birdeye renderer")
+        pygame.init()
+        flag = pygame.HWSURFACE | pygame.DOUBLEBUF
+        if not self.render:
+            flag = flag | pygame.HIDDEN
+        if self.scenario_category in ['planning', 'scenic']:
+            # [bird-eye view, Lidar, front view] or [bird-eye view, front view]
+            if self.env_params['disable_lidar']:
+                window_size = (self.env_params['display_size'] * 2, self.env_params['display_size'] * self.num_scenario)
+            else:
+                window_size = (self.env_params['display_size'] * 3, self.env_params['display_size'] * self.num_scenario)
+        else:
+            window_size = (self.env_params['display_size'], self.env_params['display_size'] * self.num_scenario)
+        self.display = pygame.display.set_mode(window_size, flag)
+
+        # initialize the render for generating observation and visualization
+        pixels_per_meter = self.env_params['display_size'] / self.env_params['obs_range']
+        pixels_ahead_vehicle = (self.env_params['obs_range'] / 2 - self.env_params['d_behind']) * pixels_per_meter
+        self.birdeye_params = {
+            'screen_size': [self.env_params['display_size'], self.env_params['display_size']],
+            'pixels_per_meter': pixels_per_meter,
+            'pixels_ahead_vehicle': pixels_ahead_vehicle,
+        }
+        self.birdeye_render = BirdeyeRender(self.world, self.birdeye_params, logger=self.logger)
+
+    def run_scenes(self, scenes):
+        self.logger.log(">> Begin to run the scene...")
+        ## currently there is only one scene in this list ##
+        for scene in scenes:
+            if self.scenic.setSimulation(scene):
+                self.scenic.update_behavior = self.scenic.runSimulation()
+                next(self.scenic.update_behavior)
+
+    def train(self, data_loader, start_episode=0, replay_buffer=None):
+        # general buffer for both agent and scenario
+        for _ in tqdm(range(len(data_loader))):
+            self.current_episode += 1
+            if self.current_episode >= self.train_episode:
+                return
+            if self.current_episode < start_episode:
+                continue
+            # sample scenarios
+            sampled_scenario_configs, _ = data_loader.sampler()
+            # reset the index counter to create an endless loader
+            # data_loader.reset_idx_counter()
+
+            scenes = [config.scene for config in sampled_scenario_configs]
+            # begin to run the scene
+            self.run_scenes(scenes)
+
+            # get static obs and then reset with init action
+            static_obs = self.env.get_static_obs(sampled_scenario_configs)
+            self.scenario_policy.load_model(sampled_scenario_configs)
+            scenario_init_action, additional_dict = self.scenario_policy.get_init_action(static_obs)
+            obs, infos = self.env.reset(sampled_scenario_configs, scenario_init_action)
+            replay_buffer.store_init([static_obs, scenario_init_action], additional_dict=additional_dict)
+
+            # get ego vehicle from scenario
+            self.agent_policy.set_ego_and_route(self.env.get_ego_vehicles(), infos)
+
+            # start loop
+            episode_reward, loss = [], None
+            while not self.env.all_scenario_done():
+                # get action from agent policy and scenario policy (assume using one batch)
+                ego_actions = self.agent_policy.get_action(obs, infos, deterministic=False)
+                scenario_actions = self.scenario_policy.get_action(obs, infos, deterministic=False)
+
+                # apply action to env and get obs
+                next_obs, rewards, dones, infos = self.env.step(ego_actions=ego_actions, scenario_actions=scenario_actions)
+                replay_buffer.store([ego_actions, scenario_actions, obs, next_obs, rewards, dones], additional_dict=infos)
+                obs = copy.deepcopy(next_obs)
+                episode_reward.append(np.mean(rewards))
+
+                # train off-policy agent or scenario
+                if self.mode == 'train_agent' and self.agent_policy.type == 'offpolicy':
+                    loss = self.agent_policy.train(replay_buffer)
+                elif self.mode == 'train_scenario' and self.scenario_policy.type == 'offpolicy':
+                    self.scenario_policy.train(replay_buffer)
+
+            score_function = get_route_scores if self.scenario_category in ['planning', 'scenic'] else get_perception_scores
+            all_scores = score_function(self.env.running_results)
+
+            # clean up the environment
+            self.env.clean_up()
+            replay_buffer.finish_one_episode()
+            self.logger.add_training_results('episode', self.current_episode)
+            self.logger.add_training_results('episode_reward', np.sum(episode_reward))
+            for key, value in all_scores.items():
+                self.logger.add_training_results(key, value)
+            if loss is not None:
+                critic_loss, actor_loss = loss
+            else:
+                critic_loss, actor_loss = 0, 0
+            self.logger.add_training_results('critic_loss', critic_loss)
+            self.logger.add_training_results('actor_loss', actor_loss)
+            self.logger.log(f">> Episode: {self.current_episode}, #buffer_len: {replay_buffer.buffer_len}, critic: {critic_loss:.3f}, actor: {actor_loss:.3f}")
+            self.logger.save_training_results()
+
+            # train on-policy agent or scenario
+            if self.mode == 'train_agent' and self.agent_policy.type == 'onpolicy':
+                self.agent_policy.train(replay_buffer)
+            elif self.mode == 'train_scenario' and self.scenario_policy.type in ['init_state', 'onpolicy']:
+                self.scenario_policy.train(replay_buffer)
+
+            # eval during training
+            if (self.current_episode + 1) % self.eval_in_train_freq == 0:
+                # self.eval(env, data_loader)
+                pass
+
+            # save checkpoints
+            if (self.current_episode + 1) % self.save_freq == 0:
+                if self.mode == 'train_agent':
+                    self.agent_policy.save_model(self.current_episode, replay_buffer)
+                if self.mode == 'train_scenario':
+                    self.scenario_policy.save_model(self.current_episode)
+
+        self.scenic.destroy()
+
+    def eval(self, data_loader, select=False):
+        num_finished_scenario = 0
+        data_loader.reset_idx_counter()
+        # record the score and the id of corresponding selected scenes
+        map_id_score = {}
+        behavior_name = data_loader.behavior
+        route_id = data_loader.route_id
+        opt_step = data_loader.opt_step
+        opt_time = 0
+
+        if route_id is None:
+            log_name = f'OPT_{behavior_name}'
+        else:
+            log_name = f'OPT_{behavior_name}_ROUTE-{route_id}'
+
+        if select:
+            self.scene_map[log_name] = {}
+            self.scene_map[log_name][f'opt_time_{opt_time}'] = self.scenic.save_params()
+
+        while len(data_loader) > 0:
+            # sample scenarios
+            sampled_scenario_configs, num_sampled_scenario = data_loader.sampler()
+            num_finished_scenario += num_sampled_scenario
+            assert num_sampled_scenario == 1, 'scenic can only run one scene at a time'
+
+            scenes = [config.scene for config in sampled_scenario_configs]
+            # begin to run the scene
+            self.run_scenes(scenes)
+
+            # reset envs with new config, get init action from scenario policy, and run scenario
+            static_obs = self.env.get_static_obs(sampled_scenario_configs)
+            self.scenario_policy.load_model(sampled_scenario_configs)
+            scenario_init_action, _ = self.scenario_policy.get_init_action(static_obs, deterministic=True)
+            obs, infos = self.env.reset(sampled_scenario_configs, scenario_init_action)
+
+            # get ego vehicle from scenario
+            self.agent_policy.set_ego_and_route(self.env.get_ego_vehicles(), infos)
+
+            score_list = {s_i: [] for s_i in range(num_sampled_scenario)}
+            while not self.env.all_scenario_done():
+                # get action from agent policy and scenario policy (assume using one batch)
+                ego_actions = self.agent_policy.get_action(obs, infos, deterministic=True)
+                scenario_actions = self.scenario_policy.get_action(obs, infos, deterministic=True)
+
+                # apply action to env and get obs
+                obs, rewards, _, infos = self.env.step(ego_actions=ego_actions, scenario_actions=scenario_actions)
+
+                # save video
+                if self.save_video:
+                    if self.scenario_category in ['planning', 'scenic']:
+                        self.logger.add_frame(pygame.surfarray.array3d(self.display).transpose(1, 0, 2))
+                    else:
+                        self.logger.add_frame({s_i['scenario_id']: ego_actions[n_i]['annotated_image'] for n_i, s_i in enumerate(infos)})
+
+                # accumulate scores of corresponding scenario
+                reward_idx = 0
+                for s_i in infos:
+                    score = rewards[reward_idx] if self.scenario_category in ['planning', 'scenic'] else 1 - infos[reward_idx]['iou_loss']
+                    score_list[s_i['scenario_id']].append(score)
+                    reward_idx += 1
+
+            # clean up all things
+            self.logger.log(">> All scenarios are completed. Cleaning up all actors")
+            self.env.clean_up()
+
+            # save video
+            if self.save_video:
+                data_ids = [config.data_id for config in sampled_scenario_configs]
+                self.logger.save_video(data_ids=data_ids, log_name=log_name)
+
+            # print score for ranking
+            self.logger.log(f'[{num_finished_scenario}/{data_loader.num_total_scenario}] Ranking scores for batch scenario:', color='yellow')
+            for s_i in score_list.keys():
+                self.logger.log('\t Env id ' + str(s_i) + ': ' + str(np.mean(score_list[s_i])), color='yellow')
+
+            # calculate evaluation results
+            score_function = get_route_scores if self.scenario_category in ['planning', 'scenic'] else get_perception_scores
+            all_running_results = self.logger.add_eval_results(records=self.env.running_results)
+            all_scores = score_function(all_running_results)
+            self.logger.add_eval_results(scores=all_scores)
+            self.logger.print_eval_results()
+            if len(self.env.running_results) % self.save_freq == 0:
+                self.logger.save_eval_results(log_name)
+
+            if infos[0]['collision']:
+                self.scenic.record_params()
+            if select and (num_finished_scenario % opt_step == 0):
+                opt_time += 1
+                self.scenic.update_params()
+                self.scene_map[log_name][f'opt_time_{opt_time}'] = self.scenic.save_params()
+                data_loader.train_scene(opt_time)
+
+        self.logger.save_eval_results(log_name)
+
+        if select:
+            self.scene_map[log_name]['select_id'] = self.select_adv_scene(self.logger.eval_records, score_function, data_loader.select_num)
+            self.dump_scene_map(sampled_scenario_configs[0].scenario_id)
+
+        self.logger.clear()
+        self.scenic.destroy()
+
+    def select_adv_scene(self, results, score_function, select_num):
+        # define your own selection mechanism here
+        map_id_score_collision = {}
+        map_id_score_non_collision = {}
+        for i in results.keys():
+            score = score_function({i: results[i]})
+            if score['collision_rate'] == 1:
+                map_id_score_collision[i] = score['final_score']
+            else:
+                map_id_score_non_collision[i] = score['final_score']
+
+        # sort the collision scenes by their scores
+        collision_scenes_sorted = sorted(map_id_score_collision.items(), key=lambda x: x[1])
+
+        # get the number of scenes to select from the collision cases
+        num_collision_selected = min(select_num, len(collision_scenes_sorted))
+
+        # select the lowest scored scenes with collision
+        selected_scene_id = [scene[0] for scene in collision_scenes_sorted[:num_collision_selected]]
+
+        # if there are not enough collision scenes, select the remaining scenes
+        num_non_collision_selected = select_num - num_collision_selected
+        if num_non_collision_selected > 0:
+            # sort the non-collision scenes by their scores
+            non_collision_scenes_sorted = sorted(map_id_score_non_collision.items(), key=lambda x: x[1])
+            # select the lowest scored scenes from the non-collision cases
+            selected_scene_id.extend([scene[0] for scene in non_collision_scenes_sorted[:num_non_collision_selected]])
+        return sorted(selected_scene_id)
+
+    def run(self, test_epoch=None):
+        # get scenario data of different maps
+        config_list = scenic_parse(self.scenario_config, self.logger)
+
+        ### load rl model ###
+        if self.mode == 'train_scenario':
+            ## we only need the pretrained surrogate model here ##
+            pass
+        elif self.mode == 'train_agent':
+            ### initialize buffer ###
+            Buffer = RouteReplayBuffer if self.scenario_category in ['scenic', 'planning'] else PerceptionReplayBuffer
+            replay_buffer = Buffer(self.num_scenario, self.mode, self.buffer_capacity)
+
+            ### repeat the training, 20 is just a random placeholder
+            config_list = config_list * 20
+
+            ### check if resume ###
+            if self.continue_agent_training:
+                self.logger.load_training_results()
+                start_episode = self.check_continue_training(self.agent_policy, replay_buffer) + 1
+                if start_episode >= self.train_episode:
+                    return
+            else:
+                self.clean_cache(self.agent_policy.model_path)
+                start_episode = -1
+
+        elif self.mode == 'eval':
+            ### load trained model for evaluation ###
+            if test_epoch:
+                self.agent_policy.load_model(episode=test_epoch)
+
+        last_town = None
+        for config in config_list:
+
+            ## set log name ##
+            if config.route_id is None:
+                log_name = f'OPT_{config.behavior}'
+            else:
+                log_name = f'OPT_{config.behavior}_ROUTE-{config.route_id}'
+
+            ## check if all done ##
+            if self.mode == 'eval':
+                if self.logger.check_eval_dir(log_name) == config.select_num:
+                    self.logger.log(">> This scenario and route have been done.")
+                    continue
+            elif self.mode == 'train_agent':
+                if self.current_episode >= self.train_episode - 1:
+                    return
+
+                if self.current_episode + config.select_num < start_episode:
+                    self.current_episode += config.select_num
+                    continue
+
+            # initialize scenic
+            self._init_scenic(config)
+
+            # initialize map and render
+            if last_town != config.extra_params['town']:
+                self._init_world()
+                self._init_renderer()
+                last_town = config.extra_params['town']
+            self.world.scenic = self.scenic
+
+            # create scenarios within the vectorized wrapper
+            self.env = VectorWrapper(
+                self.env_params,
+                self.scenario_config,
+                self.world,
+                self.birdeye_render,
+                self.display,
+                self.logger
+            )
+
+            # prepare data loader and buffer
+            data_loader = ScenicDataLoader(self.scenic, config, self.num_scenario)
+
+            # run with different modes
+            if self.mode == 'train_scenario':
+                ### select hard scenic scenario configs with the surrogate model ###
+                self.scene_map = self.load_scene_map(config.scenario_id)
+                self.agent_policy.set_mode('eval')
+                self.scenario_policy.set_mode('eval')
+                self.eval(data_loader, select=True)
+            elif self.mode == 'train_agent':
+                ### train the surrogate model on the selected hard scenarios ###
+                self.agent_policy.set_mode('train')
+                self.scenario_policy.set_mode('eval')
+                self.train(data_loader, start_episode, replay_buffer)
+            elif self.mode == 'eval':
+                ### evaluate the trained agent on different test models ###
+                self.agent_policy.set_mode('eval')
+                self.scenario_policy.set_mode('eval')
+                self.eval(data_loader)
else: + raise NotImplementedError(f"Unsupported mode: {self.mode}.") + + def check_continue_training(self, policy, replay_buffer): + # load previous checkpoint + policy.load_model(replay_buffer = replay_buffer) + if policy.continue_episode == 0: + start_episode = 0 + self.logger.log('>> Previous checkpoint not found. Training from scratch.') + else: + start_episode = policy.continue_episode + self.logger.log(f'>> Continue training from previous checkpoint, epoch: {start_episode}.') + return start_episode + + def dump_scene_map(self, scenario_id): + # load previous checkpoint + scenic_dir = os.path.join(self.scenario_config['scenic_dir'], f'scenario_{scenario_id}') + f = open(os.path.join(scenic_dir, f"{scenic_dir.split('/')[-1]}.json"), 'w') + json_dumps_str = json.dumps(self.scene_map, indent=4) + print(json_dumps_str, file=f) + f.close() + + def load_scene_map(self, scenario_id): + # load previous checkpoint + scenic_dir = os.path.join(self.scenario_config['scenic_dir'], f'scenario_{scenario_id}') + try: + with open(os.path.join(scenic_dir, f"{scenic_dir.split('/')[-1]}.json"), 'r') as f: + data = json.loads(f.read()) + except: + data = {} + return data + + def clean_cache(self, path): + # Get a list of all files in directory + all_files = glob.glob(os.path.join(path, '*')) + + # Specify the file to keep + file_to_keep = os.path.join(path, 'model.sac.-001.torch') + + # Remove all files except the one to keep + for file in all_files: + if file != file_to_keep: + os.remove(file) + + def close(self): + pygame.quit() # close pygame renderer + if self.env: + self.env.clean_up() + + diff --git a/scene/safebench/scenic_runner_dynamic.py b/scene/safebench/scenic_runner_dynamic.py new file mode 100644 index 00000000..43773e50 --- /dev/null +++ b/scene/safebench/scenic_runner_dynamic.py @@ -0,0 +1,540 @@ +''' +Date: 2023-01-31 22:23:17 +LastEditTime: 2023-03-07 12:28:17 +Description: + Copyright (c) 2022-2023 Safebench Team + + This work is licensed under the terms of the MIT license. 
+ For a copy, see +''' + +import copy +import os +import json +import glob +import random + +import numpy as np +import carla +import pygame +from tqdm import tqdm + +from safebench.gym_carla.env_wrapper import VectorWrapper +from safebench.gym_carla.envs.render import BirdeyeRender +from safebench.gym_carla.replay_buffer import RouteReplayBuffer, PerceptionReplayBuffer + +from safebench.agent import AGENT_POLICY_LIST +from safebench.scenario import SCENARIO_POLICY_LIST + +from safebench.scenario.scenario_manager.carla_data_provider import CarlaDataProvider +from safebench.scenario.scenario_data_loader import ScenarioDataLoader, ScenicDataLoader +from safebench.scenario.tools.scenario_utils import scenario_parse, dynamic_scenic_parse + +from safebench.util.logger import Logger, setup_logger_kwargs +from safebench.util.metric_util import get_route_scores, get_perception_scores +from safebench.util.scenic_utils import ScenicSimulator + +class ScenicRunner: + def __init__(self, agent_config, scenario_config): + self.scenario_config = scenario_config + self.agent_config = agent_config + + self.seed = scenario_config['seed'] + self.exp_name = scenario_config['exp_name'] + self.output_dir = scenario_config['output_dir'] + self.mode = scenario_config['mode'] + self.save_video = scenario_config['save_video'] + + self.render = scenario_config['render'] + self.num_scenario = scenario_config['num_scenario'] + self.fixed_delta_seconds = scenario_config['fixed_delta_seconds'] + self.scenario_category = scenario_config['scenario_category'] + + # continue training flag + self.continue_agent_training = scenario_config['continue_agent_training'] + self.continue_scenario_training = scenario_config['continue_scenario_training'] + + # apply settings to carla + self.client = carla.Client('localhost', scenario_config['port']) + self.client.set_timeout(10.0) + self.world = None + self.env = None + + self.env_params = { + 'auto_ego': scenario_config['auto_ego'], + 'obs_type': agent_config['obs_type'], + 'scenario_category': self.scenario_category, + 'ROOT_DIR': scenario_config['ROOT_DIR'], + 'warm_up_steps': 9, # number of ticks after spawning the vehicles + 'disable_lidar': True, # show bird-eye view lidar or not + 'display_size': 128, # screen size of one bird-eye view window + 'obs_range': 32, # observation range (meter) + 'd_behind': 12, # distance behind the ego vehicle (meter) + 'max_past_step': 1, # the number of past steps to draw + 'discrete': False, # whether to use discrete control space + 'discrete_acc': [-3.0, 0.0, 3.0], # discrete value of accelerations + 'discrete_steer': [-0.2, 0.0, 0.2], # discrete value of steering angles + 'continuous_accel_range': [-3.0, 3.0], # continuous acceleration range + 'continuous_steer_range': [-0.3, 0.3], # continuous steering angle range + 'max_episode_step': scenario_config['max_episode_step'], # maximum timesteps per episode + 'max_waypt': 12, # maximum number of waypoints + 'lidar_bin': 0.125, # bin size of lidar sensor (meter) + 'out_lane_thres': 4, # threshold for out of lane (meter) + 'desired_speed': 8, # desired speed (m/s) + 'image_sz': 1024, # TODO: move to config of od scenario + } + + + # pass config from scenario to agent + agent_config['mode'] = scenario_config['mode'] + agent_config['ego_action_dim'] = scenario_config['ego_action_dim'] + agent_config['ego_state_dim'] = scenario_config['ego_state_dim'] + agent_config['ego_action_limit'] = scenario_config['ego_action_limit'] + + # define logger + logger_kwargs = setup_logger_kwargs( + self.exp_name, + 
self.output_dir, + self.seed, + agent=agent_config['policy_type'], + scenario=scenario_config['policy_type'], + scenario_category=self.scenario_category + ) + self.logger = Logger(**logger_kwargs) + + # prepare parameters + if self.mode == 'train_agent': + self.buffer_capacity = agent_config['buffer_capacity'] + self.eval_in_train_freq = agent_config['eval_in_train_freq'] + self.save_freq = agent_config['save_freq'] + self.train_episode = agent_config['train_episode'] + self.current_episode = -1 + self.logger.save_config(agent_config) + self.logger.create_training_dir() + elif self.mode == 'train_scenario': + self.save_freq = agent_config['save_freq'] + self.logger.create_eval_dir(load_existing_results=False) + elif self.mode == 'eval': + self.save_freq = agent_config['save_freq'] + self.logger.log('>> Evaluation Mode, skip config saving', 'yellow') + self.logger.create_eval_dir(load_existing_results=False) + else: + raise NotImplementedError(f"Unsupported mode: {self.mode}.") + + # define agent and scenario + self.logger.log('>> Agent Policy: ' + agent_config['policy_type']) + self.logger.log('>> Scenario Policy: ' + scenario_config['policy_type']) + + if self.scenario_config['auto_ego']: + self.logger.log('>> Using auto-polit for ego vehicle, action of policy will be ignored', 'yellow') + if scenario_config['policy_type'] == 'ordinary' and self.mode != 'train_agent': + self.logger.log('>> Ordinary scenario can only be used in agent training', 'red') + raise Exception() + self.logger.log('>> ' + '-' * 40) + + # define agent and scenario policy + self.agent_policy = AGENT_POLICY_LIST[agent_config['policy_type']](agent_config, logger=self.logger) + self.scenario_policy = SCENARIO_POLICY_LIST[scenario_config['policy_type']](scenario_config, logger=self.logger) + if self.save_video: + assert self.mode == 'eval', "only allow video saving in eval mode" + self.logger.init_video_recorder() + + def _init_world(self): + self.logger.log(">> Initializing carla world") + self.world = self.client.get_world() + settings = self.world.get_settings() + settings.synchronous_mode = True + settings.fixed_delta_seconds = self.fixed_delta_seconds + self.world.apply_settings(settings) + CarlaDataProvider.set_client(self.client) + CarlaDataProvider.set_world(self.world) + CarlaDataProvider.set_traffic_manager_port(self.scenario_config['tm_port']) + + def _init_scenic(self, config): + self.logger.log(f">> Initializing scenic simulator: {config.scenic_file}") + self.scenic = ScenicSimulator(config.scenic_file, config.extra_params) + + def _init_renderer(self): + self.logger.log(">> Initializing pygame birdeye renderer") + pygame.init() + flag = pygame.HWSURFACE | pygame.DOUBLEBUF + if not self.render: + flag = flag | pygame.HIDDEN + if self.scenario_category in ['planning', 'scenic']: + # [bird-eye view, Lidar, front view] or [bird-eye view, front view] + if self.env_params['disable_lidar']: + window_size = (self.env_params['display_size'] * 2, self.env_params['display_size'] * self.num_scenario) + else: + window_size = (self.env_params['display_size'] * 3, self.env_params['display_size'] * self.num_scenario) + else: + window_size = (self.env_params['display_size'], self.env_params['display_size'] * self.num_scenario) + self.display = pygame.display.set_mode(window_size, flag) + + # initialize the render for generating observation and visualization + pixels_per_meter = self.env_params['display_size'] / self.env_params['obs_range'] + pixels_ahead_vehicle = (self.env_params['obs_range'] / 2 - 
self.env_params['d_behind']) * pixels_per_meter + self.birdeye_params = { + 'screen_size': [self.env_params['display_size'], self.env_params['display_size']], + 'pixels_per_meter': pixels_per_meter, + 'pixels_ahead_vehicle': pixels_ahead_vehicle, + } + self.birdeye_render = BirdeyeRender(self.world, self.birdeye_params, logger=self.logger) + + def run_scenes(self, scenes): + self.logger.log(f">> Begin to run the scene...") + ## currently there is only one scene in this list ## + for scene in scenes: + if self.scenic.setSimulation(scene): + self.scenic.update_behavior = self.scenic.runSimulation() + next(self.scenic.update_behavior) + + def train(self, data_loader, start_episode=0, replay_buffer = None): + # general buffer for both agent and scenario + + for _ in tqdm(range(len(data_loader))): + self.current_episode += 1 + if self.current_episode >= self.train_episode: + return + if self.current_episode < start_episode: + continue + # sample scenarios + sampled_scenario_configs, _ = data_loader.sampler() + # reset the index counter to create endless loader + # data_loader.reset_idx_counter() + + scenes = [config.scene for config in sampled_scenario_configs] + # begin to run the scene + self.run_scenes(scenes) + + # get static obs and then reset with init action + static_obs = self.env.get_static_obs(sampled_scenario_configs) + self.scenario_policy.load_model(sampled_scenario_configs) + scenario_init_action, additional_dict = self.scenario_policy.get_init_action(static_obs) + obs, infos = self.env.reset(sampled_scenario_configs, scenario_init_action) + replay_buffer.store_init([static_obs, scenario_init_action], additional_dict=additional_dict) + + # get ego vehicle from scenario + self.agent_policy.set_ego_and_route(self.env.get_ego_vehicles(), infos) + + # start loop + episode_reward = [] + while not self.env.all_scenario_done(): + # get action from agent policy and scenario policy (assume using one batch) + ego_actions = self.agent_policy.get_action(obs, infos, deterministic=False) + scenario_actions = self.scenario_policy.get_action(obs, infos, deterministic=False) + + # apply action to env and get obs + next_obs, rewards, dones, infos = self.env.step(ego_actions=ego_actions, scenario_actions=scenario_actions) + replay_buffer.store([ego_actions, scenario_actions, obs, next_obs, rewards, dones], additional_dict=infos) + obs = copy.deepcopy(next_obs) + episode_reward.append(np.mean(rewards)) + + # train off-policy agent or scenario + if self.mode == 'train_agent' and self.agent_policy.type == 'offpolicy': + loss = self.agent_policy.train(replay_buffer) + elif self.mode == 'train_scenario' and self.scenario_policy.type == 'offpolicy': + self.scenario_policy.train(replay_buffer) + + score_function = get_route_scores if self.scenario_category in ['planning', 'scenic'] else get_perception_scores + all_scores = score_function(self.env.running_results) + + # end up environment + self.env.clean_up() + replay_buffer.finish_one_episode() + self.logger.add_training_results('episode', self.current_episode) + self.logger.add_training_results('episode_reward', np.sum(episode_reward)) + for key, value in all_scores.items(): + self.logger.add_training_results(key, value) + if loss is not None: + critic_loss, actor_loss = loss + self.logger.add_training_results('critic_loss', critic_loss) + self.logger.add_training_results('actor_loss', actor_loss) + else: + critic_loss, actor_loss = 0, 0 + self.logger.add_training_results('critic_loss', critic_loss) + self.logger.add_training_results('actor_loss', 
actor_loss) + self.logger.log(f">> Episode: {self.current_episode}, #buffer_len: {replay_buffer.buffer_len}, critic: {critic_loss:.3f}, actor: {actor_loss:.3f}") + self.logger.save_training_results() + + # train on-policy agent or scenario + if self.mode == 'train_agent' and self.agent_policy.type == 'onpolicy': + self.agent_policy.train(replay_buffer) + elif self.mode == 'train_scenario' and self.scenario_policy.type in ['init_state', 'onpolicy']: + self.scenario_policy.train(replay_buffer) + + # eval during training + if (self.current_episode+1) % self.eval_in_train_freq == 0: + #self.eval(env, data_loader) + pass + + # save checkpoints + if (self.current_episode+1) % self.save_freq == 0: + if self.mode == 'train_agent': + self.agent_policy.save_model(self.current_episode, replay_buffer) + if self.mode == 'train_scenario': + self.scenario_policy.save_model(self.current_episode) + + self.scenic.destroy() + + def eval(self, data_loader, select = False): + num_finished_scenario = 0 + data_loader.reset_idx_counter() + # recording the score and the id of corresponding selected scenes + map_id_score = {} + behavior_name = data_loader.behavior + opt_step = data_loader.opt_step + opt_time = 0 + + log_name = f'OPT_{behavior_name}' + + if select: + self.scene_map[log_name] = {} + self.scene_map[log_name][f'opt_time_{opt_time}'] = self.scenic.save_params() + + while len(data_loader) > 0: + # sample scenarios + sampled_scenario_configs, num_sampled_scenario = data_loader.sampler() + num_finished_scenario += num_sampled_scenario + assert num_sampled_scenario == 1, 'scenic can only run one scene at one time' + + scenes = [config.scene for config in sampled_scenario_configs] + # begin to run the scene + self.run_scenes(scenes) + + # reset envs with new config, get init action from scenario policy, and run scenario + static_obs = self.env.get_static_obs(sampled_scenario_configs) + self.scenario_policy.load_model(sampled_scenario_configs) + scenario_init_action, _ = self.scenario_policy.get_init_action(static_obs, deterministic=True) + obs, infos = self.env.reset(sampled_scenario_configs, scenario_init_action) + + # get ego vehicle from scenario + self.agent_policy.set_ego_and_route(self.env.get_ego_vehicles(), infos) + + score_list = {s_i: [] for s_i in range(num_sampled_scenario)} + while not self.env.all_scenario_done(): + # get action from agent policy and scenario policy (assume using one batch) + ego_actions = self.agent_policy.get_action(obs, infos, deterministic=True) + scenario_actions = self.scenario_policy.get_action(obs, infos, deterministic=True) + + # apply action to env and get obs + obs, rewards, _, infos = self.env.step(ego_actions=ego_actions, scenario_actions=scenario_actions) + + # save video + if self.save_video: + self.logger.add_frame(pygame.surfarray.array3d(self.display).transpose(1, 0, 2)) + + # accumulate scores of corresponding scenario + reward_idx = 0 + for s_i in infos: + score = rewards[reward_idx] if self.scenario_category in ['planning', 'scenic'] else 1-infos[reward_idx]['iou_loss'] + score_list[s_i['scenario_id']].append(score) + reward_idx += 1 + + # clean up all things + self.logger.log(">> All scenarios are completed. 
Cleaning up all actors")
+            self.env.clean_up()
+
+            # save video
+            if self.save_video:
+                data_ids = [config.data_id for config in sampled_scenario_configs]
+                self.logger.save_video(data_ids=data_ids, log_name=log_name)
+
+            # print score for ranking
+            self.logger.log(f'[{num_finished_scenario}/{data_loader.num_total_scenario}] Ranking scores for batch scenario:', color='yellow')
+            for s_i in score_list.keys():
+                self.logger.log('\t Env id ' + str(s_i) + ': ' + str(np.mean(score_list[s_i])), color='yellow')
+
+            # calculate evaluation results
+            score_function = get_route_scores if self.scenario_category in ['planning', 'scenic'] else get_perception_scores
+            all_running_results = self.logger.add_eval_results(records=self.env.running_results)
+            all_scores = score_function(all_running_results)
+            self.logger.add_eval_results(scores=all_scores)
+            self.logger.print_eval_results()
+            if len(self.env.running_results) % self.save_freq == 0:
+                self.logger.save_eval_results(log_name)
+
+            if infos[0]['collision']:
+                self.scenic.record_params()
+
+            if select and (num_finished_scenario % opt_step == 0):
+                opt_time += 1
+                self.scenic.update_params()
+                self.scene_map[log_name][f'opt_time_{opt_time}'] = self.scenic.save_params()
+                data_loader.train_scene(opt_time)
+
+        self.logger.save_eval_results(log_name)
+
+        if select:
+            self.scene_map[log_name]['select_id'] = self.select_adv_scene(self.logger.eval_records, score_function, data_loader.select_num)
+            self.dump_scene_map()
+
+        self.logger.clear()
+        self.scenic.destroy()
+
+    def select_adv_scene(self, results, score_function, select_num):
+        # define your own selection mechanism here:
+        # prefer colliding scenes with the lowest final score, then pad with the
+        # lowest-scoring non-colliding scenes if fewer than select_num collide
+        map_id_score_collision = {}
+        map_id_score_non_collision = {}
+        for i in results.keys():
+            score = score_function({i: results[i]})
+            if score['collision_rate'] == 1:
+                map_id_score_collision[i] = score['final_score']
+            else:
+                map_id_score_non_collision[i] = score['final_score']
+
+        # Sort the collision scenes by their scores
+        collision_scenes_sorted = sorted(map_id_score_collision.items(), key=lambda x: x[1])
+
+        # Get the number of scenes to select from the collision cases
+        num_collision_selected = min(select_num, len(collision_scenes_sorted))
+
+        # Select the lowest scored scenes with collision
+        selected_scene_id = [scene[0] for scene in collision_scenes_sorted[:num_collision_selected]]
+
+        # If not enough collision scenes, select remaining scenes
+        num_non_collision_selected = select_num - num_collision_selected
+        if num_non_collision_selected > 0:
+            # Sort the non-collision scenes by their scores
+            non_collision_scenes_sorted = sorted(map_id_score_non_collision.items(), key=lambda x: x[1])
+            # Select the lowest scored scenes from the non-collision cases
+            selected_scene_id.extend([scene[0] for scene in non_collision_scenes_sorted[:num_non_collision_selected]])
+        return sorted(selected_scene_id)
+
+    def run(self, test_epoch=None):
+        # get scenario data of different maps
+        # NOTE: dynamic_scenic_parse is not imported above; it presumably lives
+        # alongside scenario_parse in safebench.scenario.tools.scenario_utils
+        config_list = dynamic_scenic_parse(self.scenario_config, self.logger)
+
+        ### load rl model ###
+        if self.mode == 'train_scenario':
+            ## we only need the pretrained surrogate model here ##
+            pass
+        elif self.mode == 'train_agent':
+            ## initialize buffer ##
+            Buffer = RouteReplayBuffer if self.scenario_category in ['scenic', 'planning'] else PerceptionReplayBuffer
+            replay_buffer = Buffer(self.num_scenario, self.mode, self.buffer_capacity)
+
+            ### repeat the training; 20 is an arbitrary repetition factor so the loader never runs dry ###
+            config_list = config_list * 20
+
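+            # Illustrative budget (assumed numbers): with train_episode = 100 and
+            # select_num = 5 scenes per config, train() consumes about 20 configs
+            # before reaching the episode limit; the x20 repetition above only
+            # guarantees config_list is long enough.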
+            ### check if resume ###
+            if self.continue_agent_training:
+                self.logger.load_training_results()
+                start_episode = self.check_continue_training(self.agent_policy, replay_buffer) + 1
+                if start_episode >= self.train_episode:
+                    return
+            else:
+                self.clean_cache(self.agent_policy.model_path)
+                start_episode = -1
+
+        elif self.mode == 'eval' and test_epoch:
+            ### load trained model for evaluation ###
+            self.agent_policy.load_model(episode=test_epoch)
+
+        last_town = None
+        for config in config_list:
+
+            ## set log name ##
+            log_name = f'OPT_{config.behavior}'
+
+            ## check if all done ##
+            if self.mode == 'eval':
+                if self.logger.check_eval_dir(log_name) == config.select_num:
+                    self.logger.log(">> This scenario and route have already been completed.")
+                    continue
+            elif self.mode == 'train_agent':
+                if self.current_episode >= self.train_episode - 1:
+                    return
+
+                if self.current_episode + config.select_num < start_episode:
+                    self.current_episode += config.select_num
+                    continue
+
+            # initialize scenic
+            self._init_scenic(config)
+
+            # initialize map and render
+            self._init_world()
+            self._init_renderer()
+            self.world.scenic = self.scenic
+
+            # create scenarios within the vectorized wrapper
+            self.env = VectorWrapper(
+                self.env_params,
+                self.scenario_config,
+                self.world,
+                self.birdeye_render,
+                self.display,
+                self.logger
+            )
+
+            # prepare data loader and buffer
+            # NOTE: ScenicDataLoader is not imported above (only ScenarioDataLoader is);
+            # it presumably comes from the same safebench.scenario.scenario_data_loader module
+            data_loader = ScenicDataLoader(self.scenic, config, self.num_scenario)
+
+            # run with different modes
+            if self.mode == 'train_scenario':
+                ### select hard scenic scenario config on the surrogate model ###
+                self.scene_map = self.load_scene_map()
+                self.agent_policy.set_mode('eval')
+                self.scenario_policy.set_mode('eval')
+                self.eval(data_loader, select=True)
+            elif self.mode == 'train_agent':
+                ### train the surrogate model on the selected hard scenarios ###
+                self.agent_policy.set_mode('train')
+                self.scenario_policy.set_mode('eval')
+                self.train(data_loader, start_episode, replay_buffer)
+            elif self.mode == 'eval':
+                ### evaluate the trained agent on different test models ###
+                self.agent_policy.set_mode('eval')
+                self.scenario_policy.set_mode('eval')
+                self.eval(data_loader)
+            else:
+                raise NotImplementedError(f"Unsupported mode: {self.mode}.")
+
+    def check_continue_training(self, policy, replay_buffer):
+        # load previous checkpoint
+        policy.load_model(replay_buffer=replay_buffer)
+        if policy.continue_episode == 0:
+            start_episode = 0
+            self.logger.log('>> Previous checkpoint not found. Training from scratch.')
+        else:
+            start_episode = policy.continue_episode
+            self.logger.log(f'>> Continue training from previous checkpoint, epoch: {start_episode}.')
+        return start_episode
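+
+    # Example of the resume flow above (hypothetical checkpoint naming): a checkpoint
+    # saved at episode 49 yields policy.continue_episode == 49 after load_model(), so
+    # run() resumes from start_episode = 49 + 1 = 50 and skips already-finished configs.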
+
+    def dump_scene_map(self):
+        # dump the selected scene parameters to a json file
+        import json  # json is not imported at the top of this file
+        scenic_dir = self.scenario_config['scenic_dir']
+        with open(os.path.join(scenic_dir, "dynamic_scenario.json"), 'w') as f:
+            print(json.dumps(self.scene_map, indent=4), file=f)
+
+    def load_scene_map(self):
+        # load the previously saved scene parameters, if any
+        import json  # json is not imported at the top of this file
+        scenic_dir = self.scenario_config['scenic_dir']
+        try:
+            with open(os.path.join(scenic_dir, "dynamic_scenario.json"), 'r') as f:
+                data = json.loads(f.read())
+        except (OSError, json.JSONDecodeError):
+            data = {}
+        return data
+
+    def clean_cache(self, path):
+        # Get a list of all files in directory
+        all_files = glob.glob(os.path.join(path, '*'))
+
+        # Specify the file to keep
+        file_to_keep = os.path.join(path, 'model.sac.-001.torch')
+
+        # Remove all files except the one to keep
+        for file in all_files:
+            if file != file_to_keep:
+                os.remove(file)
+
+    def close(self):
+        pygame.quit()  # close pygame renderer
+        if self.env:
+            self.env.clean_up()
diff --git a/scene/safebench/util/__pycache__/od_util.cpython-37.pyc b/scene/safebench/util/__pycache__/od_util.cpython-37.pyc
new file mode 100644
index 00000000..3373c9c2
Binary files /dev/null and b/scene/safebench/util/__pycache__/od_util.cpython-37.pyc differ
diff --git a/scene/safebench/util/__pycache__/run_util.cpython-37.pyc b/scene/safebench/util/__pycache__/run_util.cpython-37.pyc
new file mode 100644
index 00000000..9dac8106
Binary files /dev/null and b/scene/safebench/util/__pycache__/run_util.cpython-37.pyc differ
diff --git a/scene/safebench/util/__pycache__/scenic_utils.cpython-38.pyc b/scene/safebench/util/__pycache__/scenic_utils.cpython-38.pyc
new file mode 100644
index 00000000..cf5114e8
Binary files /dev/null and b/scene/safebench/util/__pycache__/scenic_utils.cpython-38.pyc differ
diff --git a/scene/safebench/util/__pycache__/torch_util.cpython-37.pyc b/scene/safebench/util/__pycache__/torch_util.cpython-37.pyc
new file mode 100644
index 00000000..a289044d
Binary files /dev/null and b/scene/safebench/util/__pycache__/torch_util.cpython-37.pyc differ
diff --git a/scene/safebench/util/logger.py b/scene/safebench/util/logger.py
new file mode 100644
index 00000000..7f0bc293
--- /dev/null
+++ b/scene/safebench/util/logger.py
@@ -0,0 +1,370 @@
+'''
+Date: 2023-01-31 22:23:17
+LastEditTime: 2023-04-01 16:02:49
+Description:
+    Copyright (c) 2022-2023 Safebench Team
+
+    This work is licensed under the terms of the MIT license.
+    For a copy, see
+'''
+
+import atexit
+import json
+import os
+import os.path as osp
+import time
+import pickle
+
+import joblib
+import numpy as np
+import yaml
+
+from safebench.util.run_util import VideoRecorder, VideoRecorder_Perception
+
+
+# Where experiment outputs are saved by default:
+DEFAULT_DATA_DIR = osp.abspath(osp.dirname(osp.dirname(osp.dirname(__file__))))
+
+# Whether to automatically insert a date and time stamp into the names of
+# save directories:
+FORCE_DATESTAMP = False
+
+
+def setup_logger_kwargs(exp_name, output_dir, seed, datestamp=False, agent=None, scenario=None, scenario_category='planning'):
+    # Datestamp forcing
+    datestamp = datestamp or FORCE_DATESTAMP
+
+    # Make base path
+    ymd_time = time.strftime("%Y-%m-%d_") if datestamp else ''
+    relpath = ''.join([ymd_time, exp_name])
+
+    # specify agent policy and scenario policy in the experiment directory.
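+    # Illustrative result (assumed arguments): exp_name='chatscene', agent='sac',
+    # scenario='scenic', seed=0 and datestamp=False resolve to
+    #   <DEFAULT_DATA_DIR>/<output_dir>/chatscene/chatscene_sac_scenic_seed_0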
+    agent_scenario_exp_name = exp_name
+    if agent is not None:
+        agent_scenario_exp_name = agent_scenario_exp_name + '_' + agent
+    if scenario is not None:
+        agent_scenario_exp_name = agent_scenario_exp_name + '_' + scenario
+
+    # Make a seed-specific subfolder in the experiment directory.
+    if datestamp:
+        hms_time = time.strftime("%Y-%m-%d_%H-%M-%S")
+        subfolder = ''.join([hms_time, '-', agent_scenario_exp_name, '_s', str(seed)])
+    else:
+        subfolder = ''.join([agent_scenario_exp_name, '_seed_', str(seed)])
+    relpath = osp.join(relpath, subfolder)
+
+    data_dir = os.path.join(DEFAULT_DATA_DIR, output_dir)
+    logger_kwargs = dict(
+        output_dir=osp.join(data_dir, relpath),
+        exp_name=exp_name,
+        scenario_category=scenario_category,
+    )
+    return logger_kwargs
+
+
+def is_json_serializable(v):
+    try:
+        json.dumps(v)
+        return True
+    except Exception:
+        return False
+
+
+def convert_json(obj):
+    """ Convert obj to a version which can be serialized with JSON. """
+    if is_json_serializable(obj):
+        return obj
+    else:
+        if isinstance(obj, dict):
+            return {convert_json(k): convert_json(v) for k, v in obj.items()}
+        elif isinstance(obj, tuple):
+            # materialize the tuple; a bare generator expression is not JSON serializable
+            return tuple(convert_json(x) for x in obj)
+        elif isinstance(obj, list):
+            return [convert_json(x) for x in obj]
+        elif hasattr(obj, '__name__') and not ('lambda' in obj.__name__):
+            return convert_json(obj.__name__)
+        elif hasattr(obj, '__dict__') and obj.__dict__:
+            obj_dict = {convert_json(k): convert_json(v) for k, v in obj.__dict__.items()}
+            return {str(obj): obj_dict}
+
+        return str(obj)
+
+
+def statistics_scalar(x, with_min_and_max=False):
+    """
+    Get mean/std and optional min/max of scalar x.
+    Args:
+        x: An array containing samples of the scalar to produce statistics for.
+        with_min_and_max (bool): If true, return min and max of x in
+            addition to mean and std.
+    """
+    x = np.array(x, dtype=np.float32)
+    mean = np.mean(x)
+    std = np.std(x)
+    if with_min_and_max:
+        return mean, std, np.min(x), np.max(x)
+    return mean, std
+
+
+color2num = dict(
+    gray=30,
+    red=31,
+    green=32,
+    yellow=33,
+    blue=34,
+    magenta=35,
+    cyan=36,
+    white=37,
+    crimson=38
+)
+
+
+def colorize(string, color, bold=False, highlight=False):
+    """
+    Colorize a string.
+
+    This function was originally written by John Schulman.
+    """
+    attr = []
+    num = color2num[color]
+    if highlight: num += 10
+    attr.append(str(num))
+    if bold: attr.append('1')
+    return '\x1b[%sm%s\x1b[0m' % (';'.join(attr), string)
+
+
+class Logger:
+    """
+    A general-purpose logger.
+    Makes it easy to save diagnostics, hyperparameter configurations, the state of a training run, and the trained model.
+    """
+    def __init__(self, output_dir=None, output_fname='progress.txt', exp_name=None, scenario_category='planning'):
+        """
+        Initialize a Logger.
+
+        Args:
+            output_dir (string): A directory for saving results to.
+                If ``None``, defaults to a temp directory of the form ``/tmp/experiments/somerandomnumber``.
+
+            output_fname (string): Name for the tab-separated-value file
+                containing metrics logged throughout a training run. Defaults to ``progress.txt``.
+
+            exp_name (string): Experiment name. If you run multiple training
+                runs and give them all the same ``exp_name``, the plotter will know to group them. (Use case: if you run the same
+                hyperparameter configuration with multiple random seeds, you should give them all the same ``exp_name``.)
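+
+            scenario_category (string): One of ``planning``, ``scenic`` or ``perception``;
+                determines which video recorder ``init_video_recorder`` creates.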
+ """ + self.epoch = 0 + self.first_row = True + self.log_headers = [] + self.log_current_row = {} + self.exp_name = exp_name + self.log_print_history = [] + self.video_recorder = None + self.scenario_category = scenario_category + + self.output_dir = output_dir or "/tmp/experiments/%i" % int(time.time()) + self.log('>> ' + '-' * 40) + if osp.exists(self.output_dir): + self.log(">> Log path %s already exists! Storing info there anyway." % self.output_dir, 'yellow') + else: + os.makedirs(self.output_dir) + self.output_file = open(osp.join(self.output_dir, output_fname), 'a') + atexit.register(self.output_file.close) + self.log(">> Logging data to %s" % self.output_file.name, 'green') + + self.eval_results = {} + self.eval_records = {} + self.training_results = {} + + def create_training_dir(self): + result_dir = os.path.join(self.output_dir, 'training_results') + os.makedirs(result_dir, exist_ok=True) + self.result_file = os.path.join(result_dir, 'results.pkl') + + def load_training_results(self): + if os.path.exists(self.result_file): + with open(self.result_file, 'rb') as f: + self.training_results = pickle.load(f) + else: + self.training_results = {} + + def add_training_results(self, name=None, value=None): + if name is not None: + if name not in self.training_results: + self.training_results[name] = [] + self.training_results[name].append(value) + + def save_training_results(self): + self.log(f'>> Saving training results to {self.result_file}') + joblib.dump(self.training_results, self.result_file) + + def print_training_results(self): + self.log("Training results:") + for key, value in self.eval_results.items(): + self.log(f"\t {key: <25}{value}") + + def create_eval_dir(self, load_existing_results=True): + result_dir = os.path.join(self.output_dir, 'eval_results') + os.makedirs(result_dir, exist_ok=True) + self.result_file = os.path.join(result_dir, 'results.pkl') + self.record_file = os.path.join(result_dir, 'records.pkl') + if load_existing_results: + if os.path.exists(self.record_file): + self.log(f'>> Loading existing evaluation records from {self.record_file}, ') + self.eval_records = joblib.load(self.record_file) + else: + self.log(f'>> Loading existing record fail because no records.pkl is found.') + self.eval_records = {} + + def add_eval_results(self, scores=None, records=None): + if scores is not None: + self.eval_results.update(scores) + if records is not None: + self.eval_records.update(records) + return self.eval_records + +# def save_eval_results(self): +# self.log(f'>> Saving evaluation results to {self.result_file}') +# joblib.dump(self.eval_results, self.result_file) +# self.log(f'>> Saving evaluation records to {self.record_file}, length: {len(self.eval_records)}') +# joblib.dump(self.eval_records, self.record_file) + + def save_eval_results(self, name = None): + if name == None: + self.log(f'>> Saving evaluation results to {self.result_file}') + joblib.dump(self.eval_results, self.result_file) + self.log(f'>> Saving evaluation records to {self.record_file}, length: {len(self.eval_records)}') + joblib.dump(self.eval_records, self.record_file) + else: + result_dir = os.path.join(self.output_dir, 'eval_results') + os.makedirs(result_dir, exist_ok=True) + self.result_file = os.path.join(result_dir, name + '_results.pkl') + self.record_file = os.path.join(result_dir, name + '_records.pkl') + self.log(f'>> Saving evaluation results to {self.result_file}') + joblib.dump(self.eval_results, self.result_file) + self.log(f'>> Saving evaluation records to {self.record_file}') 
+ joblib.dump(self.eval_records, self.record_file) + + def check_eval_dir(self, name): + result_dir = os.path.join(self.output_dir, 'eval_results') + os.makedirs(result_dir, exist_ok=True) + self.result_file = os.path.join(result_dir, name + '_results.pkl') + self.record_file = os.path.join(result_dir, name + '_records.pkl') + try: + return len(joblib.load(self.record_file)) + except: + return 0 + + def clear(self): + self.eval_results = {} + self.eval_records = {} + + def print_eval_results(self): + self.log("Evaluation results:") + for key, value in self.eval_results.items(): + self.log(f"\t {key: <25}{value}") + + def log(self, msg, color='green'): + # print with color + print(colorize(msg, color, bold=True)) + # save print message to log file + self.log_print_history.append(msg) + + def log_dict(self, dict_msg, color='green'): + for key, value in dict_msg.items(): + self.log("{}: {}".format(key, value), color) + + def log_tabular(self, key, val): + """ + Log a value of some diagnostic. + """ + if self.first_row: + self.log_headers.append(key) + else: + assert key in self.log_headers, "Trying to introduce a new key %s that you didn't include in the first iteration" % key + assert key not in self.log_current_row, "You already set %s this iteration. Maybe you forgot to call dump_tabular()" % key + self.log_current_row[key] = val + + def save_config(self, config): + """ + Log an experiment configuration. + """ + if self.exp_name is not None: + config['exp_name'] = self.exp_name + config_json = convert_json(config) + output = json.dumps(config_json, separators=(',', ':\t'), indent=4, sort_keys=True) + # print(colorize('Saving config:\n', color='cyan', bold=True)) + # print(output) + with open(osp.join(self.output_dir, "config.json"), 'w') as out: + out.write(output) + + with open(osp.join(self.output_dir, "config.yaml"), 'w') as out: + yaml.dump(config, out, default_flow_style=False, indent=4, sort_keys=False) + + def save_state(self, state_dict, itr=None): + """ + Saves the state of an experiment. + + Args: + state_dict (dict): Dictionary containing essential elements to + describe the current state of training. + + itr: An int, or None. Current iteration of training. + """ + fname = 'vars.pkl' if itr is None else 'vars%d.pkl' % itr + try: + joblib.dump(state_dict, osp.join(self.output_dir, fname)) + except: + self.log('Warning: could not pickle state_dict.', color='red') + + def dump_tabular(self, x_axis="Epoch", verbose=True, env=None): + """ + Write all of the diagnostics from the current iteration. Writes both to stdout, and to the output file. 
+ x_axis: "Epoch" or "TotalEnvInteracts" + """ + data_dict = {} + self.epoch += 1 + vals = [] + key_lens = [len(key) for key in self.log_headers] + max_key_len = max(15, max(key_lens)) + keystr = '%' + '%d' % max_key_len + fmt = "| " + keystr + "s | %15s |" + n_slashes = 22 + max_key_len + if verbose: + print("-" * n_slashes) + if env is not None: + print("Env: ", env) + print("-" * n_slashes) + for key in self.log_headers: + val = self.log_current_row.get(key, "") + valstr = "%8.3g" % val if hasattr(val, "__float__") else val + if verbose: + print(fmt % (key, valstr)) + vals.append(val) + + if key == x_axis: + self.steps = val + if verbose: + print("-" * n_slashes, flush=True) + if self.output_file is not None: + if self.first_row: + self.output_file.write("\t".join(self.log_headers) + "\n") + self.output_file.write("\t".join(map(str, vals)) + "\n") + self.output_file.flush() + + self.log_current_row.clear() + self.first_row = False + return data_dict + + def init_video_recorder(self): + if self.scenario_category in ['planning', 'scenic']: + self.video_recorder = VideoRecorder(self.output_dir, logger=self) + elif self.scenario_category == 'perception': + self.video_recorder = VideoRecorder_Perception(self.output_dir, logger=self) + + def add_frame(self, frame): + self.video_recorder.add_frame(frame) + + def save_video(self, data_ids): + self.video_recorder.save(data_ids=data_ids) diff --git a/scene/safebench/util/metric_util.py b/scene/safebench/util/metric_util.py new file mode 100644 index 00000000..12bf0f90 --- /dev/null +++ b/scene/safebench/util/metric_util.py @@ -0,0 +1,317 @@ +import joblib +import math +import numpy as np + +from copy import deepcopy +import argparse + +import torch +from safebench.scenario.scenario_definition.atomic_criteria import Status + + +def cal_out_of_road_length(sequence): + out_of_road_raw = [i['off_road'] for i in sequence] + out_of_road = deepcopy(out_of_road_raw) + for i, out in enumerate(out_of_road_raw): + if out and i + 1 < len(out_of_road_raw): + out_of_road[i + 1] = True + + total_length = 0 + for i, out in enumerate(out_of_road): + if i == 0: + continue + if out: + total_length += sequence[i]['driven_distance'] - sequence[i - 1]['driven_distance'] + + return total_length + + +def cal_avg_yaw_velocity(sequence): + total_yaw_change = 0 + for i, time_stamp in enumerate(sequence): + if i == 0: + continue + total_yaw_change += abs(sequence[i]['ego_yaw'] - sequence[i - 1]['ego_yaw']) + total_yaw_change = total_yaw_change / 180 * math.pi + try: + avg_yaw_velocity = total_yaw_change / (sequence[-1]['current_game_time'] - sequence[0]['current_game_time']) + except: + avg_yaw_velocity = 0 + return avg_yaw_velocity + +def get_route_scores(record_dict): + # safety level + num_collision = 0 + num_run_red_light = 0 + num_run_stop_sign = 0 + sum_out_of_road_length = 0 + for data_id, sequence in record_dict.items(): + if sequence[-1]['collision'] == Status.FAILURE: + num_collision += 1 + num_run_red_light += sequence[-1]['run_red_light'] + num_run_stop_sign += sequence[-1]['run_stop'] + sum_out_of_road_length += cal_out_of_road_length(sequence) + + collision_rate = num_collision / len(record_dict) + avg_red_light_freq = num_run_red_light / len(record_dict) + avg_stop_sign_freq = num_run_stop_sign / len(record_dict) + out_of_road_length = sum_out_of_road_length / len(record_dict) + + # task performance level + total_route_completion = 0 + total_time_spent = 0 + success_data_cnt = 0 + total_distance_to_route = 0 + for data_id, sequence in record_dict.items(): + 
total_route_completion += sequence[-1]['route_complete'] / 100 + if sequence[-1]['route_complete'] == 100: + success_data_cnt += 1 + total_time_spent += sequence[-1]['current_game_time'] - sequence[0]['current_game_time'] + avg_distance_to_route = 0 + for time_stamp in sequence: + avg_distance_to_route += time_stamp['distance_to_route'] + total_distance_to_route += avg_distance_to_route / len(sequence) + + avg_distance_to_route = total_distance_to_route / len(record_dict) + route_following_stability = max(1 - avg_distance_to_route / 5, 0) + route_completion = total_route_completion / len(record_dict) + avg_time_spent = 0 if success_data_cnt == 0 else total_time_spent / success_data_cnt + + # comfort level + num_lane_invasion = 0 + total_acc = 0 + total_yaw_velocity = 0 + for data_id, sequence in record_dict.items(): + num_lane_invasion += sequence[-1]['lane_invasion'] + avg_acc = 0 + for time_stamp in sequence: + avg_acc += math.sqrt(time_stamp['ego_acceleration_x'] ** 2 + time_stamp['ego_acceleration_y'] ** 2 + time_stamp['ego_acceleration_z'] ** 2) + total_acc += avg_acc / len(sequence) + total_yaw_velocity += cal_avg_yaw_velocity(sequence) + + avg_lane_invasion_freq = num_lane_invasion / len(record_dict) + avg_acceleration = total_acc / len(record_dict) + avg_yaw_velocity = total_yaw_velocity / len(record_dict) + + predefined_max_values = { + # safety level + 'collision_rate': 1, + 'avg_red_light_freq': 1, + 'avg_stop_sign_freq': 1, + 'out_of_road_length': 50, + + # task performance level + 'route_following_stability': 1, + 'route_completion': 1, + 'avg_time_spent': 60, + + # comfort level + 'avg_acceleration': 8, + 'avg_yaw_velocity': 3, + 'avg_lane_invasion_freq': 20, + } + + scores = { + # safety level + 'collision_rate': collision_rate, + 'avg_red_light_freq': avg_red_light_freq, + 'avg_stop_sign_freq': avg_stop_sign_freq, + 'out_of_road_length': out_of_road_length, + + # task performance level + 'route_following_stability': route_following_stability, + 'route_completion': route_completion, + 'avg_time_spent': avg_time_spent, + + # comfort level + 'avg_acceleration': avg_acceleration, + 'avg_yaw_velocity': avg_yaw_velocity, + 'avg_lane_invasion_freq': avg_lane_invasion_freq, + + } + + # normalized_scores + ns = {metric: score if metric not in predefined_max_values else score / predefined_max_values[metric] for metric, score in scores.items()} + + final_score = ((1 - ns['collision_rate']) * 5 + + (3 - ns['avg_red_light_freq'] - ns['avg_stop_sign_freq'] - ns['out_of_road_length']) * 1 + + (ns['route_following_stability'] + ns['route_completion'] + 1 - ns['avg_time_spent']) * 0.5 + + (3 - ns['avg_acceleration'] - ns['avg_yaw_velocity'] - ns['avg_lane_invasion_freq']) * 0.2) / 10.1 + + ns['safety_os'] = ((1 - ns['collision_rate']) * 5 + (3 - ns['avg_red_light_freq'] - ns['avg_stop_sign_freq'] - ns['out_of_road_length']) * 1) / 8 + ns['task_os'] = (ns['route_following_stability'] + ns['route_completion'] + 1 - ns['avg_time_spent']) * 0.5 / 1.5 + ns['comfort_os'] = (3 - ns['avg_acceleration'] - ns['avg_yaw_velocity'] - ns['avg_lane_invasion_freq']) * 0.2 / 0.6 + ns['final_score'] = final_score + return ns + +# def get_route_scores(record_dict, time_out=30): +# # safety level +# num_collision = 0 +# sum_out_of_road_length = 0 +# for data_id, sequence in record_dict.items(): +# if sequence[-1]['collision'] == Status.FAILURE: +# num_collision += 1 +# sum_out_of_road_length += cal_out_of_road_length(sequence) + +# collision_rate = num_collision / len(record_dict) +# out_of_road_length = 
sum_out_of_road_length / len(record_dict) + +# # task performance level +# total_route_completion = 0 +# total_time_spent = 0 +# total_distance_to_route = 0 +# for data_id, sequence in record_dict.items(): +# total_route_completion += sequence[-1]['route_complete'] / 100 +# total_time_spent += sequence[-1]['current_game_time'] - sequence[0]['current_game_time'] +# avg_distance_to_route = 0 +# for time_stamp in sequence: +# avg_distance_to_route += time_stamp['distance_to_route'] +# total_distance_to_route += avg_distance_to_route / len(sequence) + +# avg_distance_to_route = total_distance_to_route / len(record_dict) +# route_completion = total_route_completion / len(record_dict) +# avg_time_spent = total_time_spent / len(record_dict) + +# # comfort level +# num_lane_invasion = 0 +# total_acc = 0 +# total_yaw_velocity = 0 +# for data_id, sequence in record_dict.items(): +# num_lane_invasion += sequence[-1]['lane_invasion'] +# avg_acc = 0 +# for time_stamp in sequence: +# avg_acc += math.sqrt(time_stamp['ego_acceleration_x'] ** 2 + time_stamp['ego_acceleration_y'] ** 2 + time_stamp['ego_acceleration_z'] ** 2) +# total_acc += avg_acc / len(sequence) +# total_yaw_velocity += cal_avg_yaw_velocity(sequence) + +# predefined_max_values = { +# # safety level +# 'collision_rate': 1, +# 'out_of_road_length': 10, + +# # task performance level +# 'distance_to_route': 5, +# 'incomplete_route': 1, +# 'running_time': time_out, +# } + +# weights = { +# # safety level +# 'collision_rate': 0.4, +# 'out_of_road_length': 0.1, + +# # task performance level +# 'distance_to_route': 0.1, +# 'incomplete_route': 0.3, +# 'running_time': 0.1, +# } + +# scores = { +# # safety level +# 'collision_rate': collision_rate, +# 'out_of_road_length': out_of_road_length, + +# # task performance level +# 'distance_to_route': avg_distance_to_route, +# 'incomplete_route': 1 - route_completion, +# 'running_time': avg_time_spent, +# } + +# all_scores = {key: round(value/predefined_max_values[key], 2) for key, value in scores.items()} +# final_score = 0 +# for key, score in all_scores.items(): +# final_score += score * weights[key] +# all_scores['final_score'] = final_score + +# return all_scores + +def compute_ap(recall, precision, method='interp'): + """ Compute the average precision, given the recall and precision curves + # Arguments + recall: The recall curve (list) + precision: The precision curve (list) + # Returns + Average precision, precision curve, recall curve + """ + + # Append sentinel values to beginning and end + mrec = np.concatenate(([0.0], recall, [1.0])) + mpre_input = np.concatenate(([1.0], precision, [0.0])) + + # Compute the precision envelope + mpre = np.flip(np.maximum.accumulate(np.flip(mpre_input))) + + # Integrate area under curve + # methods: 'continuous', 'interp' + if method == 'interp': + x = np.linspace(0, 1, 101) # 101-point interp (COCO) + ap = np.trapz(np.interp(x, mrec, mpre), x) # integrate + else: # 'continuous' + i = np.where(mrec[1:] != mrec[:-1])[0] # points where x axis (recall) changes + ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) # area under curve + + return ap, mpre_input, mpre, mrec + + +def _get_pr_curve(conf_scores, logits, num_gt, data_id, iou_thres=0.5): + eps = 1e-8 + idx = torch.argsort(conf_scores, descending=True) + logits = logits[idx] + tp = torch.cumsum(logits >= iou_thres, dim=0) + tp_fp = torch.cumsum(logits >= -0., dim=0) + precision = (tp / tp_fp).numpy() + recall = (tp / (num_gt + eps)).numpy() + + ap, mpre_input, mpre, mrec = compute_ap(recall, precision, 
method='continuous')
+    return ap
+
+
+def get_perception_scores(record_dict):
+    mAP = []
+    IoU_list = []
+    pred = []
+    gt = []
+    cls = []
+    scores = []
+    for data_id in record_dict.keys():
+        IoU_list.append([rec['iou'] for rec in record_dict[data_id]])
+        conf_scores = torch.cat([rec['scores'] for rec in record_dict[data_id]])
+        logits = torch.cat([rec['logits'] for rec in record_dict[data_id]])
+        num_gt = len(record_dict[data_id])
+
+        # COCO-style mAP: average the AP over IoU thresholds 0.50:0.05:0.95
+        map_iou = []
+        for th in np.arange(0.5, 1.0, 0.05):
+            map_iou.append(_get_pr_curve(conf_scores, logits, num_gt, data_id, iou_thres=th))
+
+        mAP.append(np.mean(map_iou))
+        pred.append([rec['pred'] for rec in record_dict[data_id]])
+        gt.append([rec['gt'] for rec in record_dict[data_id]])
+        cls.append([rec['class'] for rec in record_dict[data_id]])
+        scores.append([rec['scores'] for rec in record_dict[data_id]])
+
+    IoU_mean = [np.mean(iou) for iou in IoU_list]
+
+    return {
+        # 'scores': scores,
+        # 'pred': pred,
+        # 'gt': gt,
+        # 'class': cls,
+        'mean_iou': IoU_mean,
+        'mAP_evaluate': mAP,
+    }
+
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--record_file', default='/home/carla/output/testing_records/record.pkl')
+    arguments = parser.parse_args()
+    return arguments
+
+
+if __name__ == '__main__':
+    args = parse_args()
+    record = joblib.load(args.record_file)
+    # all_scores, normalized_scores, final_score = get_scores(record)
+    # print('overall score:', final_score)
diff --git a/scene/safebench/util/od_util.py b/scene/safebench/util/od_util.py
new file mode 100644
index 00000000..2fd0ce35
--- /dev/null
+++ b/scene/safebench/util/od_util.py
@@ -0,0 +1,130 @@
+'''
+Date: 2023-01-31 22:23:17
+LastEditTime: 2023-03-04 14:40:44
+Description:
+    Copyright (c) 2022-2023 Safebench Team
+
+    This work is licensed under the terms of the MIT license.
+    For a copy, see
+'''
+
+import torch
+import numpy as np
+import cv2
+from moviepy.video.io.ffmpeg_writer import FFMPEG_VideoWriter
+
+
+def CPU(x):
+    return x.detach().cpu().numpy()
+
+
+def CUDA(x):
+    if isinstance(x, np.ndarray):
+        x = torch.from_numpy(x)
+    return x.cuda()
+
+
+def save_image(fp, img):
+    # cv2.cvtColor returns a new array; the result must be kept
+    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+    cv2.imwrite(fp, img)
+
+
+def build_projection_matrix(w, h, fov):
+    focal = w / (2.0 * np.tan(fov * np.pi / 360.0))
+    K = np.identity(3)
+    K[0, 0] = K[1, 1] = focal
+    K[0, 2] = w / 2.0
+    K[1, 2] = h / 2.0
+    return K
+
+
+def box_area(box):
+    # box = xyxy(4,n)
+    return (box[2] - box[0]) * (box[3] - box[1])
+
+
+def box_iou(box1, box2, eps=1e-7):
+    # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
+    """
+    Return intersection-over-union (Jaccard index) of boxes.
+    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
+ Arguments: + box1 (Tensor[N, 4]) + box2 (Tensor[M, 4]) + Returns: + iou (Tensor[N, M]): the NxM matrix containing the pairwise + IoU values for every element in boxes1 and boxes2 + """ + + # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2) + # diff = box + (a1, a2), (b1, b2) = box1[:, None].chunk(2, 2), box2.chunk(2, 1) + + inter = (torch.min(a2, b2) - torch.max(a1, b1)).clamp(0).prod(2) + + # IoU = inter / (area1 + area2 - inter) + return inter / (box_area(box1.T)[:, None] + box_area(box2.T) - inter + eps) + + +def xywh2xyxy(x): + # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right + y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x + y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y + y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x + y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y + return y + + +def xyxy2xywh(x): + # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right + y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y[:, 0] = (x[:, 0] + x[:, 2]) / 2 # x center + y[:, 1] = (x[:, 1] + x[:, 3]) / 2 # y center + y[:, 2] = x[:, 2] - x[:, 0] # width + y[:, 3] = x[:, 3] - x[:, 1] # height + return y + + +def xyxy2xywhn(x, w=1024, h=1024, clip=False, eps=0.0): + # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] normalized where xy1=top-left, xy2=bottom-right + + y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y[:, 0] = ((x[:, 0] + x[:, 2]) / 2) / w # x center + y[:, 1] = ((x[:, 1] + x[:, 3]) / 2) / h # y center + y[:, 2] = (x[:, 2] - x[:, 0]) / w # width + y[:, 3] = (x[:, 3] - x[:, 1]) / h # height + return y + + +def get_xyxy(x): + return torch.tensor([np.min(x[:, 0]), np.min(x[:, 1]), np.max(x[:, 0]), np.max(x[:, 1])]) + + +# coco_name +names_coco128 = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', + 'fire hydrant', 'stopsign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', + 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', + 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', + 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', + 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', + 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', + 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', + 'hair drier', 'toothbrush'] + + +names_coco_paper = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', + 'street sign', 'stopsign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', + 'bear', 'zebra', 'giraffe', 'hat', 'backpack', 'umbrella', 'shoe', 'eye glasses', 'handbag', 'tie', 'suitcase', + 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', + 'tennis racket', 'bottle', 'plate', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', + 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'mirror', + 'dining table', 'window', 'desk', 'toilet', 'door', 'tv', 'laptop', 
'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', + 'oven', 'toaster', 'sink', 'refrigerator', 'blender', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', + 'toothbrush', 'hair brush'] + + +if __name__ == '__main__': + a = torch.rand(5, 4) + b = torch.rand(4, 4) + print(box_iou(a, b)) \ No newline at end of file diff --git a/scene/safebench/util/pid_controller.py b/scene/safebench/util/pid_controller.py new file mode 100644 index 00000000..ae40f459 --- /dev/null +++ b/scene/safebench/util/pid_controller.py @@ -0,0 +1,239 @@ +''' +Date: 2023-01-31 22:23:17 +LastEditTime: 2023-04-03 19:00:37 +Description: + Copyright (c) 2022-2023 Safebench Team + + This file is modified from + Copyright (c) 2018-2020 Intel Corporation + + This work is licensed under the terms of the MIT license. + For a copy, see +''' + +from collections import deque +import math +import numpy as np +import carla +from agents.tools.misc import get_speed + + +class VehiclePIDController(): + """ + VehiclePIDController is the combination of two PID controllers + (lateral and longitudinal) to perform the low level control a vehicle from client side + """ + + def __init__(self, vehicle, args_lateral, args_longitudinal, offset=0, max_throttle=0.75, max_brake=0.3, max_steering=0.8): + """ + :param vehicle: actor to apply to local planner logic onto + :param args_lateral: dictionary of arguments to set the lateral PID controller using the following semantics: + K_P -- Proportional term + K_D -- Differential term + K_I -- Integral term + :param args_longitudinal: dictionary of arguments to set the longitudinal PID controller using the following semantics: + K_P -- Proportional term + K_D -- Differential term + K_I -- Integral term + :param offset: If different than zero, the vehicle will drive displaced from the center line. + Positive values imply a right offset while negative ones mean a left one. Numbers high enough + to cause the vehicle to drive through other lanes might break the controller. + """ + + self.max_brake = max_brake + self.max_throt = max_throttle + self.max_steer = max_steering + + self._vehicle = vehicle + self._world = self._vehicle.get_world() + self.past_steering = self._vehicle.get_control().steer + self._lon_controller = PIDLongitudinalController(self._vehicle, **args_longitudinal) + self._lat_controller = PIDLateralController(self._vehicle, offset, **args_lateral) + + def run_step(self, target_speed, transform): + """ + Execute one step of control invoking both lateral and longitudinal PID controllers to reach a target waypoint at a given target_speed. + :param target_speed: desired vehicle speed + :param waypoint: target location encoded as a waypoint + :return: distance (in meters) to the waypoint + """ + + acceleration = self._lon_controller.run_step(target_speed) + current_steering = self._lat_controller.run_step(transform) + control = carla.VehicleControl() + if acceleration >= 0.0: + control.throttle = min(acceleration, self.max_throt) + control.brake = 0.0 + else: + control.throttle = 0.0 + control.brake = min(abs(acceleration), self.max_brake) + + # Steering regulation: changes cannot happen abruptly, can't steer too much. 
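+        # e.g. (illustrative): with the 0.1-per-step cap below and max_steering = 0.8,
+        # swinging the wheel from 0 to full lock takes at least 8 control steps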
+        if current_steering > self.past_steering + 0.1:
+            current_steering = self.past_steering + 0.1
+        elif current_steering < self.past_steering - 0.1:
+            current_steering = self.past_steering - 0.1
+
+        if current_steering >= 0:
+            steering = min(self.max_steer, current_steering)
+        else:
+            steering = max(-self.max_steer, current_steering)
+
+        control.steer = steering
+        control.hand_brake = False
+        control.manual_gear_shift = False
+        self.past_steering = steering
+
+        return control
+
+    def change_longitudinal_PID(self, args_longitudinal):
+        """Changes the parameters of the PIDLongitudinalController"""
+        self._lon_controller.change_parameters(**args_longitudinal)
+
+    def change_lateral_PID(self, args_lateral):
+        """Changes the parameters of the PIDLateralController"""
+        self._lat_controller.change_parameters(**args_lateral)
+
+
+class PIDLongitudinalController():
+    """
+    PIDLongitudinalController implements longitudinal control using a PID.
+    """
+
+    def __init__(self, vehicle, K_P=1.0, K_I=0.0, K_D=0.0, dt=0.03):
+        """
+        Constructor method.
+        :param vehicle: actor to apply to local planner logic onto
+        :param K_P: Proportional term
+        :param K_D: Differential term
+        :param K_I: Integral term
+        :param dt: time differential in seconds
+        """
+        self._vehicle = vehicle
+        self._k_p = K_P
+        self._k_i = K_I
+        self._k_d = K_D
+        self._dt = dt
+        self._error_buffer = deque(maxlen=10)
+
+    def run_step(self, target_speed):
+        """
+        Execute one step of longitudinal control to reach a given target speed.
+        :param target_speed: target speed in Km/h
+        :return: throttle control
+        """
+        current_speed = get_speed(self._vehicle)
+        return self._pid_control(target_speed, current_speed)
+
+    def _pid_control(self, target_speed, current_speed):
+        """
+        Estimate the throttle/brake of the vehicle based on the PID equations
+
+        :param target_speed: target speed in Km/h
+        :param current_speed: current speed of the vehicle in Km/h
+        :return: throttle/brake control
+        """
+        error = target_speed - current_speed
+        self._error_buffer.append(error)
+
+        if len(self._error_buffer) >= 2:
+            _de = (self._error_buffer[-1] - self._error_buffer[-2]) / self._dt
+            _ie = sum(self._error_buffer) * self._dt
+        else:
+            _de = 0.0
+            _ie = 0.0
+
+        return np.clip((self._k_p * error) + (self._k_d * _de) + (self._k_i * _ie), -1.0, 1.0)
+
+    def change_parameters(self, K_P, K_I, K_D, dt):
+        self._k_p = K_P
+        self._k_i = K_I
+        self._k_d = K_D
+        self._dt = dt
+
+
+class PIDLateralController():
+    """
+    PIDLateralController implements lateral control using a PID.
+    """
+
+    def __init__(self, vehicle, offset=0, K_P=1.0, K_I=0.0, K_D=0.0, dt=0.03):
+        """
+        Constructor method.
+
+        :param vehicle: actor to apply to local planner logic onto
+        :param offset: distance to the center line. It might cause issues if the value
+            is large enough to make the vehicle invade other lanes.
+        :param K_P: Proportional term
+        :param K_D: Differential term
+        :param K_I: Integral term
+        :param dt: time differential in seconds
+        """
+        self._vehicle = vehicle
+        self._k_p = K_P
+        self._k_i = K_I
+        self._k_d = K_D
+        self._dt = dt
+        self._offset = offset
+        self._e_buffer = deque(maxlen=10)
+
+    def run_step(self, transform):
+        """
+        Execute one step of lateral control to steer
+        the vehicle towards a certain waypoint.
+ + :param transform: target waypoint + :return: steering control in the range [-1, 1] where: + -1 maximum steering to left + +1 maximum steering to right + """ + return self._pid_control(transform, self._vehicle.get_transform()) + + def _pid_control(self, transform, vehicle_transform): + """ + Estimate the steering angle of the vehicle based on the PID equations + + :param transform: target waypoint + :param vehicle_transform: current transform of the vehicle + :return: steering control in the range [-1, 1] + """ + # Get the ego's location and forward vector + ego_loc = vehicle_transform.location + v_vec = vehicle_transform.get_forward_vector() + v_vec = np.array([v_vec.x, v_vec.y, 0.0]) + + # Get the vector vehicle-target_wp + if self._offset != 0: + # Displace the wp to the side + w_tran = transform + r_vec = w_tran.get_right_vector() + w_loc = w_tran.location + carla.Location(x=self._offset*r_vec.x, y=self._offset*r_vec.y) + else: + w_loc = transform.location + + w_vec = np.array([w_loc.x - ego_loc.x, w_loc.y - ego_loc.y, 0.0]) + wv_linalg = np.linalg.norm(w_vec) * np.linalg.norm(v_vec) + if wv_linalg == 0: + _dot = 1 + else: + _dot = math.acos(np.clip(np.dot(w_vec, v_vec) / (wv_linalg), -1.0, 1.0)) + _cross = np.cross(v_vec, w_vec) + if _cross[2] < 0: + _dot *= -1.0 + + self._e_buffer.append(_dot) + if len(self._e_buffer) >= 2: + _de = (self._e_buffer[-1] - self._e_buffer[-2]) / self._dt + _ie = sum(self._e_buffer) * self._dt + else: + _de = 0.0 + _ie = 0.0 + + return np.clip((self._k_p * _dot) + (self._k_d * _de) + (self._k_i * _ie), -1.0, 1.0) + + def change_parameters(self, K_P, K_I, K_D, dt): + self._k_p = K_P + self._k_i = K_I + self._k_d = K_D + self._dt = dt diff --git a/scene/safebench/util/run_util.py b/scene/safebench/util/run_util.py new file mode 100644 index 00000000..52e039b9 --- /dev/null +++ b/scene/safebench/util/run_util.py @@ -0,0 +1,184 @@ +''' +Date: 2023-01-31 22:23:17 +LastEditTime: 2023-04-01 15:15:03 +Description: + Copyright (c) 2022-2023 Safebench Team + + This work is licensed under the terms of the MIT license. 
+    For a copy, see
+'''
+
+import os
+import os.path as osp
+import time
+import numpy as np
+from fnmatch import fnmatch
+
+import yaml
+import importlib
+
+from moviepy.video.io.ffmpeg_writer import FFMPEG_VideoWriter
+
+
+class VideoWriter:
+    def __init__(self, filename='_autoplay.mp4', fps=10.0, **kw):
+        self.writer = None
+        self.params = dict(filename=filename, fps=fps, **kw)
+
+    def add(self, img):
+        img = np.asarray(img)
+        if self.writer is None:
+            h, w = img.shape[:2]
+            self.writer = FFMPEG_VideoWriter(size=(w, h), **self.params)
+        if img.dtype in [np.float32, np.float64]:
+            # float frames are assumed to be in [0, 1]; convert to 8-bit pixels
+            img = np.uint8(img.clip(0, 1) * 255)
+        if len(img.shape) == 2:
+            img = np.repeat(img[..., None], 3, -1)
+        try:
+            self.writer.write_frame(img)
+        except Exception:
+            # skip frames the encoder rejects instead of aborting the run
+            pass
+
+    def close(self):
+        if self.writer is not None:
+            self.writer.close()
+            self.writer = None
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, *kw):
+        self.close()
+
+
+class VideoRecorder(object):
+    def __init__(self, output_dir, logger):
+        self.logger = logger
+        self.output_dir = output_dir
+        self.video_count = 0
+        self.fps = 20
+        self.frame_list = []
+        self.video_dir = os.path.join(self.output_dir, 'video')
+        self.original_video_dir = os.path.join(self.output_dir, 'video')
+
+    def add_frame(self, frame):
+        self.frame_list.append(frame)
+
+    def save(self, data_ids, log_name):
+        self.video_dir = os.path.join(self.original_video_dir, log_name)
+        data_ids = ['{:04d}'.format(data_id) for data_id in data_ids]
+        video_name = f'video_{"{:04d}".format(self.video_count)}_id_{"_".join(data_ids)}.mp4'
+        os.makedirs(self.video_dir, exist_ok=True)
+        video_file = os.path.join(self.video_dir, video_name)
+        self.logger.log(f'>> Saving video to {video_file}')
+
+        # define video writer
+        video_writer = VideoWriter(filename=video_file, fps=self.fps)
+        for f in self.frame_list:
+            video_writer.add(f)
+        video_writer.close()
+
+        # reset frame list
+        self.frame_list = []
+        self.video_count += 1
+
+
+class VideoRecorder_Perception(object):
+    def __init__(self, output_dir, logger, width=1024, height=1024):
+        self.logger = logger
+        self.output_dir = output_dir
+        self.video_dir = os.path.join(self.output_dir, 'video')
+        self.video_count = 0
+
+        self.frame_list = []
+        # TODO: parse observation size
+        self.width, self.height = width, height
+
+    def add_frame(self, frame):
+        self.frame_list.append(frame)
+
+    def save(self, data_ids):
+        num_episodes = len(data_ids)
+        os.makedirs(self.video_dir, exist_ok=True)
+
+        video_name = [f'video_{"{:04d}".format(self.video_count)}_id_{"_{:04d}".format(data)}.mp4' for data in data_ids]
+        video_file = [os.path.join(self.video_dir, v) for v in video_name]
+        self.logger.log(f'>> Saving video to {self.video_dir}')
+        self.writer_list = [VideoWriter(filename=v, fps=20.0) for v in video_file]
+        for f in self.frame_list:
+            for n_i in range(num_episodes):
+                try:
+                    self.writer_list[n_i].add(f[n_i])
+                except Exception:
+                    # a scenario may provide fewer frames than the batch size
+                    pass
+        for n_i in range(num_episodes):
+            self.writer_list[n_i].close()
+
+        self.logger.log('>> Saving video done.')
+        self.frame_list = []
+        self.video_count += 1
+
+
+def print_dict(d):
+    print(yaml.dump(d, sort_keys=False, default_flow_style=False))
+
+
+def load_config(config_path="default_config.yaml") -> dict:
+    with open(config_path, 'r') as f:
+        return yaml.safe_load(f)
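+
+# Illustrative usage (hypothetical path):
+#   cfg = load_config('safebench/agent/config/sac.yaml')
+#   print_dict(cfg)  # pretty-print the nested config as YAML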
"config.yaml": + return path, name + # if we can not find the config file from the current dir, we search for the parent dir: + if depth > 2: + return None + return find_config_dir(osp.dirname(dir), depth + 1) + + +def find_model_path(dir, itr=None): + # if itr is specified, return model with the itr number + if itr is not None: + model_path = osp.join(dir, "model_" + str(itr) + ".pt") + if not osp.exists(model_path): + return None + # raise ValueError("Model doesn't exist: " + model_path) + return model_path + # if itr is not specified, return model.pt or the one with the largest itr number + pattern = "*pt" + model = "model.pt" + max_itr = -1 + for _, _, files in os.walk(dir): + for name in files: + if fnmatch(name, pattern): + name = name.split(".pt")[0].split("_") + if len(name) > 1: + itr = int(name[1]) + if itr > max_itr: + max_itr = itr + model = "model_" + str(itr) + ".pt" + model_path = osp.join(dir, model) + if not osp.exists(model_path): + return None + # raise ValueError("Model doesn't exist: " + model_path) + return model_path, max_itr + + +def setup_eval_configs(dir, itr=None): + path, config_name = find_config_dir(dir) + model_path, load_itr = find_model_path(osp.join(path, "model_save"), itr=itr) + config_path = osp.join(path, config_name) + configs = load_config(config_path) + return model_path, load_itr, configs["policy"], configs["timeout_steps"], configs[configs["policy"]] + + +def class_from_path(path): + module_name, class_name = path.rsplit(".", 1) + class_object = getattr(importlib.import_module(module_name), class_name) + return class_object diff --git a/scene/safebench/util/scenic_utils.py b/scene/safebench/util/scenic_utils.py new file mode 100644 index 00000000..f0ec8105 --- /dev/null +++ b/scene/safebench/util/scenic_utils.py @@ -0,0 +1,414 @@ + +### Top-level functionality of the scenic package as a script: +### load a scenario and generate scenes in an infinite loop. 
+### modified from https://github.com/BerkeleyLearnVerify/Scenic/blob/main/src/scenic/__main__.py +### & https://github.com/BerkeleyLearnVerify/Scenic/blob/main/src/scenic/core/simulators.py + +import os +import random +import numpy as np +import torch + +import enum +import sys +import time +import argparse +import pygame +from collections import OrderedDict, defaultdict + +if sys.version_info >= (3, 8): + from importlib import metadata +else: + import importlib_metadata as metadata + +import scenic.syntax.translator as translator +import scenic.core.errors as errors +from scenic.core.simulators import SimulationCreationError +from scenic.core.object_types import (enableDynamicProxyFor, setDynamicProxyFor, + disableDynamicProxyFor) +from scenic.core.distributions import RejectionException +import scenic.core.dynamics as dynamics +from scenic.core.errors import RuntimeParseError, InvalidScenarioError, optionallyDebugRejection +from scenic.core.requirements import RequirementType +from scenic.core.vectors import Vector + +def get_parser(scenicFile): + parser = argparse.ArgumentParser(prog='scenic', add_help=False, + usage='scenic [-h | --help] [options] FILE [options]', + description='Sample from a Scenic scenario, optionally ' + 'running dynamic simulations.') + + mainOptions = parser.add_argument_group('main options') + mainOptions.add_argument('-S', '--simulate', default=True, + help='run dynamic simulations from scenes ' + 'instead of simply showing diagrams of scenes') + mainOptions.add_argument('-s', '--seed', help='random seed', default=0, type=int) + mainOptions.add_argument('-v', '--verbosity', help='verbosity level (default 1)', + type=int, choices=(0, 1, 2, 3), default=1) + mainOptions.add_argument('-p', '--param', help='override a global parameter', + nargs=2, default=[], action='append', metavar=('PARAM', 'VALUE')) + mainOptions.add_argument('-m', '--model', help='specify a Scenic world model', default='scenic.simulators.carla.model') + mainOptions.add_argument('--scenario', default=None, + help='name of scenario to run (if file contains multiple)') + + # Simulation options + simOpts = parser.add_argument_group('dynamic simulation options') + simOpts.add_argument('--time', help='time bound for simulations (default none)', + type=int, default=10000) + simOpts.add_argument('--count', help='number of successful simulations to run (default infinity)', + type=int, default=0) + simOpts.add_argument('--max-sims-per-scene', type=int, default=1, metavar='N', + help='max # of rejected simulations before sampling a new scene (default 1)') + + # Interactive rendering options + intOptions = parser.add_argument_group('static scene diagramming options') + intOptions.add_argument('-d', '--delay', type=float, + help='loop automatically with this delay (in seconds) ' + 'instead of waiting for the user to close the diagram') + intOptions.add_argument('-z', '--zoom', type=float, default=1, + help='zoom expansion factor, or 0 to show the whole workspace (default 1)') + + # Debugging options + debugOpts = parser.add_argument_group('debugging options') + debugOpts.add_argument('--show-params', help='show values of global parameters', + action='store_true') + debugOpts.add_argument('--show-records', help='show values of recorded expressions', + action='store_true') + debugOpts.add_argument('-b', '--full-backtrace', help='show full internal backtraces', + action='store_true') + debugOpts.add_argument('--pdb', action='store_true', + help='enter interactive debugger on errors (implies "-b")') + 
debugOpts.add_argument('--pdb-on-reject', action='store_true', + help='enter interactive debugger on rejections (implies "-b")') + ver = metadata.version('scenic') + debugOpts.add_argument('--version', action='version', version=f'Scenic {ver}', + help='print Scenic version information and exit') + debugOpts.add_argument('--dump-initial-python', help='dump initial translated Python', + action='store_true') + debugOpts.add_argument('--dump-ast', help='dump final AST', action='store_true') + debugOpts.add_argument('--dump-python', help='dump Python equivalent of final AST', + action='store_true') + debugOpts.add_argument('--no-pruning', help='disable pruning', action='store_true') + debugOpts.add_argument('--gather-stats', type=int, metavar='N', + help='collect timing statistics over this many scenes') + + parser.add_argument('--scenicFile', help='a Scenic file to run', default = scenicFile, metavar='FILE') + + parser.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS, + help=argparse.SUPPRESS) + + # Parse arguments and set up configuration + args = parser.parse_args(args=[]) + return args + +class ScenicSimulator: + def __init__(self, scenicFile, params): + self.args = get_parser(scenicFile) + delay = self.args.delay + errors.showInternalBacktrace = self.args.full_backtrace + if self.args.pdb: + errors.postMortemDebugging = True + errors.showInternalBacktrace = True + if self.args.pdb_on_reject: + errors.postMortemRejections = True + errors.showInternalBacktrace = True + translator.dumpTranslatedPython = self.args.dump_initial_python + translator.dumpFinalAST = self.args.dump_ast + translator.dumpASTPython = self.args.dump_python + translator.verbosity = self.args.verbosity + translator.usePruning = not self.args.no_pruning +# if self.args.seed is not None and self.args.verbosity >= 1: +# print(f'Using random seed = {self.args.seed}') +# random.seed(self.args.seed) + # Load scenario from file + if self.args.verbosity >= 1: + print('Beginning scenario construction...') + startTime = time.time() + self.scenario = errors.callBeginningScenicTrace( + lambda: translator.scenarioFromFile(self.args.scenicFile, + params=params, + model=self.args.model, + scenario=self.args.scenario) + ) + self.opt_params, self.opt_record = self.get_params() + + totalTime = time.time() - startTime + if self.args.verbosity >= 1: + print(f'Scenario constructed in {totalTime:.2f} seconds.') + self.simulator = errors.callBeginningScenicTrace(self.scenario.getSimulator) + self.simulator.render = False + + def get_params(self): + all_params = self.scenario.params + opt_record = {} + opt_params = {} + for param in all_params.keys(): + if param.startswith('OPT'): + opt_record[param] = [] + opt_params[param] = all_params[param] + opt_params[param].min = opt_params[param].low + opt_params[param].max = opt_params[param].high + return opt_params, opt_record + + def record_params(self): + all_params = self.scene.params + for param in self.opt_record.keys(): + self.opt_record[param].append(all_params[param]) + print("Recording params...") + + def update_params(self): + print("Updating params...") + for param in self.opt_params.keys(): + if len(self.opt_record[param]) > 1: + mean = np.mean(self.opt_record[param]) + std = np.std(self.opt_record[param]) + self.opt_params[param].low = round(max(mean - std, self.opt_params[param].min), 2) + self.opt_params[param].high = round(min(mean + std, self.opt_params[param].max), 2) +# self.opt_record[param] = [] + self.scenario.params.update(self.opt_params) + 
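+        # Worked example (illustrative): recorded values [4.0, 6.0, 8.0] give
+        # mean 6.0 and std ~1.63, so the range tightens to [4.37, 7.63]
+        # (rounded, and clipped to the parameter's original [min, max])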
print(self.save_params()) + + def load_params(self, params): + print("Loading params...") + for param in params.keys(): + self.opt_params[param].low = params[param]['low'] + self.opt_params[param].high = params[param]['high'] + self.scenario.params.update(self.opt_params) + + def save_params(self): + print("Saving params...") + save_params = {} + for param in self.opt_params.keys(): + cur_param = self.opt_params[param] + save_params[param] = {'low': cur_param.low, 'high': cur_param.high} + return save_params + + def generateScene(self): + scene, iterations = errors.callBeginningScenicTrace( + lambda: self.scenario.generate(verbosity=self.args.verbosity) + ) + return scene, iterations + + def setSimulation(self, scene): + if self.args.verbosity >= 1: + print(f' Beginning simulation of {scene.dynamicScenario}...') + try: + self.scene = scene + self.simulation = self.simulator.createSimulation(scene, verbosity=self.args.verbosity) + except SimulationCreationError as e: + if self.args.verbosity >= 1: + print(f' Failed to create simulation: {e}') + return False + return True + + def runSimulation(self): + """Run the simulation. + Throws a RejectSimulationException if a requirement is violated. + """ + maxSteps = self.args.time + trajectory = self.simulation.trajectory + if self.simulation.currentTime > 0: + raise RuntimeError('tried to run a Simulation which has already run') + assert len(trajectory) == 0 + actionSequence = [] + + import scenic.syntax.veneer as veneer + veneer.beginSimulation(self.simulation) + dynamicScenario = self.simulation.scene.dynamicScenario + + # Initialize dynamic scenario + dynamicScenario._start() + + # Give objects a chance to do any simulator-specific setup + for obj in self.simulation.objects: + if obj is self.simulation.objects[0]: + continue + obj.startDynamicSimulation() + + # Update all objects in case the simulator has adjusted any dynamic + # properties during setup + self.simulation.updateObjects() + + # Run simulation + assert self.simulation.currentTime == 0 + terminationReason = None + terminationType = None + while True: + yield self.simulation.currentTime + if self.simulation.verbosity >= 3: + print(f' Time step {self.simulation.currentTime}:') + + # Run compose blocks of compositional scenarios + # (and check if any requirements defined therein fail) + terminationReason = dynamicScenario._step() + terminationType = TerminationType.scenarioComplete + + # Record current state of the simulation + self.simulation.recordCurrentState() + + # Run monitors + newReason = dynamicScenario._runMonitors() + if newReason is not None: + terminationReason = newReason + terminationType = TerminationType.terminatedByMonitor + + # "Always" and scenario-level requirements have been checked; + # now safe to terminate if the top-level scenario has finished, + # a monitor requested termination, or we've hit the timeout + if terminationReason is not None: + pass + terminationReason = dynamicScenario._checkSimulationTerminationConditions() + if terminationReason is not None: + terminationType = TerminationType.simulationTerminationCondition + pass + if maxSteps and self.simulation.currentTime >= maxSteps: + terminationReason = f'reached time limit ({maxSteps} steps)' + terminationType = TerminationType.timeLimit + pass + + # Compute the actions of the agents in this time step + allActions = OrderedDict() + schedule = self.simulation.scheduleForAgents() + for agent in schedule: + if agent is self.simulation.objects[0]: + continue + + behavior = agent.behavior + if not 
behavior._runningIterator: # TODO remove hack + behavior._start(agent) + actions = behavior._step() + if isinstance(actions, EndSimulationAction): + terminationReason = str(actions) + terminationType = TerminationType.terminatedByBehavior + break + assert isinstance(actions, tuple) + if len(actions) == 1 and isinstance(actions[0], (list, tuple)): + actions = tuple(actions[0]) + +# if not self.simulation.actionsAreCompatible(agent, actions): +# raise InvalidScenarioError(f'agent {agent} tried incompatible ' +# f' action(s) {actions}') + allActions[agent] = actions + if terminationReason is not None: + break + + # Execute the actions + if self.simulation.verbosity >= 3: + for agent, actions in allActions.items(): + print(f' Agent {agent} takes action(s) {actions}') + actionSequence.append(allActions) + self.simulation.executeActions(allActions) + + # Run the simulation for a single step and read its state back into Scenic + # the step is controlled by safebench instead # +# self.simulation.step() + self.simulation.updateObjects() + self.simulation.currentTime += 1 + + # Package up simulation results into a compact object + # update for every step # + result = SimulationResult(trajectory, actionSequence, terminationType, + terminationReason, self.simulation.records) + self.simulation.result = result + + def endSimulation(self): + # Stop all remaining scenarios + # (and reject if some 'require eventually' condition was never satisfied) + import scenic.syntax.veneer as veneer + for scenario in tuple(veneer.runningScenarios): + scenario._stop('simulation terminated') + + # If the simulation was terminated by an exception (including rejections), + # some scenarios may still be running; we need to clean them up without + # checking their requirements, which could raise rejection exceptions. + + for scenario in tuple(veneer.runningScenarios): + scenario._stop('exception', quiet=True) + + if not hasattr(self, "simulation"): + return + + dynamicScenario = self.simulation.scene.dynamicScenario + values = dynamicScenario._evaluateRecordedExprs(RequirementType.recordFinal) + for name, val in values.items(): + self.simulation.records[name] = val + + ### destroy ### + self.simulation.destroy() + for obj in self.simulation.scene.objects: + disableDynamicProxyFor(obj) + for agent in self.simulation.agents: + if agent.behavior._isRunning: + agent.behavior._stop() + for monitor in self.simulation.scene.monitors: + if monitor._isRunning: + monitor._stop() + veneer.endSimulation(self.simulation) + + def destroy(self): + self.simulator.destroy() + +class Action: + """An :term:`action` which can be taken by an agent for one step of a simulation.""" + def canBeTakenBy(self, agent): + return True + + def applyTo(self, agent, simulation): + raise NotImplementedError + +class EndSimulationAction(Action): + """Special action indicating it is time to end the simulation. + Only for internal use. + """ + def __init__(self, line): + self.line = line + + def __str__(self): + return f'"terminate" executed on line {self.line}' + +class EndScenarioAction(Action): + """Special action indicating it is time to end the current scenario. + Only for internal use. + """ + def __init__(self, line): + self.line = line + + def __str__(self): + return f'"terminate scenario" executed on line {self.line}' + +@enum.unique +class TerminationType(enum.Enum): + """Enum describing the possible ways a simulation can end.""" + #: Simulation reached the specified time limit. 
+ timeLimit = 'reached simulation time limit' + #: The top-level scenario's :keyword:`compose` block finished executing. + scenarioComplete = 'the top-level scenario finished' + #: A user-specified termination condition was met. + simulationTerminationCondition = 'a simulation termination condition was met' + #: A :term:`monitor` used :keyword:`terminate` to end the simulation. + terminatedByMonitor = 'a monitor terminated the simulation' + #: A :term:`dynamic behavior` used :keyword:`terminate` to end the simulation. + terminatedByBehavior = 'a behavior terminated the simulation' + +class SimulationResult: + """Result of running a simulation. + Attributes: + trajectory: A tuple giving for each time step the simulation's 'state': by + default the positions of every object. See `Simulation.currentState`. + finalState: The last 'state' of the simulation, as above. + actions: A tuple giving for each time step a dict specifying for each agent the + (possibly-empty) tuple of actions it took at that time step. + terminationType (`TerminationType`): The way the simulation ended. + terminationReason (str): A human-readable string giving the reason why the + simulation ended, possibly including debugging info. + records (dict): For each :keyword:`record` statement, the value or time series of + values its expression took during the simulation. + """ + def __init__(self, trajectory, actions, terminationType, terminationReason, records): + self.trajectory = tuple(trajectory) + assert self.trajectory + self.finalState = self.trajectory[-1] + self.actions = tuple(actions) + self.terminationType = terminationType + self.terminationReason = str(terminationReason) + self.records = dict(records) diff --git a/scene/safebench/util/torch_util.py b/scene/safebench/util/torch_util.py new file mode 100644 index 00000000..8b0cde9e --- /dev/null +++ b/scene/safebench/util/torch_util.py @@ -0,0 +1,236 @@ +''' +Date: 2023-01-31 22:23:17 +LastEditTime: 2023-03-01 16:56:21 +Description: + Copyright (c) 2022-2023 Safebench Team + + This work is licensed under the terms of the MIT license. + For a copy, see +''' + +import os +from typing import Any, Iterable, Optional + +import numpy as np +import random +import scipy.signal +import torch + + +def combined_shape(length, shape=None): + if shape is None: + return (length, ) + return (length, shape) if np.isscalar(shape) else (length, *shape) + + +def discount_cumsum(x, discount): + r""" + magic from rllab for computing discounted cumulative sums of vectors. + + input: + numpy 1d vector x, [x0, x1, x2] + + output: + [x0 + discount * x1 + discount^2 * x2, x1 + discount * x2, x2] + """ + return scipy.signal.lfilter([1], [1, float(-discount)], x[::-1], axis=0)[::-1] + + +def set_seed(seed=1029): + random.seed(seed) + os.environ['PYTHONHASHSEED'] = str(seed) + np.random.seed(seed) + # torch.use_deterministic_algorithms(True) + torch.manual_seed(seed) + torch.cuda.manual_seed(seed) + torch.cuda.manual_seed_all(seed) # if you are using multi-GPU. 
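+    # the two cuDNN flags below trade speed for reproducibility: disabling
+    # benchmark mode stops cuDNN from auto-tuning kernels per input shape,
+    # and deterministic mode forces repeatable convolution algorithms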
+    torch.backends.cudnn.benchmark = False
+    torch.backends.cudnn.deterministic = True
+
+
+def set_torch_variable(device):
+    device = device.lower()
+    use_cpu = device == 'cpu'
+    use_gpu = device.split(':')[0] == 'cuda'
+    assert use_cpu or use_gpu, 'device must be either "cpu" or "cuda:{gpu_id}"'
+    # fall back to CPU when CUDA is not available on this host
+    if not torch.cuda.is_available():
+        os.environ["MODEL_DEVICE"] = 'cpu'
+    else:
+        os.environ["MODEL_DEVICE"] = device
+
+
+def get_torch_device():
+    device_name = os.environ.get("MODEL_DEVICE")
+    try:
+        return torch.device(device_name)
+    except Exception:
+        raise ValueError(f"'MODEL_DEVICE' env variable has not been specified or is invalid; its current value is {device_name}")
+
+
+def get_device_name():
+    return os.environ.get("MODEL_DEVICE")
+
+
+def to_tensor(
+        item: Any,
+        dtype: torch.dtype = torch.float32,
+        device: Optional[torch.device] = None,
+        ignore_keys: list = [],
+        transform_scalar: bool = True,
+        squeeze=False
+    ) -> torch.Tensor:
+    device = get_torch_device() if device is None else device
+
+    def squeeze_tensor(d):
+        data = torch.tensor(d, dtype=dtype, device=device)
+        if squeeze:
+            return torch.squeeze(data)
+        return data
+
+    if isinstance(item, dict):
+        new_data = {}
+        for k, v in item.items():
+            if k in ignore_keys:
+                new_data[k] = v
+            else:
+                new_data[k] = to_tensor(v, dtype, device, ignore_keys, transform_scalar, squeeze=squeeze)
+        return new_data
+    elif isinstance(item, list) or isinstance(item, tuple):
+        if len(item) == 0:
+            return None
+        return squeeze_tensor(item)
+    elif isinstance(item, np.ndarray):
+        return squeeze_tensor(item)
+    elif isinstance(item, bool) or isinstance(item, str):
+        return item
+    elif np.isscalar(item):
+        if transform_scalar:
+            return torch.as_tensor(item, device=device).to(dtype)
+        else:
+            return item
+    elif item is None:
+        return None
+    elif isinstance(item, torch.Tensor):
+        # already a tensor: only cast the dtype, keep the device unchanged
+        return item.to(dtype)
+    else:
+        raise TypeError("unsupported item type: {}".format(type(item)))
+
+
+def to_ndarray(item: Any, dtype: np.dtype = None) -> np.ndarray:
+    if isinstance(item, dict):
+        new_data = {}
+        for k, v in item.items():
+            new_data[k] = to_ndarray(v, dtype)
+        return new_data
+    elif isinstance(item, list) or isinstance(item, tuple):
+        if len(item) == 0:
+            return None
+        elif hasattr(item, '_fields'):  # namedtuple
+            return type(item)(*[to_ndarray(t, dtype) for t in item])
+        else:
+            new_data = []
+            for t in item:
+                new_data.append(to_ndarray(t, dtype))
+            return new_data
+    elif isinstance(item, torch.Tensor):
+        if item.device.type != 'cpu':
+            item = item.detach().cpu()
+        if dtype is None:
+            return item.numpy()
+        else:
+            return item.numpy().astype(dtype)
+    elif isinstance(item, np.ndarray):
+        if dtype is None:
+            return item
+        else:
+            return item.astype(dtype)
+    elif isinstance(item, bool) or isinstance(item, str):
+        return item
+    elif np.isscalar(item):
+        return np.array(item)
+    elif item is None:
+        return None
+    else:
+        raise TypeError("unsupported item type: {}".format(type(item)))
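+
+# A minimal round-trip sketch for the converters above (illustrative comment
+# only; assumes set_torch_variable() has been called so MODEL_DEVICE is set):
+#
+#     set_torch_variable('cpu')
+#     batch = {'obs': np.zeros((4, 3)), 'step': 7}
+#     tensors = to_tensor(batch)    # dict of float32 tensors on MODEL_DEVICE
+#     arrays = to_ndarray(tensors)  # back to numpy, structure preserved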
+
+
+def to_device(item: Any, device: str = None, ignore_keys: list = []) -> Any:
+    if device is None:
+        device = get_device_name()
+    if isinstance(item, torch.nn.Module):
+        return item.to(device)
+    elif isinstance(item, torch.Tensor):
+        return item.to(device)
+    elif isinstance(item, dict):
+        new_item = {}
+        for k in item.keys():
+            if k in ignore_keys:
+                new_item[k] = item[k]
+            else:
+                new_item[k] = to_device(item[k], device)
+        return new_item
+    elif isinstance(item, np.ndarray) or isinstance(item, np.bool_):
+        return item
+    elif item is None or isinstance(item, str):
+        return item
+    elif isinstance(item, list):
+        return [to_device(k, device) for k in item]
+    elif isinstance(item, tuple):
+        return tuple([to_device(k, device) for k in item])
+    else:
+        raise TypeError("unsupported item type: {}".format(type(item)))
+
+
+def to_dtype(item: Any, dtype: type) -> Any:
+    if isinstance(item, torch.Tensor):
+        return item.to(dtype=dtype)
+    elif isinstance(item, dict):
+        return {k: to_dtype(item[k], dtype) for k in item.keys()}
+    else:
+        raise TypeError("unsupported item type: {}".format(type(item)))
+
+
+def count_vars(module):
+    return sum([np.prod(p.shape) for p in module.parameters()])
+
+
+def CUDA(var):
+    return var.cuda() if torch.cuda.is_available() else var
+
+
+def CPU(var):
+    return var.cpu().detach().numpy()
+
+
+def kaiming_init(m):
+    # despite the name, Linear layers get Xavier init; conv layers get Kaiming
+    if isinstance(m, torch.nn.Linear):
+        torch.nn.init.xavier_normal_(m.weight)
+        if m.bias is not None:
+            m.bias.data.fill_(0)
+    elif isinstance(m, torch.nn.Conv2d) or isinstance(m, torch.nn.ConvTranspose2d):
+        torch.nn.init.kaiming_normal_(m.weight)
+        if m.bias is not None:
+            m.bias.data.fill_(0)
+    elif isinstance(m, (torch.nn.BatchNorm1d, torch.nn.BatchNorm2d)):
+        m.weight.data.fill_(1)
+        if m.bias is not None:
+            m.bias.data.fill_(0)
+
+
+def hidden_init(layer):
+    fan_in = layer.weight.data.size()[0]
+    lim = 1. / np.sqrt(fan_in)
+    return (-lim, lim)
+
+
+def normal(x, mu, sigma_sq):
+    # elementwise Gaussian density of x under N(mu, sigma_sq)
+    pi = CUDA(torch.FloatTensor([np.pi]))
+    a = (-1 * (CUDA(x) - mu).pow(2) / (2 * sigma_sq)).exp()
+    b = 1 / (2 * sigma_sq * pi.expand_as(sigma_sq)).sqrt()
+    return a * b
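+
+
+# Minimal smoke test for the helpers above -- a sketch added for illustration,
+# not part of the original SafeBench API; run this file directly to execute it.
+if __name__ == '__main__':
+    set_seed(0)
+    set_torch_variable('cuda:0' if torch.cuda.is_available() else 'cpu')
+    x = to_tensor(np.ones((2, 3)))                # float32 tensor on MODEL_DEVICE
+    assert tuple(x.shape) == (2, 3) and x.dtype == torch.float32
+    mu, sigma_sq = CUDA(torch.zeros(2, 3)), CUDA(torch.ones(2, 3))
+    print(to_ndarray(normal(x, mu, sigma_sq)))    # elementwise N(0, 1) density at 1.0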