diff --git a/scene/.gitignore b/scene/.gitignore
new file mode 100644
index 00000000..a5c0b714
--- /dev/null
+++ b/scene/.gitignore
@@ -0,0 +1,89 @@
+# Caches and temporary files
+*.cache
+*.tmp
+*.log
+*.swp
+*.swo
+*.swn
+*.bak
+*.session
+.DS_Store
+Thumbs.db
+
+# Python
+__pycache__/
+*.pyc
+*.pyo
+*.pyd
+*.egg
+*.egg-info/
+*.egg-link
+*.pydevproject
+*.sqlite3
+
+# C/C++ build artifacts
+*.o
+*.obj
+*.a
+*.lib
+*.so
+*.dll
+*.exe
+*.pdb
+*.ilk
+*.bin
+*.class
+*.msi
+*.msp
+
+# Visual Studio
+*.sln
+*.suo
+*.vcxproj
+*.vcxproj.filters
+*.user
+*.vs
+*.vspscc
+*.vssscc
+*.csproj
+*.csproj.user
+*.csproj.vspscc
+*.csproj.vssscc
+
+# Java
+*.jar
+*.war
+*.ear
+*.java~
+
+# JavaScript
+node_modules/
+bower_components/
+*.js.map
+*.min.js
+*.min.js.map
+
+# MATLAB
+*.mat
+*.fig
+*.mex*
+
+# Build outputs of tools and frameworks
+dist/
+build/
+out/
+target/
+
+# Editor and IDE files
+*.sublime-workspace
+*.sublime-project
+*.vscode/
+*.vscode-insiders/
+*.vscode-remote/
+*.vscode-workspace
+*.vscode-insiders-workspace
+*.vscode-remote-workspace
+
+# Misc
+*.out
+*.dat
\ No newline at end of file
diff --git a/scene/retrieve/architecture.py b/scene/retrieve/architecture.py
index ebdb3129..5c4e2cdb 100644
--- a/scene/retrieve/architecture.py
+++ b/scene/retrieve/architecture.py
@@ -2,34 +2,71 @@
 import openai
 import torch
 import transformers
-os.environ["OPENAI_API_KEY"] = 'sk-proj-xxx'
-
-class LLMChat():
-    def __init__(self, model_name = 'meta-llama/Meta-Llama-3-8B-Instruct'):
+
+# Set the OpenAI API key
+os.environ["OPENAI_API_KEY"] = 'sk-...s0kA'  # replace with your actual API key
+
+
+class LLMChat:
+    def __init__(self, model_name='gpt-4', use_gpu=True):
+        """
+        Initialize the chat model.
+        - model_name: name of the model to use (e.g. 'gpt-4' or a HuggingFace model name).
+        - use_gpu: whether to use the GPU (default: True).
+        """
         super(LLMChat, self).__init__()
         self.model_name = model_name
+        self.use_gpu = use_gpu
+
+        # For GPT models, use the OpenAI API
         if model_name.startswith('gpt'):
-            self.client = openai.OpenAI()
+            openai.api_key = os.environ["OPENAI_API_KEY"]  # set the OpenAI API key
         else:
+            # Set up the HuggingFace model and choose GPU or CPU
+            self.device = "cuda" if torch.cuda.is_available() and use_gpu else "cpu"
             self.pipeline = transformers.pipeline(
                 "text-generation",
                 model=model_name,
-                model_kwargs={"torch_dtype": torch.bfloat16},
-                device="cuda",
+                model_kwargs={"torch_dtype": torch.float32},  # use float32 to avoid compatibility issues
+                device=0 if self.device == "cuda" else -1,  # select the GPU device when available
             )
 
-    def generate(self, messages, max_new_tokens = 500):
+    def generate(self, messages, max_new_tokens=500):
+        """
+        Generate text.
+        - messages: input messages (for both GPT and HuggingFace models).
+        - max_new_tokens: maximum number of new tokens to generate.
+        """
         if self.model_name.startswith('gpt'):
-            response = self.client.chat.completions.create(
+            # For OpenAI GPT models, call the chat completions API (legacy pre-1.0 openai interface)
+            response = openai.ChatCompletion.create(
                 model=self.model_name,
-                messages=messages,
-                temperature=0,
+                messages=messages,  # pass the message list directly to the chat model
+                temperature=0.7,  # controls the diversity of the generated text
+                max_tokens=max_new_tokens,
            )
-            return response.choices[0].message.content
+            return response['choices'][0]['message']['content'].strip()  # return the generated text
+        else:
+            # For HuggingFace models, use the text-generation pipeline
+            if isinstance(messages, str):
+                # if the input is a single string, wrap it in the chat message format
+                messages = [{"role": "user", "content": messages}]
             outputs = self.pipeline(
-                messages,
+                messages[0]['content'],  # pass the content of the user message
                 max_new_tokens=max_new_tokens,
-                do_sample=False
+                do_sample=True,  # enable sampling for more diverse text
             )
-            return outputs[0]["generated_text"][-1]
+            return outputs[0]["generated_text"].strip()  # return the generated text
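+
+
+if __name__ == '__main__':
+    # Minimal usage sketch (illustrative only; not invoked by the retrieval pipeline).
+    # The GPT branch needs a valid OpenAI API key; the HuggingFace branch needs access
+    # to model weights (e.g. the previous default 'meta-llama/Meta-Llama-3-8B-Instruct').
+    chat = LLMChat(model_name='gpt-4')
+    print(chat.generate([{"role": "user", "content": "Describe a lane-change scenario in one sentence."}]))
+
+    hf_chat = LLMChat(model_name='meta-llama/Meta-Llama-3-8B-Instruct', use_gpu=True)
+    print(hf_chat.generate("Describe a cut-in scenario in one sentence.", max_new_tokens=100))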
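+
+---
+
+## Appendix: a minimal `.scenic` sketch
+
+For orientation only; the town name, map path, and distance range below are assumptions, so adapt them to your local setup:
+
+```scenic
+# Assumed map; point these at the town you actually use.
+param map = localPath('../maps/Town05.xodr')
+param carla_map = 'Town05'
+model scenic.simulators.carla.model
+
+# Ego vehicle somewhere on the drivable road network.
+ego = Car
+
+# An adversary vehicle 10 to 20 meters ahead of the ego.
+adversary = Car ahead of ego by Range(10, 20)
+```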
diff --git a/scene/safebench/carla_runner.py b/scene/safebench/carla_runner.py
new file mode 100644
index 00000000..51d60717
--- /dev/null
+++ b/scene/safebench/carla_runner.py
@@ -0,0 +1,437 @@
+'''
+Date: 2023-01-31 22:23:17
+LastEditTime: 2023-04-03 22:35:17
+Description:
+    Copyright (c) 2022-2023 Safebench Team
+
+    This work is licensed under the terms of the MIT license.
+    For a copy, see
+'''
+import os
+import copy
+import glob
+
+import numpy as np
+import carla
+import pygame
+from tqdm import tqdm
+
+from safebench.gym_carla.env_wrapper import VectorWrapper
+from safebench.gym_carla.envs.render import BirdeyeRender
+from safebench.gym_carla.replay_buffer import RouteReplayBuffer, PerceptionReplayBuffer
+
+from safebench.agent import AGENT_POLICY_LIST
+from safebench.scenario import SCENARIO_POLICY_LIST
+
+from safebench.scenario.scenario_manager.carla_data_provider import CarlaDataProvider
+from safebench.scenario.scenario_data_loader import ScenarioDataLoader
+from safebench.scenario.tools.scenario_utils import scenario_parse
+
+from safebench.util.logger import Logger, setup_logger_kwargs
+from safebench.util.metric_util import get_route_scores, get_perception_scores
+
+
+class CarlaRunner:
+    def __init__(self, agent_config, scenario_config):
+        self.scenario_config = scenario_config
+        self.agent_config = agent_config
+
+        self.seed = scenario_config['seed']
+        self.exp_name = scenario_config['exp_name']
+        self.output_dir = scenario_config['output_dir']
+        self.mode = scenario_config['mode']
+        self.save_video = scenario_config['save_video']
+
+        self.render = scenario_config['render']
+        self.num_scenario = scenario_config['num_scenario']
+        self.fixed_delta_seconds = scenario_config['fixed_delta_seconds']
+        self.scenario_category = scenario_config['scenario_category']
+
+        # continue training flag
+        self.continue_agent_training = scenario_config['continue_agent_training']
+        self.continue_scenario_training = scenario_config['continue_scenario_training']
+
+        # apply settings to carla
+        self.client = carla.Client('localhost', scenario_config['port'])
+        self.client.set_timeout(10.0)
+        self.world = None
+        self.env = None
+
+        self.env_params = {
+            'auto_ego': scenario_config['auto_ego'],
+            'obs_type': agent_config['obs_type'],
+            'scenario_category': self.scenario_category,
+            'ROOT_DIR': scenario_config['ROOT_DIR'],
+            'warm_up_steps': 9,  # number of ticks after spawning the vehicles
+            'disable_lidar': True,  # show bird-eye view lidar or not
+            'display_size': 128,  # screen size of one bird-eye view window
+            'obs_range': 32,  # observation range (meter)
+            'd_behind': 12,  # distance behind the ego vehicle (meter)
+            'max_past_step': 1,  # the number of past steps to draw
+            'discrete': False,  # whether to use discrete control space
+            'discrete_acc': [-3.0, 0.0, 3.0],  # discrete value of accelerations
+            'discrete_steer': [-0.2, 0.0, 0.2],  # discrete value of steering angles
+            'continuous_accel_range': [-3.0, 3.0],  # continuous acceleration range
+            'continuous_steer_range': [-0.3, 0.3],  # continuous steering angle range
+            'max_episode_step': scenario_config['max_episode_step'],  # maximum timesteps per episode
+            'max_waypt': 12,  # maximum number of waypoints
+            'lidar_bin': 0.125,  # bin size of lidar sensor (meter)
+            'out_lane_thres': 4,  # threshold for out of lane (meter)
+            'desired_speed': 8,  # desired speed (m/s)
+            'image_sz': 1024,  # TODO: move to config of od scenario
+        }
+
+        # pass config from scenario to agent
+        agent_config['mode'] = scenario_config['mode']
+        agent_config['ego_action_dim'] = scenario_config['ego_action_dim']
+        agent_config['ego_state_dim'] = scenario_config['ego_state_dim']
+        agent_config['ego_action_limit'] = scenario_config['ego_action_limit']
+
+        # define logger
+        logger_kwargs = setup_logger_kwargs(
+            self.exp_name,
+            self.output_dir,
+            self.seed,
+            agent=agent_config['policy_type'],
+            scenario=scenario_config['policy_type'],
+            scenario_category=self.scenario_category
+        )
+        self.logger = Logger(**logger_kwargs)
+        # prepare parameters
+        if self.mode == 'train_agent':
+            self.buffer_capacity = agent_config['buffer_capacity']
+            self.eval_in_train_freq = agent_config['eval_in_train_freq']
+            self.save_freq = agent_config['save_freq']
+            self.train_episode = agent_config['train_episode']
+            self.current_episode = -1
+            self.logger.save_config(agent_config)
+            self.logger.create_training_dir()
+        elif self.mode == 'train_scenario':
+            self.buffer_capacity = scenario_config['buffer_capacity']
+            self.eval_in_train_freq = scenario_config['eval_in_train_freq']
+            self.save_freq = scenario_config['save_freq']
+            self.train_episode = scenario_config['train_episode']
+            self.logger.save_config(scenario_config)
+            self.logger.create_training_dir()
+        elif self.mode == 'eval':
+            self.save_freq = scenario_config['save_freq']
+            self.logger.log('>> Evaluation Mode, skip config saving', 'yellow')
+            self.logger.create_eval_dir(load_existing_results=True)
+        else:
+            raise NotImplementedError(f"Unsupported mode: {self.mode}.")
+
+        # define agent and scenario
+        self.logger.log('>> Agent Policy: ' + agent_config['policy_type'])
+        self.logger.log('>> Scenario Policy: ' + scenario_config['policy_type'])
+
+        if self.scenario_config['auto_ego']:
+            self.logger.log('>> Using auto-pilot for the ego vehicle, actions from the policy will be ignored', 'yellow')
+        if scenario_config['policy_type'] == 'ordinary' and self.mode != 'train_agent':
+            self.logger.log('>> Ordinary scenario can only be used in agent training', 'red')
+            raise Exception()
+        self.logger.log('>> ' + '-' * 40)
+
+        # define agent and scenario policy
+        self.agent_policy = AGENT_POLICY_LIST[agent_config['policy_type']](agent_config, logger=self.logger)
+        self.scenario_policy = SCENARIO_POLICY_LIST[scenario_config['policy_type']](scenario_config, logger=self.logger)
+        if self.save_video:
+            assert self.mode == 'eval', "only allow video saving in eval mode"
+            self.logger.init_video_recorder()
+
+    def _init_world(self, town):
+        self.logger.log(f">> Initializing carla world: {town}")
+        self.world = self.client.load_world(town)
+        settings = self.world.get_settings()
+        settings.synchronous_mode = True
+        settings.fixed_delta_seconds = self.fixed_delta_seconds
+        self.world.apply_settings(settings)
+        CarlaDataProvider.set_client(self.client)
+        CarlaDataProvider.set_world(self.world)
+        CarlaDataProvider.set_traffic_manager_port(self.scenario_config['tm_port'])
+        self.world.set_weather(carla.WeatherParameters.ClearNoon)
+
+    def _init_renderer(self):
+        self.logger.log(">> Initializing pygame birdeye renderer")
+        pygame.init()
+        flag = pygame.HWSURFACE | pygame.DOUBLEBUF
+        if not self.render:
+            flag = flag | pygame.HIDDEN
+        if self.scenario_category == 'planning':
+            # [bird-eye view, Lidar, front view] or [bird-eye view, front view]
+            if self.env_params['disable_lidar']:
+                window_size = (self.env_params['display_size'] * 2, self.env_params['display_size'] * self.num_scenario)
+            else:
+                window_size = (self.env_params['display_size'] * 3, self.env_params['display_size'] * self.num_scenario)
+        else:
+            window_size = (self.env_params['display_size'], self.env_params['display_size'] * self.num_scenario)
+        self.display = pygame.display.set_mode(window_size, flag)
+
+        # initialize the render for generating observation and visualization
+        pixels_per_meter = self.env_params['display_size'] / self.env_params['obs_range']
+        pixels_ahead_vehicle = (self.env_params['obs_range'] / 2 - self.env_params['d_behind']) * pixels_per_meter
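+        # sanity check with the defaults above: 128 px / 32 m = 4 px per meter,
+        # and (32 / 2 - 12) * 4 = 16 px between the ego and the window center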
+        self.birdeye_params = {
+            'screen_size': [self.env_params['display_size'], self.env_params['display_size']],
+            'pixels_per_meter': pixels_per_meter,
+            'pixels_ahead_vehicle': pixels_ahead_vehicle,
+        }
+        self.birdeye_render = BirdeyeRender(self.world, self.birdeye_params, logger=self.logger)
+
+    def train(self, data_loader, start_episode=0, replay_buffer=None):
+        # general buffer for both agent and scenario
+
+        for _ in tqdm(range(len(data_loader))):
+            self.current_episode += 1
+            if self.current_episode >= self.train_episode:
+                return
+            if self.current_episode < start_episode:
+                continue
+            # sample scenarios
+            sampled_scenario_configs, _ = data_loader.sampler()
+            # reset the index counter to create endless loader
+            # data_loader.reset_idx_counter()
+
+            # get static obs and then reset with init action
+            static_obs = self.env.get_static_obs(sampled_scenario_configs)
+            self.scenario_policy.load_model(sampled_scenario_configs)
+            scenario_init_action, additional_dict = self.scenario_policy.get_init_action(static_obs)
+            try:
+                obs, infos = self.env.reset(sampled_scenario_configs, scenario_init_action)
+            except:
+                continue
+            replay_buffer.store_init([static_obs, scenario_init_action], additional_dict=additional_dict)
+
+            # get ego vehicle from scenario
+            self.agent_policy.set_ego_and_route(self.env.get_ego_vehicles(), infos)
+
+            # start loop
+            episode_reward = []
+            loss = None  # stays None when the policy being trained is on-policy
+            while not self.env.all_scenario_done():
+                # get action from agent policy and scenario policy (assume using one batch)
+                ego_actions = self.agent_policy.get_action(obs, infos, deterministic=False)
+                scenario_actions = self.scenario_policy.get_action(obs, infos, deterministic=False)
+
+                # apply action to env and get obs
+                next_obs, rewards, dones, infos = self.env.step(ego_actions=ego_actions, scenario_actions=scenario_actions)
+                replay_buffer.store([ego_actions, scenario_actions, obs, next_obs, rewards, dones], additional_dict=infos)
+                obs = copy.deepcopy(next_obs)
+                episode_reward.append(np.mean(rewards))
+
+                # train off-policy agent or scenario
+                if self.mode == 'train_agent' and self.agent_policy.type == 'offpolicy':
+                    loss = self.agent_policy.train(replay_buffer)
+                elif self.mode == 'train_scenario' and self.scenario_policy.type == 'offpolicy':
+                    self.scenario_policy.train(replay_buffer)
+
+            score_function = get_route_scores if self.scenario_category in ['planning', 'scenic'] else get_perception_scores
+            all_scores = score_function(self.env.running_results)
+
+            # clean up the environment
+            self.env.clean_up()
+            replay_buffer.finish_one_episode()
+            self.logger.add_training_results('episode', self.current_episode)
+            self.logger.add_training_results('episode_reward', np.sum(episode_reward))
+            for key, value in all_scores.items():
+                self.logger.add_training_results(key, value)
+
+            if loss is not None:
+                critic_loss, actor_loss = loss
+            else:
+                critic_loss, actor_loss = 0, 0
+            self.logger.add_training_results('critic_loss', critic_loss)
+            self.logger.add_training_results('actor_loss', actor_loss)
+            self.logger.log(f">> Episode: {self.current_episode}, #buffer_len: {replay_buffer.buffer_len}, critic: {critic_loss:.3f}, actor: {actor_loss:.3f}")
+            self.logger.save_training_results()
+
+            # train on-policy agent or scenario
+            if self.mode == 'train_agent' and self.agent_policy.type == 'onpolicy':
+                self.agent_policy.train(replay_buffer)
+            elif self.mode == 'train_scenario' and self.scenario_policy.type in ['init_state', 
'onpolicy']: + self.scenario_policy.train(replay_buffer) + + # eval during training + if (self.current_episode+1) % self.eval_in_train_freq == 0: + #self.eval(env, data_loader) + pass + + # save checkpoints + if (self.current_episode+1) % self.save_freq == 0: + if self.mode == 'train_agent': + self.agent_policy.save_model(self.current_episode, replay_buffer) + if self.mode == 'train_scenario': + self.scenario_policy.save_model(self.current_episode) + + def eval(self, data_loader): + num_finished_scenario = 0 + data_loader.reset_idx_counter() + while len(data_loader) > 0: + # sample scenarios + sampled_scenario_configs, num_sampled_scenario = data_loader.sampler() + log_name = f'ROUTE-{sampled_scenario_configs[0].route_id-4}' + num_finished_scenario += num_sampled_scenario + + # reset envs with new config, get init action from scenario policy, and run scenario + static_obs = self.env.get_static_obs(sampled_scenario_configs) + self.scenario_policy.load_model(sampled_scenario_configs) + scenario_init_action, _ = self.scenario_policy.get_init_action(static_obs, deterministic=True) + try: + obs, infos = self.env.reset(sampled_scenario_configs, scenario_init_action) + except: + continue + # get ego vehicle from scenario + self.agent_policy.set_ego_and_route(self.env.get_ego_vehicles(), infos) + + score_list = {s_i: [] for s_i in range(num_sampled_scenario)} + while not self.env.all_scenario_done(): + # get action from agent policy and scenario policy (assume using one batch) + ego_actions = self.agent_policy.get_action(obs, infos, deterministic=True) + scenario_actions = self.scenario_policy.get_action(obs, infos, deterministic=True) + + # apply action to env and get obs + obs, rewards, _, infos = self.env.step(ego_actions=ego_actions, scenario_actions=scenario_actions) + + # save video + if self.save_video: + if self.scenario_category == 'planning': + self.logger.add_frame(pygame.surfarray.array3d(self.display).transpose(1, 0, 2)) + else: + self.logger.add_frame({s_i['scenario_id']: ego_actions[n_i]['annotated_image'] for n_i, s_i in enumerate(infos)}) + + # accumulate scores of corresponding scenario + reward_idx = 0 + for s_i in infos: + score = rewards[reward_idx] if self.scenario_category == 'planning' else 1-infos[reward_idx]['iou_loss'] + score_list[s_i['scenario_id']].append(score) + reward_idx += 1 + + # clean up all things + self.logger.log(">> All scenarios are completed. 
Clearning up all actors") + self.env.clean_up() + + # save video + if self.save_video: + data_ids = [config.data_id for config in sampled_scenario_configs] + self.logger.save_video(data_ids=data_ids) + + # print score for ranking + self.logger.log(f'[{num_finished_scenario}/{data_loader.num_total_scenario}] Ranking scores for batch scenario:', 'yellow') + for s_i in score_list.keys(): + self.logger.log('\t Env id ' + str(s_i) + ': ' + str(np.mean(score_list[s_i])), 'yellow') + + + # calculate evaluation results + score_function = get_route_scores if self.scenario_category == 'planning' else get_perception_scores + all_running_results = self.logger.add_eval_results(records=self.env.running_results) + all_scores = score_function(all_running_results) + self.logger.add_eval_results(scores=all_scores) + self.logger.print_eval_results() + if len(self.env.running_results) % self.save_freq == 0: + self.logger.save_eval_results(log_name) + self.logger.save_eval_results(log_name) + + def run(self, test_epoch = None): + # get scenario data of different maps + config_by_map = scenario_parse(self.scenario_config, self.logger) + + map_keys = list(config_by_map.keys()) + if self.mode == 'train_agent': + Buffer = RouteReplayBuffer if self.scenario_category == 'planning' else PerceptionReplayBuffer + replay_buffer = Buffer(self.num_scenario, self.mode, self.buffer_capacity) + map_keys = map_keys * 20 + if self.continue_agent_training: + self.logger.load_training_results() + start_episode = self.check_continue_training(self.agent_policy, replay_buffer) + 1 + if start_episode >= self.train_episode: + return + else: + self.clean_cache(self.agent_policy.model_path) + start_episode = -1 + else: + self.agent_policy.load_model(episode=test_epoch) + + for m_i in map_keys: + if self.mode == 'eval': + log_name = f'ROUTE-{config_by_map[m_i][0].route_id-4}' + if self.logger.check_eval_dir(log_name) == len(config_by_map[m_i]): + self.logger.log(f">> This scenario and route have been done.") + continue + elif self.mode == 'train_agent': + if self.current_episode >= self.train_episode - 1: + return + + if self.current_episode + len(config_by_map[m_i]) < start_episode: + self.current_episode += len(config_by_map[m_i]) + continue + + # initialize map and render + try: + self._init_world(m_i) + self._init_renderer() + except: + continue + + # create scenarios within the vectorized wrapper + self.env = VectorWrapper( + self.env_params, + self.scenario_config, + self.world, + self.birdeye_render, + self.display, + self.logger + ) + + # prepare data loader and buffer + data_loader = ScenarioDataLoader(config_by_map[m_i], self.num_scenario, m_i, self.world) + + # run with different modes + + if self.mode == 'eval': +# self.agent_policy.load_model(episode=test_epoch) + # self.scenario_policy.load_model() + self.agent_policy.set_mode('eval') + self.scenario_policy.set_mode('eval') + self.eval(data_loader) + elif self.mode == 'train_agent': +# start_episode = self.check_continue_training(self.agent_policy) +# self.scenario_policy.load_model() + self.agent_policy.set_mode('train') + self.scenario_policy.set_mode('eval') + self.train(data_loader, start_episode, replay_buffer) + elif self.mode == 'train_scenario': + start_episode = self.check_continue_training(self.scenario_policy) + self.agent_policy.load_model() + self.agent_policy.set_mode('eval') + self.scenario_policy.set_mode('train') + self.train(data_loader, start_episode) + else: + raise NotImplementedError(f"Unsupported mode: {self.mode}.") + + def 
check_continue_training(self, policy, replay_buffer): + # load previous checkpoint + policy.load_model(replay_buffer = replay_buffer) + if policy.continue_episode == 0: + start_episode = 0 + self.logger.log('>> Previous checkpoint not found. Training from scratch.') + else: + start_episode = policy.continue_episode + self.logger.log(f'>> Continue training from previous checkpoint, epoch: {start_episode}.') + return start_episode + + def clean_cache(self, path): + # Get a list of all files in directory + all_files = glob.glob(os.path.join(path, '*')) + + # Specify the file to keep + file_to_keep = os.path.join(path, 'model.sac.-001.torch') + + # Remove all files except the one to keep + for file in all_files: + if file != file_to_keep: + os.remove(file) + + def close(self): + pygame.quit() + if self.env: + self.env.clean_up() diff --git a/scene/safebench/scenic_runner.py b/scene/safebench/scenic_runner.py new file mode 100644 index 00000000..096c1c72 --- /dev/null +++ b/scene/safebench/scenic_runner.py @@ -0,0 +1,552 @@ +''' +Date: 2023-01-31 22:23:17 +LastEditTime: 2023-03-07 12:28:17 +Description: + Copyright (c) 2022-2023 Safebench Team + + This work is licensed under the terms of the MIT license. + For a copy, see +''' + +import copy +import os +import json +import glob +import random + +import numpy as np +import carla +import pygame +from tqdm import tqdm + +from safebench.gym_carla.env_wrapper import VectorWrapper +from safebench.gym_carla.envs.render import BirdeyeRender +from safebench.gym_carla.replay_buffer import RouteReplayBuffer, PerceptionReplayBuffer + +from safebench.agent import AGENT_POLICY_LIST +from safebench.scenario import SCENARIO_POLICY_LIST + +from safebench.scenario.scenario_manager.carla_data_provider import CarlaDataProvider +from safebench.scenario.scenario_data_loader import ScenarioDataLoader, ScenicDataLoader +from safebench.scenario.tools.scenario_utils import scenario_parse, scenic_parse + +from safebench.util.logger import Logger, setup_logger_kwargs +from safebench.util.metric_util import get_route_scores, get_perception_scores +from safebench.util.scenic_utils import ScenicSimulator + +class ScenicRunner: + def __init__(self, agent_config, scenario_config): + self.scenario_config = scenario_config + self.agent_config = agent_config + + self.seed = scenario_config['seed'] + self.exp_name = scenario_config['exp_name'] + self.output_dir = scenario_config['output_dir'] + self.mode = scenario_config['mode'] + self.save_video = scenario_config['save_video'] + + self.render = scenario_config['render'] + self.num_scenario = scenario_config['num_scenario'] + self.fixed_delta_seconds = scenario_config['fixed_delta_seconds'] + self.scenario_category = scenario_config['scenario_category'] + + # continue training flag + self.continue_agent_training = scenario_config['continue_agent_training'] + self.continue_scenario_training = scenario_config['continue_scenario_training'] + + # apply settings to carla + self.client = carla.Client('localhost', scenario_config['port']) + self.client.set_timeout(10.0) + self.world = None + self.env = None + + self.env_params = { + 'auto_ego': scenario_config['auto_ego'], + 'obs_type': agent_config['obs_type'], + 'scenario_category': self.scenario_category, + 'ROOT_DIR': scenario_config['ROOT_DIR'], + 'warm_up_steps': 9, # number of ticks after spawning the vehicles + 'disable_lidar': True, # show bird-eye view lidar or not + 'display_size': 128, # screen size of one bird-eye view window + 'obs_range': 32, # observation range (meter) + 
'd_behind': 12,  # distance behind the ego vehicle (meter)
+            'max_past_step': 1,  # the number of past steps to draw
+            'discrete': False,  # whether to use discrete control space
+            'discrete_acc': [-3.0, 0.0, 3.0],  # discrete value of accelerations
+            'discrete_steer': [-0.2, 0.0, 0.2],  # discrete value of steering angles
+            'continuous_accel_range': [-3.0, 3.0],  # continuous acceleration range
+            'continuous_steer_range': [-0.3, 0.3],  # continuous steering angle range
+            'max_episode_step': scenario_config['max_episode_step'],  # maximum timesteps per episode
+            'max_waypt': 12,  # maximum number of waypoints
+            'lidar_bin': 0.125,  # bin size of lidar sensor (meter)
+            'out_lane_thres': 4,  # threshold for out of lane (meter)
+            'desired_speed': 8,  # desired speed (m/s)
+            'image_sz': 1024,  # TODO: move to config of od scenario
+        }
+
+        # pass config from scenario to agent
+        agent_config['mode'] = scenario_config['mode']
+        agent_config['ego_action_dim'] = scenario_config['ego_action_dim']
+        agent_config['ego_state_dim'] = scenario_config['ego_state_dim']
+        agent_config['ego_action_limit'] = scenario_config['ego_action_limit']
+
+        # define logger
+        logger_kwargs = setup_logger_kwargs(
+            self.exp_name,
+            self.output_dir,
+            self.seed,
+            agent=agent_config['policy_type'],
+            scenario=scenario_config['policy_type'],
+            scenario_category=self.scenario_category
+        )
+        self.logger = Logger(**logger_kwargs)
+
+        # prepare parameters
+        if self.mode == 'train_agent':
+            self.buffer_capacity = agent_config['buffer_capacity']
+            self.eval_in_train_freq = agent_config['eval_in_train_freq']
+            self.save_freq = agent_config['save_freq']
+            self.train_episode = agent_config['train_episode']
+            self.current_episode = -1
+            self.logger.save_config(agent_config)
+            self.logger.create_training_dir()
+        elif self.mode == 'train_scenario':
+            self.save_freq = agent_config['save_freq']
+            self.logger.create_eval_dir(load_existing_results=False)
+        elif self.mode == 'eval':
+            self.save_freq = agent_config['save_freq']
+            self.logger.log('>> Evaluation Mode, skip config saving', 'yellow')
+            self.logger.create_eval_dir(load_existing_results=False)
+        else:
+            raise NotImplementedError(f"Unsupported mode: {self.mode}.")
+
+        # define agent and scenario
+        self.logger.log('>> Agent Policy: ' + agent_config['policy_type'])
+        self.logger.log('>> Scenario Policy: ' + scenario_config['policy_type'])
+
+        if self.scenario_config['auto_ego']:
+            self.logger.log('>> Using auto-pilot for the ego vehicle, actions from the policy will be ignored', 'yellow')
+        if scenario_config['policy_type'] == 'ordinary' and self.mode != 'train_agent':
+            self.logger.log('>> Ordinary scenario can only be used in agent training', 'red')
+            raise Exception()
+        self.logger.log('>> ' + '-' * 40)
+
+        # define agent and scenario policy
+        self.agent_policy = AGENT_POLICY_LIST[agent_config['policy_type']](agent_config, logger=self.logger)
+        self.scenario_policy = SCENARIO_POLICY_LIST[scenario_config['policy_type']](scenario_config, logger=self.logger)
+        if self.save_video:
+            assert self.mode == 'eval', "only allow video saving in eval mode"
+            self.logger.init_video_recorder()
+
+    def _init_world(self):
+        self.logger.log(">> Initializing carla world")
+        self.world = self.client.get_world()
+        settings = self.world.get_settings()
+        settings.synchronous_mode = True
+        settings.fixed_delta_seconds = self.fixed_delta_seconds
+        self.world.apply_settings(settings)
+        CarlaDataProvider.set_client(self.client)
+        CarlaDataProvider.set_world(self.world)
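+        # downstream code fetches the traffic manager via CarlaDataProvider, so the
+        # port registered below (scenario_config['tm_port']) should match the running TM instance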
+        CarlaDataProvider.set_traffic_manager_port(self.scenario_config['tm_port'])
+
+    def _init_scenic(self, config):
+        self.logger.log(f">> Initializing scenic simulator: {config.scenic_file}")
+        self.scenic = ScenicSimulator(config.scenic_file, config.extra_params)
+
+    def _init_renderer(self):
+        self.logger.log(">> Initializing pygame birdeye renderer")
+        pygame.init()
+        flag = pygame.HWSURFACE | pygame.DOUBLEBUF
+        if not self.render:
+            flag = flag | pygame.HIDDEN
+        if self.scenario_category in ['planning', 'scenic']:
+            # [bird-eye view, Lidar, front view] or [bird-eye view, front view]
+            if self.env_params['disable_lidar']:
+                window_size = (self.env_params['display_size'] * 2, self.env_params['display_size'] * self.num_scenario)
+            else:
+                window_size = (self.env_params['display_size'] * 3, self.env_params['display_size'] * self.num_scenario)
+        else:
+            window_size = (self.env_params['display_size'], self.env_params['display_size'] * self.num_scenario)
+        self.display = pygame.display.set_mode(window_size, flag)
+
+        # initialize the render for generating observation and visualization
+        pixels_per_meter = self.env_params['display_size'] / self.env_params['obs_range']
+        pixels_ahead_vehicle = (self.env_params['obs_range'] / 2 - self.env_params['d_behind']) * pixels_per_meter
+        self.birdeye_params = {
+            'screen_size': [self.env_params['display_size'], self.env_params['display_size']],
+            'pixels_per_meter': pixels_per_meter,
+            'pixels_ahead_vehicle': pixels_ahead_vehicle,
+        }
+        self.birdeye_render = BirdeyeRender(self.world, self.birdeye_params, logger=self.logger)
+
+    def run_scenes(self, scenes):
+        self.logger.log(f">> Begin to run the scene...")
+        ## currently there is only one scene in this list ##
+        for scene in scenes:
+            if self.scenic.setSimulation(scene):
+                self.scenic.update_behavior = self.scenic.runSimulation()
+                next(self.scenic.update_behavior)
+
+    def train(self, data_loader, start_episode=0, replay_buffer=None):
+        # general buffer for both agent and scenario
+
+        for _ in tqdm(range(len(data_loader))):
+            self.current_episode += 1
+            if self.current_episode >= self.train_episode:
+                return
+            if self.current_episode < start_episode:
+                continue
+            # sample scenarios
+            sampled_scenario_configs, _ = data_loader.sampler()
+            # reset the index counter to create endless loader
+            # data_loader.reset_idx_counter()
+
+            scenes = [config.scene for config in sampled_scenario_configs]
+            # begin to run the scene
+            self.run_scenes(scenes)
+
+            # get static obs and then reset with init action
+            static_obs = self.env.get_static_obs(sampled_scenario_configs)
+            self.scenario_policy.load_model(sampled_scenario_configs)
+            scenario_init_action, additional_dict = self.scenario_policy.get_init_action(static_obs)
+            obs, infos = self.env.reset(sampled_scenario_configs, scenario_init_action)
+            replay_buffer.store_init([static_obs, scenario_init_action], additional_dict=additional_dict)
+
+            # get ego vehicle from scenario
+            self.agent_policy.set_ego_and_route(self.env.get_ego_vehicles(), infos)
+
+            # start loop
+            episode_reward = []
+            loss = None  # stays None when the policy being trained is on-policy
+            while not self.env.all_scenario_done():
+                # get action from agent policy and scenario policy (assume using one batch)
+                ego_actions = self.agent_policy.get_action(obs, infos, deterministic=False)
+                scenario_actions = self.scenario_policy.get_action(obs, infos, deterministic=False)
+
+                # apply action to env and get obs
+                next_obs, rewards, dones, infos = self.env.step(ego_actions=ego_actions, scenario_actions=scenario_actions)
+                replay_buffer.store([ego_actions, scenario_actions, obs, 
next_obs, rewards, dones], additional_dict=infos) + obs = copy.deepcopy(next_obs) + episode_reward.append(np.mean(rewards)) + + # train off-policy agent or scenario + if self.mode == 'train_agent' and self.agent_policy.type == 'offpolicy': + loss = self.agent_policy.train(replay_buffer) + elif self.mode == 'train_scenario' and self.scenario_policy.type == 'offpolicy': + self.scenario_policy.train(replay_buffer) + + score_function = get_route_scores if self.scenario_category in ['planning', 'scenic'] else get_perception_scores + all_scores = score_function(self.env.running_results) + + # end up environment + self.env.clean_up() + replay_buffer.finish_one_episode() + self.logger.add_training_results('episode', self.current_episode) + self.logger.add_training_results('episode_reward', np.sum(episode_reward)) + for key, value in all_scores.items(): + self.logger.add_training_results(key, value) + if loss is not None: + critic_loss, actor_loss = loss + self.logger.add_training_results('critic_loss', critic_loss) + self.logger.add_training_results('actor_loss', actor_loss) + else: + critic_loss, actor_loss = 0, 0 + self.logger.add_training_results('critic_loss', critic_loss) + self.logger.add_training_results('actor_loss', actor_loss) + self.logger.log(f">> Episode: {self.current_episode}, #buffer_len: {replay_buffer.buffer_len}, critic: {critic_loss:.3f}, actor: {actor_loss:.3f}") + self.logger.save_training_results() + + # train on-policy agent or scenario + if self.mode == 'train_agent' and self.agent_policy.type == 'onpolicy': + self.agent_policy.train(replay_buffer) + elif self.mode == 'train_scenario' and self.scenario_policy.type in ['init_state', 'onpolicy']: + self.scenario_policy.train(replay_buffer) + + # eval during training + if (self.current_episode+1) % self.eval_in_train_freq == 0: + #self.eval(env, data_loader) + pass + + # save checkpoints + if (self.current_episode+1) % self.save_freq == 0: + if self.mode == 'train_agent': + self.agent_policy.save_model(self.current_episode, replay_buffer) + if self.mode == 'train_scenario': + self.scenario_policy.save_model(self.current_episode) + + self.scenic.destroy() + + def eval(self, data_loader, select = False): + num_finished_scenario = 0 + data_loader.reset_idx_counter() + # recording the score and the id of corresponding selected scenes + map_id_score = {} + behavior_name = data_loader.behavior + route_id = data_loader.route_id + opt_step = data_loader.opt_step + opt_time = 0 + + if route_id is None: + log_name = f'OPT_{behavior_name}' + else: + log_name = f'OPT_{behavior_name}_ROUTE-{route_id}' + + if select: + self.scene_map[log_name] = {} + self.scene_map[log_name][f'opt_time_{opt_time}'] = self.scenic.save_params() + + while len(data_loader) > 0: + # sample scenarios + sampled_scenario_configs, num_sampled_scenario = data_loader.sampler() + num_finished_scenario += num_sampled_scenario + assert num_sampled_scenario == 1, 'scenic can only run one scene at one time' + + scenes = [config.scene for config in sampled_scenario_configs] + # begin to run the scene + self.run_scenes(scenes) + + # reset envs with new config, get init action from scenario policy, and run scenario + static_obs = self.env.get_static_obs(sampled_scenario_configs) + self.scenario_policy.load_model(sampled_scenario_configs) + scenario_init_action, _ = self.scenario_policy.get_init_action(static_obs, deterministic=True) + obs, infos = self.env.reset(sampled_scenario_configs, scenario_init_action) + + # get ego vehicle from scenario + 
self.agent_policy.set_ego_and_route(self.env.get_ego_vehicles(), infos)
+
+            score_list = {s_i: [] for s_i in range(num_sampled_scenario)}
+            while not self.env.all_scenario_done():
+                # get action from agent policy and scenario policy (assume using one batch)
+                ego_actions = self.agent_policy.get_action(obs, infos, deterministic=True)
+                scenario_actions = self.scenario_policy.get_action(obs, infos, deterministic=True)
+
+                # apply action to env and get obs
+                obs, rewards, _, infos = self.env.step(ego_actions=ego_actions, scenario_actions=scenario_actions)
+
+                # save video
+                if self.save_video:
+                    if self.scenario_category in ['planning', 'scenic']:
+                        self.logger.add_frame(pygame.surfarray.array3d(self.display).transpose(1, 0, 2))
+                    else:
+                        self.logger.add_frame({s_i['scenario_id']: ego_actions[n_i]['annotated_image'] for n_i, s_i in enumerate(infos)})
+
+                # accumulate scores of corresponding scenario
+                reward_idx = 0
+                for s_i in infos:
+                    score = rewards[reward_idx] if self.scenario_category in ['planning', 'scenic'] else 1-infos[reward_idx]['iou_loss']
+                    score_list[s_i['scenario_id']].append(score)
+                    reward_idx += 1
+
+            # clean up all things
+            self.logger.log(">> All scenarios are completed. Cleaning up all actors")
+            self.env.clean_up()
+
+            # save video
+            if self.save_video:
+                data_ids = [config.data_id for config in sampled_scenario_configs]
+                self.logger.save_video(data_ids=data_ids, log_name=log_name)
+
+            # print score for ranking
+            self.logger.log(f'[{num_finished_scenario}/{data_loader.num_total_scenario}] Ranking scores for batch scenario:', color='yellow')
+            for s_i in score_list.keys():
+                self.logger.log('\t Env id ' + str(s_i) + ': ' + str(np.mean(score_list[s_i])), color='yellow')
+
+            # calculate evaluation results
+            score_function = get_route_scores if self.scenario_category in ['planning', 'scenic'] else get_perception_scores
+            all_running_results = self.logger.add_eval_results(records=self.env.running_results)
+            all_scores = score_function(all_running_results)
+            self.logger.add_eval_results(scores=all_scores)
+            self.logger.print_eval_results()
+            if len(self.env.running_results) % self.save_freq == 0:
+                self.logger.save_eval_results(log_name)
+
+            if infos[0]['collision']:
+                self.scenic.record_params()
+            if select and (num_finished_scenario % opt_step == 0):
+                opt_time += 1
+                self.scenic.update_params()
+                self.scene_map[log_name][f'opt_time_{opt_time}'] = self.scenic.save_params()
+                data_loader.train_scene(opt_time)
+
+        self.logger.save_eval_results(log_name)
+
+        if select:
+            self.scene_map[log_name]['select_id'] = self.select_adv_scene(self.logger.eval_records, score_function, data_loader.select_num)
+            self.dump_scene_map(sampled_scenario_configs[0].scenario_id)
+
+        self.logger.clear()
+        self.scenic.destroy()
+
+    def select_adv_scene(self, results, score_function, select_num):
+        # define your own selection mechanism here
+        map_id_score_collision = {}
+        map_id_score_non_collision = {}
+        for i in results.keys():
+            score = score_function({i: results[i]})
+            if score['collision_rate'] == 1:
+                map_id_score_collision[i] = score['final_score']
+            else:
+                map_id_score_non_collision[i] = score['final_score']
+
+        # Sort the collision scenes by their scores
+        collision_scenes_sorted = sorted(map_id_score_collision.items(), key=lambda x: x[1])
+
+        # Get the number of scenes to select from the collision cases
+        num_collision_selected = min(select_num, len(collision_scenes_sorted))
+
+        # Select the lowest scored scenes with collision
+        selected_scene_id = [scene[0] for scene in collision_scenes_sorted[:num_collision_selected]]
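+        # worked example (hypothetical numbers): with select_num=2, collision scores
+        # {3: 0.4, 7: 0.6} and non-collision scores {5: 0.8}, the two lowest-scoring
+        # collision scenes are taken first, giving selected_scene_id = [3, 7]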
+
+        # If not enough collision scenes, select remaining scenes
+        num_non_collision_selected = select_num - num_collision_selected
+        if num_non_collision_selected > 0:
+            # Sort the non-collision scenes by their scores
+            non_collision_scenes_sorted = sorted(map_id_score_non_collision.items(), key=lambda x: x[1])
+            # Select the lowest scored scenes from the non-collision cases
+            selected_scene_id.extend([scene[0] for scene in non_collision_scenes_sorted[:num_non_collision_selected]])
+        return sorted(selected_scene_id)
+
+    def run(self, test_epoch=None):
+        # get scenario data of different maps
+        config_list = scenic_parse(self.scenario_config, self.logger)
+
+        ### load rl model ##
+        if self.mode == 'train_scenario':
+            ## we only need the pretrained surrogate model here ##
+            pass
+        elif self.mode == 'train_agent':
+            ## initialize buffer ###
+            Buffer = RouteReplayBuffer if self.scenario_category in ['scenic', 'planning'] else PerceptionReplayBuffer
+            replay_buffer = Buffer(self.num_scenario, self.mode, self.buffer_capacity)
+
+            ### repeat the training; 20 is an arbitrary repetition factor
+            config_list = config_list * 20
+
+            ### check if resume ###
+            if self.continue_agent_training:
+                self.logger.load_training_results()
+                start_episode = self.check_continue_training(self.agent_policy, replay_buffer) + 1
+                if start_episode >= self.train_episode:
+                    return
+            else:
+                self.clean_cache(self.agent_policy.model_path)
+                start_episode = -1
+
+        elif self.mode == 'eval':
+            ### load trained model for evaluation ###
+            if test_epoch:
+                self.agent_policy.load_model(episode=test_epoch)
+
+        last_town = None
+        for config in config_list:
+
+            ## set log name ##
+            if config.route_id is None:
+                log_name = f'OPT_{config.behavior}'
+            else:
+                log_name = f'OPT_{config.behavior}_ROUTE-{config.route_id}'
+
+            ## check if all done ##
+            if self.mode == 'eval':
+                if self.logger.check_eval_dir(log_name) == config.select_num:
+                    self.logger.log(f">> This scenario and route have already been evaluated.")
+                    continue
+            elif self.mode == 'train_agent':
+                if self.current_episode >= self.train_episode - 1:
+                    return
+
+                if self.current_episode + config.select_num < start_episode:
+                    self.current_episode += config.select_num
+                    continue
+
+            # initialize scenic
+            self._init_scenic(config)
+
+            # initialize map and render
+            if last_town != config.extra_params['town']:
+                self._init_world()
+                self._init_renderer()
+                last_town = config.extra_params['town']
+            self.world.scenic = self.scenic
+
+            # create scenarios within the vectorized wrapper
+            self.env = VectorWrapper(
+                self.env_params,
+                self.scenario_config,
+                self.world,
+                self.birdeye_render,
+                self.display,
+                self.logger
+            )
+
+            # prepare data loader and buffer
+            data_loader = ScenicDataLoader(self.scenic, config, self.num_scenario)
+
+            # run with different modes
+            if self.mode == 'train_scenario':
+                ### select hard scenic scenario config on the surrogate model ###
+                self.scene_map = self.load_scene_map(config.scenario_id)
+                self.agent_policy.set_mode('eval')
+                self.scenario_policy.set_mode('eval')
+                self.eval(data_loader, select=True)
+            elif self.mode == 'train_agent':
+                ### train the surrogate model on the selected hard scenarios ###
+                self.agent_policy.set_mode('train')
+                self.scenario_policy.set_mode('eval')
+                self.train(data_loader, start_episode, replay_buffer)
+            elif self.mode == 'eval':
+                ### evaluate the trained agent on different test models ###
+                self.agent_policy.set_mode('eval')
+                self.scenario_policy.set_mode('eval')
+                self.eval(data_loader)
+            
else: + raise NotImplementedError(f"Unsupported mode: {self.mode}.") + + def check_continue_training(self, policy, replay_buffer): + # load previous checkpoint + policy.load_model(replay_buffer = replay_buffer) + if policy.continue_episode == 0: + start_episode = 0 + self.logger.log('>> Previous checkpoint not found. Training from scratch.') + else: + start_episode = policy.continue_episode + self.logger.log(f'>> Continue training from previous checkpoint, epoch: {start_episode}.') + return start_episode + + def dump_scene_map(self, scenario_id): + # load previous checkpoint + scenic_dir = os.path.join(self.scenario_config['scenic_dir'], f'scenario_{scenario_id}') + f = open(os.path.join(scenic_dir, f"{scenic_dir.split('/')[-1]}.json"), 'w') + json_dumps_str = json.dumps(self.scene_map, indent=4) + print(json_dumps_str, file=f) + f.close() + + def load_scene_map(self, scenario_id): + # load previous checkpoint + scenic_dir = os.path.join(self.scenario_config['scenic_dir'], f'scenario_{scenario_id}') + try: + with open(os.path.join(scenic_dir, f"{scenic_dir.split('/')[-1]}.json"), 'r') as f: + data = json.loads(f.read()) + except: + data = {} + return data + + def clean_cache(self, path): + # Get a list of all files in directory + all_files = glob.glob(os.path.join(path, '*')) + + # Specify the file to keep + file_to_keep = os.path.join(path, 'model.sac.-001.torch') + + # Remove all files except the one to keep + for file in all_files: + if file != file_to_keep: + os.remove(file) + + def close(self): + pygame.quit() # close pygame renderer + if self.env: + self.env.clean_up() + + diff --git a/scene/safebench/scenic_runner_dynamic.py b/scene/safebench/scenic_runner_dynamic.py new file mode 100644 index 00000000..43773e50 --- /dev/null +++ b/scene/safebench/scenic_runner_dynamic.py @@ -0,0 +1,540 @@ +''' +Date: 2023-01-31 22:23:17 +LastEditTime: 2023-03-07 12:28:17 +Description: + Copyright (c) 2022-2023 Safebench Team + + This work is licensed under the terms of the MIT license. 
+ For a copy, see +''' + +import copy +import os +import json +import glob +import random + +import numpy as np +import carla +import pygame +from tqdm import tqdm + +from safebench.gym_carla.env_wrapper import VectorWrapper +from safebench.gym_carla.envs.render import BirdeyeRender +from safebench.gym_carla.replay_buffer import RouteReplayBuffer, PerceptionReplayBuffer + +from safebench.agent import AGENT_POLICY_LIST +from safebench.scenario import SCENARIO_POLICY_LIST + +from safebench.scenario.scenario_manager.carla_data_provider import CarlaDataProvider +from safebench.scenario.scenario_data_loader import ScenarioDataLoader, ScenicDataLoader +from safebench.scenario.tools.scenario_utils import scenario_parse, dynamic_scenic_parse + +from safebench.util.logger import Logger, setup_logger_kwargs +from safebench.util.metric_util import get_route_scores, get_perception_scores +from safebench.util.scenic_utils import ScenicSimulator + +class ScenicRunner: + def __init__(self, agent_config, scenario_config): + self.scenario_config = scenario_config + self.agent_config = agent_config + + self.seed = scenario_config['seed'] + self.exp_name = scenario_config['exp_name'] + self.output_dir = scenario_config['output_dir'] + self.mode = scenario_config['mode'] + self.save_video = scenario_config['save_video'] + + self.render = scenario_config['render'] + self.num_scenario = scenario_config['num_scenario'] + self.fixed_delta_seconds = scenario_config['fixed_delta_seconds'] + self.scenario_category = scenario_config['scenario_category'] + + # continue training flag + self.continue_agent_training = scenario_config['continue_agent_training'] + self.continue_scenario_training = scenario_config['continue_scenario_training'] + + # apply settings to carla + self.client = carla.Client('localhost', scenario_config['port']) + self.client.set_timeout(10.0) + self.world = None + self.env = None + + self.env_params = { + 'auto_ego': scenario_config['auto_ego'], + 'obs_type': agent_config['obs_type'], + 'scenario_category': self.scenario_category, + 'ROOT_DIR': scenario_config['ROOT_DIR'], + 'warm_up_steps': 9, # number of ticks after spawning the vehicles + 'disable_lidar': True, # show bird-eye view lidar or not + 'display_size': 128, # screen size of one bird-eye view window + 'obs_range': 32, # observation range (meter) + 'd_behind': 12, # distance behind the ego vehicle (meter) + 'max_past_step': 1, # the number of past steps to draw + 'discrete': False, # whether to use discrete control space + 'discrete_acc': [-3.0, 0.0, 3.0], # discrete value of accelerations + 'discrete_steer': [-0.2, 0.0, 0.2], # discrete value of steering angles + 'continuous_accel_range': [-3.0, 3.0], # continuous acceleration range + 'continuous_steer_range': [-0.3, 0.3], # continuous steering angle range + 'max_episode_step': scenario_config['max_episode_step'], # maximum timesteps per episode + 'max_waypt': 12, # maximum number of waypoints + 'lidar_bin': 0.125, # bin size of lidar sensor (meter) + 'out_lane_thres': 4, # threshold for out of lane (meter) + 'desired_speed': 8, # desired speed (m/s) + 'image_sz': 1024, # TODO: move to config of od scenario + } + + + # pass config from scenario to agent + agent_config['mode'] = scenario_config['mode'] + agent_config['ego_action_dim'] = scenario_config['ego_action_dim'] + agent_config['ego_state_dim'] = scenario_config['ego_state_dim'] + agent_config['ego_action_limit'] = scenario_config['ego_action_limit'] + + # define logger + logger_kwargs = setup_logger_kwargs( + self.exp_name, + 
self.output_dir,
+            self.seed,
+            agent=agent_config['policy_type'],
+            scenario=scenario_config['policy_type'],
+            scenario_category=self.scenario_category
+        )
+        self.logger = Logger(**logger_kwargs)
+
+        # prepare parameters
+        if self.mode == 'train_agent':
+            self.buffer_capacity = agent_config['buffer_capacity']
+            self.eval_in_train_freq = agent_config['eval_in_train_freq']
+            self.save_freq = agent_config['save_freq']
+            self.train_episode = agent_config['train_episode']
+            self.current_episode = -1
+            self.logger.save_config(agent_config)
+            self.logger.create_training_dir()
+        elif self.mode == 'train_scenario':
+            self.save_freq = agent_config['save_freq']
+            self.logger.create_eval_dir(load_existing_results=False)
+        elif self.mode == 'eval':
+            self.save_freq = agent_config['save_freq']
+            self.logger.log('>> Evaluation Mode, skip config saving', 'yellow')
+            self.logger.create_eval_dir(load_existing_results=False)
+        else:
+            raise NotImplementedError(f"Unsupported mode: {self.mode}.")
+
+        # define agent and scenario
+        self.logger.log('>> Agent Policy: ' + agent_config['policy_type'])
+        self.logger.log('>> Scenario Policy: ' + scenario_config['policy_type'])
+
+        if self.scenario_config['auto_ego']:
+            self.logger.log('>> Using auto-pilot for the ego vehicle, actions from the policy will be ignored', 'yellow')
+        if scenario_config['policy_type'] == 'ordinary' and self.mode != 'train_agent':
+            self.logger.log('>> Ordinary scenario can only be used in agent training', 'red')
+            raise Exception()
+        self.logger.log('>> ' + '-' * 40)
+
+        # define agent and scenario policy
+        self.agent_policy = AGENT_POLICY_LIST[agent_config['policy_type']](agent_config, logger=self.logger)
+        self.scenario_policy = SCENARIO_POLICY_LIST[scenario_config['policy_type']](scenario_config, logger=self.logger)
+        if self.save_video:
+            assert self.mode == 'eval', "only allow video saving in eval mode"
+            self.logger.init_video_recorder()
+
+    def _init_world(self):
+        self.logger.log(">> Initializing carla world")
+        self.world = self.client.get_world()
+        settings = self.world.get_settings()
+        settings.synchronous_mode = True
+        settings.fixed_delta_seconds = self.fixed_delta_seconds
+        self.world.apply_settings(settings)
+        CarlaDataProvider.set_client(self.client)
+        CarlaDataProvider.set_world(self.world)
+        CarlaDataProvider.set_traffic_manager_port(self.scenario_config['tm_port'])
+
+    def _init_scenic(self, config):
+        self.logger.log(f">> Initializing scenic simulator: {config.scenic_file}")
+        self.scenic = ScenicSimulator(config.scenic_file, config.extra_params)
+
+    def _init_renderer(self):
+        self.logger.log(">> Initializing pygame birdeye renderer")
+        pygame.init()
+        flag = pygame.HWSURFACE | pygame.DOUBLEBUF
+        if not self.render:
+            flag = flag | pygame.HIDDEN
+        if self.scenario_category in ['planning', 'scenic']:
+            # [bird-eye view, Lidar, front view] or [bird-eye view, front view]
+            if self.env_params['disable_lidar']:
+                window_size = (self.env_params['display_size'] * 2, self.env_params['display_size'] * self.num_scenario)
+            else:
+                window_size = (self.env_params['display_size'] * 3, self.env_params['display_size'] * self.num_scenario)
+        else:
+            window_size = (self.env_params['display_size'], self.env_params['display_size'] * self.num_scenario)
+        self.display = pygame.display.set_mode(window_size, flag)
+
+        # initialize the render for generating observation and visualization
+        pixels_per_meter = self.env_params['display_size'] / self.env_params['obs_range']
+        pixels_ahead_vehicle = (self.env_params['obs_range'] / 2 - self.env_params['d_behind']) * pixels_per_meter
+        self.birdeye_params = {
+            'screen_size': [self.env_params['display_size'], self.env_params['display_size']],
+            'pixels_per_meter': pixels_per_meter,
+            'pixels_ahead_vehicle': pixels_ahead_vehicle,
+        }
+        self.birdeye_render = BirdeyeRender(self.world, self.birdeye_params, logger=self.logger)
+
+    def run_scenes(self, scenes):
+        self.logger.log(f">> Begin to run the scene...")
+        ## currently there is only one scene in this list ##
+        for scene in scenes:
+            if self.scenic.setSimulation(scene):
+                self.scenic.update_behavior = self.scenic.runSimulation()
+                next(self.scenic.update_behavior)
+
+    def train(self, data_loader, start_episode=0, replay_buffer=None):
+        # general buffer for both agent and scenario
+
+        for _ in tqdm(range(len(data_loader))):
+            self.current_episode += 1
+            if self.current_episode >= self.train_episode:
+                return
+            if self.current_episode < start_episode:
+                continue
+            # sample scenarios
+            sampled_scenario_configs, _ = data_loader.sampler()
+            # reset the index counter to create endless loader
+            # data_loader.reset_idx_counter()
+
+            scenes = [config.scene for config in sampled_scenario_configs]
+            # begin to run the scene
+            self.run_scenes(scenes)
+
+            # get static obs and then reset with init action
+            static_obs = self.env.get_static_obs(sampled_scenario_configs)
+            self.scenario_policy.load_model(sampled_scenario_configs)
+            scenario_init_action, additional_dict = self.scenario_policy.get_init_action(static_obs)
+            obs, infos = self.env.reset(sampled_scenario_configs, scenario_init_action)
+            replay_buffer.store_init([static_obs, scenario_init_action], additional_dict=additional_dict)
+
+            # get ego vehicle from scenario
+            self.agent_policy.set_ego_and_route(self.env.get_ego_vehicles(), infos)
+
+            # start loop
+            episode_reward = []
+            loss = None  # stays None when the policy being trained is on-policy
+            while not self.env.all_scenario_done():
+                # get action from agent policy and scenario policy (assume using one batch)
+                ego_actions = self.agent_policy.get_action(obs, infos, deterministic=False)
+                scenario_actions = self.scenario_policy.get_action(obs, infos, deterministic=False)
+
+                # apply action to env and get obs
+                next_obs, rewards, dones, infos = self.env.step(ego_actions=ego_actions, scenario_actions=scenario_actions)
+                replay_buffer.store([ego_actions, scenario_actions, obs, next_obs, rewards, dones], additional_dict=infos)
+                obs = copy.deepcopy(next_obs)
+                episode_reward.append(np.mean(rewards))
+
+                # train off-policy agent or scenario
+                if self.mode == 'train_agent' and self.agent_policy.type == 'offpolicy':
+                    loss = self.agent_policy.train(replay_buffer)
+                elif self.mode == 'train_scenario' and self.scenario_policy.type == 'offpolicy':
+                    self.scenario_policy.train(replay_buffer)
+
+            score_function = get_route_scores if self.scenario_category in ['planning', 'scenic'] else get_perception_scores
+            all_scores = score_function(self.env.running_results)
+
+            # clean up the environment
+            self.env.clean_up()
+            replay_buffer.finish_one_episode()
+            self.logger.add_training_results('episode', self.current_episode)
+            self.logger.add_training_results('episode_reward', np.sum(episode_reward))
+            for key, value in all_scores.items():
+                self.logger.add_training_results(key, value)
+            if loss is not None:
+                critic_loss, actor_loss = loss
+            else:
+                critic_loss, actor_loss = 0, 0
+            self.logger.add_training_results('critic_loss', critic_loss)
+            self.logger.add_training_results('actor_loss', 
actor_loss) + self.logger.log(f">> Episode: {self.current_episode}, #buffer_len: {replay_buffer.buffer_len}, critic: {critic_loss:.3f}, actor: {actor_loss:.3f}") + self.logger.save_training_results() + + # train on-policy agent or scenario + if self.mode == 'train_agent' and self.agent_policy.type == 'onpolicy': + self.agent_policy.train(replay_buffer) + elif self.mode == 'train_scenario' and self.scenario_policy.type in ['init_state', 'onpolicy']: + self.scenario_policy.train(replay_buffer) + + # eval during training + if (self.current_episode+1) % self.eval_in_train_freq == 0: + #self.eval(env, data_loader) + pass + + # save checkpoints + if (self.current_episode+1) % self.save_freq == 0: + if self.mode == 'train_agent': + self.agent_policy.save_model(self.current_episode, replay_buffer) + if self.mode == 'train_scenario': + self.scenario_policy.save_model(self.current_episode) + + self.scenic.destroy() + + def eval(self, data_loader, select = False): + num_finished_scenario = 0 + data_loader.reset_idx_counter() + # recording the score and the id of corresponding selected scenes + map_id_score = {} + behavior_name = data_loader.behavior + opt_step = data_loader.opt_step + opt_time = 0 + + log_name = f'OPT_{behavior_name}' + + if select: + self.scene_map[log_name] = {} + self.scene_map[log_name][f'opt_time_{opt_time}'] = self.scenic.save_params() + + while len(data_loader) > 0: + # sample scenarios + sampled_scenario_configs, num_sampled_scenario = data_loader.sampler() + num_finished_scenario += num_sampled_scenario + assert num_sampled_scenario == 1, 'scenic can only run one scene at one time' + + scenes = [config.scene for config in sampled_scenario_configs] + # begin to run the scene + self.run_scenes(scenes) + + # reset envs with new config, get init action from scenario policy, and run scenario + static_obs = self.env.get_static_obs(sampled_scenario_configs) + self.scenario_policy.load_model(sampled_scenario_configs) + scenario_init_action, _ = self.scenario_policy.get_init_action(static_obs, deterministic=True) + obs, infos = self.env.reset(sampled_scenario_configs, scenario_init_action) + + # get ego vehicle from scenario + self.agent_policy.set_ego_and_route(self.env.get_ego_vehicles(), infos) + + score_list = {s_i: [] for s_i in range(num_sampled_scenario)} + while not self.env.all_scenario_done(): + # get action from agent policy and scenario policy (assume using one batch) + ego_actions = self.agent_policy.get_action(obs, infos, deterministic=True) + scenario_actions = self.scenario_policy.get_action(obs, infos, deterministic=True) + + # apply action to env and get obs + obs, rewards, _, infos = self.env.step(ego_actions=ego_actions, scenario_actions=scenario_actions) + + # save video + if self.save_video: + self.logger.add_frame(pygame.surfarray.array3d(self.display).transpose(1, 0, 2)) + + # accumulate scores of corresponding scenario + reward_idx = 0 + for s_i in infos: + score = rewards[reward_idx] if self.scenario_category in ['planning', 'scenic'] else 1-infos[reward_idx]['iou_loss'] + score_list[s_i['scenario_id']].append(score) + reward_idx += 1 + + # clean up all things + self.logger.log(">> All scenarios are completed. 
+            self.logger.log(">> All scenarios are completed. Cleaning up all actors.")
+            self.env.clean_up()
+
+            # save video
+            if self.save_video:
+                data_ids = [config.data_id for config in sampled_scenario_configs]
+                self.logger.save_video(data_ids=data_ids, log_name=log_name)
+
+            # print scores for ranking
+            self.logger.log(f'[{num_finished_scenario}/{data_loader.num_total_scenario}] Ranking scores for batch scenario:', color='yellow')
+            for s_i in score_list.keys():
+                self.logger.log('\t Env id ' + str(s_i) + ': ' + str(np.mean(score_list[s_i])), color='yellow')
+
+            # calculate evaluation results
+            score_function = get_route_scores if self.scenario_category in ['planning', 'scenic'] else get_perception_scores
+            all_running_results = self.logger.add_eval_results(records=self.env.running_results)
+            all_scores = score_function(all_running_results)
+            self.logger.add_eval_results(scores=all_scores)
+            self.logger.print_eval_results()
+            if len(self.env.running_results) % self.save_freq == 0:
+                self.logger.save_eval_results(log_name)
+
+            if infos[0]['collision']:
+                self.scenic.record_params()
+
+            if select and (num_finished_scenario % opt_step == 0):
+                opt_time += 1
+                self.scenic.update_params()
+                self.scene_map[log_name][f'opt_time_{opt_time}'] = self.scenic.save_params()
+                data_loader.train_scene(opt_time)
+
+        self.logger.save_eval_results(log_name)
+
+        if select:
+            self.scene_map[log_name]['select_id'] = self.select_adv_scene(self.logger.eval_records, score_function, data_loader.select_num)
+            self.dump_scene_map()
+
+        self.logger.clear()
+        self.scenic.destroy()
+
+    def select_adv_scene(self, results, score_function, select_num):
+        # define your own selection mechanism here
+        map_id_score_collision = {}
+        map_id_score_non_collision = {}
+        for i in results.keys():
+            score = score_function({i: results[i]})
+            if score['collision_rate'] == 1:
+                map_id_score_collision[i] = score['final_score']
+            else:
+                map_id_score_non_collision[i] = score['final_score']
+
+        # sort the collision scenes by their scores
+        collision_scenes_sorted = sorted(map_id_score_collision.items(), key=lambda x: x[1])
+
+        # number of scenes to select from the collision cases
+        num_collision_selected = min(select_num, len(collision_scenes_sorted))
+
+        # select the lowest-scoring scenes with collision
+        selected_scene_id = [scene[0] for scene in collision_scenes_sorted[:num_collision_selected]]
+
+        # if there are not enough collision scenes, fill the remainder from the non-collision cases
+        num_non_collision_selected = select_num - num_collision_selected
+        if num_non_collision_selected > 0:
+            # sort the non-collision scenes by their scores
+            non_collision_scenes_sorted = sorted(map_id_score_non_collision.items(), key=lambda x: x[1])
+            # select the lowest-scoring scenes from the non-collision cases
+            selected_scene_id.extend([scene[0] for scene in non_collision_scenes_sorted[:num_non_collision_selected]])
+        return sorted(selected_scene_id)
+
+    def run(self, test_epoch=None):
+        # get scenario data of different maps
+        config_list = dynamic_scenic_parse(self.scenario_config, self.logger)
+
+        ### load rl model ###
+        if self.mode == 'train_scenario':
+            ## we only need the pretrained surrogate model here ##
+            pass
+        elif self.mode == 'train_agent':
+            ## initialize buffer ##
+            Buffer = RouteReplayBuffer if self.scenario_category in ['scenic', 'planning'] else PerceptionReplayBuffer
+            replay_buffer = Buffer(self.num_scenario, self.mode, self.buffer_capacity)
+
+            ### repeat the training; 20 passes is just a placeholder ###
+            config_list = config_list * 20
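+            ### check if resume: reload logged results and the latest checkpoint so that
+            ### training restarts from the episode after the last saved one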
+            if self.continue_agent_training:
+                self.logger.load_training_results()
+                start_episode = self.check_continue_training(self.agent_policy, replay_buffer) + 1
+                if start_episode >= self.train_episode:
+                    return
+            else:
+                self.clean_cache(self.agent_policy.model_path)
+                start_episode = -1
+
+        elif self.mode == 'eval' and test_epoch:
+            ### load the trained model for evaluation ###
+            self.agent_policy.load_model(episode=test_epoch)
+
+        last_town = None
+        for config in config_list:
+
+            ## set log name ##
+            log_name = f'OPT_{config.behavior}'
+
+            ## check if everything is already done ##
+            if self.mode == 'eval':
+                if self.logger.check_eval_dir(log_name) == config.select_num:
+                    self.logger.log(">> This scenario and route have been done.")
+                    continue
+            elif self.mode == 'train_agent':
+                if self.current_episode >= self.train_episode - 1:
+                    return
+                if self.current_episode + config.select_num < start_episode:
+                    self.current_episode += config.select_num
+                    continue
+
+            # initialize scenic
+            self._init_scenic(config)
+
+            # initialize map and render
+            self._init_world()
+            self._init_renderer()
+            self.world.scenic = self.scenic
+
+            # create scenarios within the vectorized wrapper
+            self.env = VectorWrapper(
+                self.env_params,
+                self.scenario_config,
+                self.world,
+                self.birdeye_render,
+                self.display,
+                self.logger
+            )
+
+            # prepare data loader
+            data_loader = ScenicDataLoader(self.scenic, config, self.num_scenario)
+
+            # run with different modes
+            if self.mode == 'train_scenario':
+                ### select hard scenic scenario configs on the surrogate model ###
+                self.scene_map = self.load_scene_map()
+                self.agent_policy.set_mode('eval')
+                self.scenario_policy.set_mode('eval')
+                self.eval(data_loader, select=True)
+            elif self.mode == 'train_agent':
+                ### train the surrogate model on the selected hard scenarios ###
+                self.agent_policy.set_mode('train')
+                self.scenario_policy.set_mode('eval')
+                self.train(data_loader, start_episode, replay_buffer)
+            elif self.mode == 'eval':
+                ### evaluate the trained agent on different test models ###
+                self.agent_policy.set_mode('eval')
+                self.scenario_policy.set_mode('eval')
+                self.eval(data_loader)
+            else:
+                raise NotImplementedError(f"Unsupported mode: {self.mode}.")
+
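+    # NOTE: `load_model` below is expected to populate `policy.continue_episode`
+    # (0 when no checkpoint exists), which drives the resume decision.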
+    def check_continue_training(self, policy, replay_buffer):
+        # load the previous checkpoint
+        policy.load_model(replay_buffer=replay_buffer)
+        if policy.continue_episode == 0:
+            start_episode = 0
+            self.logger.log('>> Previous checkpoint not found. Training from scratch.')
+        else:
+            start_episode = policy.continue_episode
+            self.logger.log(f'>> Continuing training from the previous checkpoint, episode: {start_episode}.')
+        return start_episode
+
+    def dump_scene_map(self):
+        # dump the selected scene parameters to disk
+        scenic_dir = self.scenario_config['scenic_dir']
+        with open(os.path.join(scenic_dir, "dynamic_scenario.json"), 'w') as f:
+            json.dump(self.scene_map, f, indent=4)
+
+    def load_scene_map(self):
+        # load previously dumped scene parameters, if any
+        scenic_dir = self.scenario_config['scenic_dir']
+        try:
+            with open(os.path.join(scenic_dir, "dynamic_scenario.json"), 'r') as f:
+                data = json.load(f)
+        except (FileNotFoundError, json.JSONDecodeError):
+            data = {}
+        return data
+
+    def clean_cache(self, path):
+        # get a list of all files in the directory
+        all_files = glob.glob(os.path.join(path, '*'))
+
+        # specify the file to keep
+        file_to_keep = os.path.join(path, 'model.sac.-001.torch')
+
+        # remove all files except the one to keep
+        for file in all_files:
+            if file != file_to_keep:
+                os.remove(file)
+
+    def close(self):
+        pygame.quit()  # close the pygame renderer
+        if self.env:
+            self.env.clean_up()