From 65c1550cfaee89c980a7b9f722e8925363507834 Mon Sep 17 00:00:00 2001 From: andrewcoh <54679309+andrewcoh@users.noreply.github.com> Date: Wed, 17 Mar 2021 17:46:15 -0400 Subject: [PATCH] R15 fix elo (#5151) * add group done to ELO computation * add not interrupted --- ml-agents/mlagents/trainers/ghost/trainer.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ml-agents/mlagents/trainers/ghost/trainer.py b/ml-agents/mlagents/trainers/ghost/trainer.py index cd08ee24f0..2449734b75 100644 --- a/ml-agents/mlagents/trainers/ghost/trainer.py +++ b/ml-agents/mlagents/trainers/ghost/trainer.py @@ -190,7 +190,11 @@ def _process_trajectory(self, trajectory: Trajectory) -> None: i.e. in asymmetric games. We assume the last reward determines the winner. :param trajectory: Trajectory. """ - if trajectory.done_reached: + if ( + trajectory.done_reached + and trajectory.all_group_dones_reached + and not trajectory.interrupted + ): # Assumption is that final reward is >0/0/<0 for win/draw/loss final_reward = ( trajectory.steps[-1].reward + trajectory.steps[-1].group_reward