1+ """
2+ LossDecoders are meant to be mappings between the representation being learned,
3+ and the representation or tensor that is fed directly into the loss. In many
4+ cases, these are the same, and this will just be a NoOp.
5+
6+ Some cases where it is different:
7+ - When you are using a Projection Head in your contrastive loss, and comparing
8+ similarities of vectors that are k >=1 nonlinear layers downstream from the
9+ actual representation you'll use in later tasks
10+ - When you're learning a VAE, and the loss is determined by how effectively you
11+ can reconstruct the image from a representation vector, the LossDecoder will
12+ handle that representation -> image mapping
13+ - When you're predicting actions given current and next state, you'll want to
14+ predict those actions given both the representation of the current state, and
15+ also information about the next state. This occasional need for extra
16+ information beyond the central context state is why we have `extra_context`
17+ as an optional bit of data that pair constructors can return, to be passed
18+ forward for use here
19+ """
import copy
import functools

import gym.spaces as spaces
import numpy as np
import torch
from torch.distributions import MultivariateNormal
import torch.nn as nn
import torch.nn.functional as F

from il_representations.algos.utils import independent_multivariate_normal

12- """
13- LossDecoders are meant to be mappings between the representation being learned,
14- and the representation or tensor that is fed directly into the loss. In many cases, these are the
15- same, and this will just be a NoOp.
16-
17- Some cases where it is different:
18- - When you are using a Projection Head in your contrastive loss, and comparing similarities of vectors that are
19- k >=1 nonlinear layers downstream from the actual representation you'll use in later tasks
20- - When you're learning a VAE, and the loss is determined by how effectively you can reconstruct the image
21- from a representation vector, the LossDecoder will handle that representation -> image mapping
22- - When you're predicting actions given current and next state, you'll want to predict those actions given
23- both the representation of the current state, and also information about the next state. This occasional
24- need for extra information beyond the central context state is why we have `extra_context` as an optional
25- bit of data that pair constructors can return, to be passed forward for use here
26- """
# TODO: change "shape" to "dim" throughout this file and the code


class LossDecoder(nn.Module):
    def __init__(self, representation_dim, projection_shape, sample=False):
        ...  # (__init__ body elided in this excerpt)

    # ... (intervening methods elided in this excerpt) ...

    def get_vector(self, z_dist):
        return z_dist.loc

    def ones_like_projection_dim(self, x):
        return torch.ones(size=(x.shape[0], self.projection_dim),
                          device=x.device)


class NoOp(LossDecoder):
    def forward(self, z, traj_info, extra_context=None):
        return z


class ProjectionHead(LossDecoder):
    def __init__(self, representation_dim, projection_shape, sample=False,
                 learn_scale=False):
        super(ProjectionHead, self).__init__(representation_dim,
                                             projection_shape, sample)

        self.shared_mlp = nn.Sequential(nn.Linear(self.representation_dim, 256),
                                        nn.ReLU(),
                                        nn.Linear(256, 256),
                                        nn.ReLU())
        self.mean_layer = nn.Linear(256, self.projection_dim)

        if learn_scale:
            ...  # (elided in this excerpt: scale_layer setup for both branches)

    def forward(self, z_dist, traj_info, extra_context=None):
        z = self.get_vector(z_dist)
        shared_repr = self.shared_mlp(z)
        return independent_multivariate_normal(
            loc=self.mean_layer(shared_repr),
            scale=torch.exp(self.scale_layer(shared_repr)))


class MomentumProjectionHead(LossDecoder):
    def __init__(self, representation_dim, projection_shape, sample=False,
                 momentum_weight=0.99, learn_scale=False):
        super(MomentumProjectionHead, self).__init__(representation_dim,
                                                     projection_shape,
                                                     sample=sample)
        self.context_decoder = ProjectionHead(representation_dim,
                                              projection_shape,
                                              sample=sample,
                                              learn_scale=learn_scale)
        self.target_decoder = copy.deepcopy(self.context_decoder)
        for param in self.target_decoder.parameters():
            param.requires_grad = False
        # ... (elided in this excerpt: rest of __init__, including storing
        # `momentum_weight`, and the decode_context method) ...

    def decode_target(self, z_dist, traj_info, extra_context=None):
        """
        Encode the target/keys using the momentum-updated key encoder. Had
        some thought of making _momentum_update_key_encoder a backwards hook,
        but that seemed overly complex for an initial POC.
        :param z_dist:
        :return:
        """
        with torch.no_grad():
            self._momentum_update_key_encoder()
            decoded_z_dist = self.target_decoder(z_dist, traj_info,
                                                 extra_context=extra_context)
        return MultivariateNormal(
            loc=decoded_z_dist.loc.detach(),
            covariance_matrix=decoded_z_dist.covariance_matrix.detach())

    @torch.no_grad()
    def _momentum_update_key_encoder(self):
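        # Polyak/EMA update: param_k <- m * param_k + (1 - m) * param_q, so
        # the frozen target decoder slowly tracks the trained context decoder.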
        for param_q, param_k in zip(self.context_decoder.parameters(),
                                    self.target_decoder.parameters()):
            param_k.data = (param_k.data * self.momentum_weight +
                            param_q.data * (1. - self.momentum_weight))


class BYOLProjectionHead(MomentumProjectionHead):
    def __init__(self, representation_dim, projection_shape,
                 momentum_weight=0.99, sample=False):
        super(BYOLProjectionHead, self).__init__(representation_dim,
                                                 projection_shape,
                                                 sample=sample,
                                                 momentum_weight=momentum_weight)
        self.context_predictor = ProjectionHead(projection_shape,
                                                projection_shape)

    def forward(self, z_dist, traj_info, extra_context=None):
        internal_dist = super().forward(z_dist, traj_info,
                                        extra_context=extra_context)
        prediction_dist = self.context_predictor(internal_dist, traj_info,
                                                 extra_context=None)
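        # BYOL's loss compares directions rather than magnitudes, so the
        # predicted mean is L2-normalised before being handed to the loss.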
        return independent_multivariate_normal(
            loc=F.normalize(prediction_dist.loc, dim=1),
            scale=prediction_dist.scale)

    def decode_target(self, z_dist, traj_info, extra_context=None):
        with torch.no_grad():
            ...  # (method body elided in this excerpt)


class ActionConditionedVectorDecoder(LossDecoder):
    def __init__(self, representation_dim, projection_shape, action_space,
                 sample=False, action_encoding_dim=128,
                 action_encoder_layers=1, learn_scale=False,
                 action_embedding_dim=5, use_lstm=False):
        super(ActionConditionedVectorDecoder, self).__init__(
            representation_dim, projection_shape, sample=sample)
        self.learn_scale = learn_scale

        # Machinery for turning raw actions into vectors. If actions are
        # discrete, this is done via an Embedding; if actions are
        # continuous/box, via a simple flattening.
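        # (For example: with Discrete(4), each integer action id becomes a
        # 5-dim embedding vector under the default action_embedding_dim; a
        # Box action is flattened to np.prod(action_space.shape) floats.)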
        if isinstance(action_space, spaces.Discrete):
            self.action_processor = nn.Embedding(
                num_embeddings=action_space.n,
                embedding_dim=action_embedding_dim)
            processed_action_dim = action_embedding_dim
            self.action_shape = ()  # discrete actions are just numbers
        elif isinstance(action_space, spaces.Box):
            self.action_processor = functools.partial(torch.flatten,
                                                      start_dim=2)
            processed_action_dim = np.prod(action_space.shape)
            self.action_shape = action_space.shape
        else:
            raise NotImplementedError(
                "Action conditioning is only currently implemented for "
                "Discrete and Box action spaces")

        # Machinery for aggregating information from an arbitrary number of
        # actions into a single vector, either through an LSTM, or by simply
        # averaging the vector representations of the k actions together
        if use_lstm:
            self.action_encoder = nn.LSTM(processed_action_dim,
                                          action_encoding_dim,
                                          action_encoder_layers,
                                          batch_first=True)
        else:
            self.action_encoder = None
            action_encoding_dim = processed_action_dim

        # Machinery for mapping a concatenated (context representation,
        # action representation) pair into a projection
        self.action_conditioned_projection = nn.Linear(
            representation_dim + action_encoding_dim, projection_shape)

        # If learning a scale/std-deviation parameter, declare a layer for
        # that; otherwise, return a unit-constant vector
        if self.learn_scale:
            self.scale_projection = nn.Linear(
                representation_dim + action_encoding_dim, projection_shape)
        else:
            self.scale_projection = self.ones_like_projection_dim

    # ... (intervening code elided in this excerpt) ...

    def decode_context(self, z_dist, traj_info, extra_context=None):
        # (start of method elided: it computes `z`, `batch_dim` and
        # `action_encoding_vector` from the representation and the actions)
        assert action_encoding_vector.shape[0] == batch_dim, \
            action_encoding_vector.shape

        # Concatenate context representation and action representation and
        # map to a merged representation
        merged_vector = torch.cat([z, action_encoding_vector], dim=1)
        mean_projection = self.action_conditioned_projection(merged_vector)
        scale = self.scale_projection(merged_vector)
        return independent_multivariate_normal(loc=mean_projection,
                                               scale=scale)
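

# A construction-only sketch (illustrative: the forward/decode plumbing that
# pulls actions out of `traj_info`/`extra_context` is elided above, so only
# the wiring set up in __init__ is exercised here):
def _action_conditioned_demo():
    decoder = ActionConditionedVectorDecoder(representation_dim=64,
                                             projection_shape=32,
                                             action_space=spaces.Discrete(4),
                                             use_lstm=True)
    # With these sizes: each discrete action id is embedded as a 5-dim vector
    # (action_embedding_dim), an LSTM summarises a (batch, k, 5) action
    # sequence into a 128-dim encoding (action_encoding_dim), and the
    # concatenated (64 + 128)-dim vector is projected down to 32 dims.
    return decoder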