from collections import namedtuple
import os
+ import sys
import cv2
import time
import math
from glob import glob
from PIL import Image
- from comet_ml import Experiment as CometExperiment
+ from comet_ml import Experiment as CometExperiment, OfflineExperiment
import torch
import torch.nn as nn
import torch.optim as optim

parser = argparse.ArgumentParser()
parser.add_argument("--mode", default="run", help="run, array, or job")
+ parser.add_argument(
+     '--time',
+     type=float,
+     default=10,
+     help='the number of hours',
+ )
parser.add_argument("--not_pretrain", action="store_true", default=False)
+ parser.add_argument('--local_comet_dir',
+                     type=str,
+                     default=None,
+                     help='local dir to process comet locally only. '
+                          'primarily for fb, will stop remote calls.')
parser.add_argument('--name',
                    type=str,
                    help='the identifying name of this experiment.',
@@ -334,31 +346,47 @@ def forward(self, x):
def main(args):
    print('Pretrain? ', not args.not_pretrain)
    print(args.model)
-
-     comet_exp = CometExperiment(api_key="hIXq6lDzWzz24zgKv7RYz6blo",
-                                 project_name="selfcifar",
-                                 workspace="cinjon",
-                                 auto_metric_logging=True,
-                                 auto_output_logging=None,
-                                 auto_param_logging=False)
+     start_time = time.time()
+
+     if args.local_comet_dir:
+         comet_exp = OfflineExperiment(
+             api_key="hIXq6lDzWzz24zgKv7RYz6blo",
+             project_name="selfcifar",
+             workspace="cinjon",
+             auto_metric_logging=True,
+             auto_output_logging=None,
+             auto_param_logging=False,
+             offline_directory=args.local_comet_dir)
+     else:
+         comet_exp = CometExperiment(
+             api_key="hIXq6lDzWzz24zgKv7RYz6blo",
+             project_name="selfcifar",
+             workspace="cinjon",
+             auto_metric_logging=True,
+             auto_output_logging=None,
+             auto_param_logging=False)
    comet_exp.log_parameters(vars(args))
    comet_exp.set_name(args.name)

    # Build model
-     path = "/misc/kcgscratch1/ChoGroup/resnick/spaceofmotion/zeping/bsn"
+     # path = "/misc/kcgscratch1/ChoGroup/resnick/spaceofmotion/zeping/bsn"
    if args.model == "amdim":
-         hparams = load_hparams_from_tags_csv(os.path.join(path, "meta_tags.csv"))
+         hparams = load_hparams_from_tags_csv('/checkpoint/cinjon/amdim/meta_tags.csv')
+         # hparams = load_hparams_from_tags_csv(os.path.join(path, "meta_tags.csv"))
        model = AMDIMModel(hparams)
        if not args.not_pretrain:
-             model.load_state_dict(
-                 torch.load(os.path.join(path, "_ckpt_epoch_434.ckpt"))["state_dict"])
+             # _path = os.path.join(path, "_ckpt_epoch_434.ckpt")
+             _path = '/checkpoint/cinjon/amdim/_ckpt_epoch_434.ckpt'
+             model.load_state_dict(torch.load(_path)["state_dict"])
        else:
            print("AMDIM not loading checkpoint")  # Debug
        linear_model = LinearModel(AMDIM_OUTPUT_DIM, args.num_classes)
    elif args.model == "ccc":
        model = CCCModel(None)
        if not args.not_pretrain:
-             checkpoint = torch.load(os.path.join(path, "TimeCycleCkpt14.pth"))
+             # _path = os.path.join(path, "TimeCycleCkpt14.pth")
+             _path = '/checkpoint/cinjon/spaceofmotion/bsn/TimeCycleCkpt14.pth'
+             checkpoint = torch.load(_path)
            base_dict = {
                '.'.join(k.split('.')[1:]): v
                for k, v in list(checkpoint['state_dict'].items())}
@@ -369,7 +397,9 @@ def main(args):
    elif args.model == "corrflow":
        model = CORRFLOWModel(None)
        if not args.not_pretrain:
-             checkpoint = torch.load(os.path.join(path, "corrflow.kineticsmodel.pth"))
+             _path = '/checkpoint/cinjon/spaceofmotion/supercons/corrflow.kineticsmodel.pth'
+             # _path = os.path.join(path, "corrflow.kineticsmodel.pth")
+             checkpoint = torch.load(_path)
            base_dict = {
                '.'.join(k.split('.')[1:]): v
                for k, v in list(checkpoint['state_dict'].items())}
@@ -433,8 +463,7 @@ def main(args):
    batch_size = args.batch_size * torch.cuda.device_count()
    # CIFAR-10
    if args.num_classes == 10:
-         data_path = ("/misc/kcgscratch1/ChoGroup/resnick/spaceofmotion/zeping/"
-                      "bsn/data/cifar-10-batches-py")
+         data_path = ("/private/home/cinjon/cifar-data/cifar-10-batches-py")
        _train_dataset = CIFAR_dataset(
            glob(os.path.join(data_path, "data*")),
            args.num_classes,
@@ -484,8 +513,7 @@ def main(args):
    #     val_dev_dataset, shuffle=False, batch_size=batch_size, num_workers=args.num_workers)
    # CIFAR-100
    elif args.num_classes == 100:
-         data_path = ("/misc/kcgscratch1/ChoGroup/resnick/spaceofmotion/zeping/"
-                      "bsn/data/cifar-100-python")
+         data_path = ("/private/home/cinjon/cifar-data/cifar-100-python")
        _train_dataset = CIFAR_dataset(
            [os.path.join(data_path, "train")],
            args.num_classes,
@@ -529,6 +557,10 @@ def main(args):
        train_acc = 0
        train_loss_sum = 0.0
        for iter, input in enumerate(train_dataloader):
+             if time.time() - start_time > args.time * 3600 - 10 and comet_exp is not None:
+                 comet_exp.end()
+                 sys.exit(-1)
+
            imgs = input[0].to(device)
            if args.model != "resnet":
                imgs = imgs.unsqueeze(1)
@@ -704,8 +736,10 @@ def main(args):
        if val_acc > best_acc:
            best_acc = val_acc
            best_epoch = epoch
-             save_path = os.path.join(log_dir, "{}.pth".format(epoch))
-             torch.save(linear_model.state_dict(), save_path)
+             linear_save_path = os.path.join(log_dir, "{}.linear.pth".format(epoch))
+             model_save_path = os.path.join(log_dir, "{}.model.pth".format(epoch))
+             torch.save(linear_model.state_dict(), linear_save_path)
+             torch.save(model.state_dict(), model_save_path)

        # Check bias and variance
        print("Epoch {} lr {} total: train_loss:{} train_acc:{} val_loss:{} val_acc:{}".format(