push.

cinjon · cinjon · commit 600679f2a6aa · 2019-11-26T07:33:57.000-08:00
diff --git a/cifar.py b/cifar.py
@@ -35,6 +35,7 @@
 
 parser = argparse.ArgumentParser()
 parser.add_argument("--mode", default="run", help="run, array, or job")
+parser.add_argument("--do_nonlinear", action="store_true", default=False)
 parser.add_argument(
     '--time',
     type=float,
@@ -339,6 +340,42 @@ def forward(self, x):
         # x = self.fc2(x)
         return x
 
+
+class NonLinearModel(nn.Module):
+    """MLP head: `num_layers` same-width Linear+ReLU blocks, then a classifier.
+
+    Args:
+        in_channels: size of the flattened input features; also the hidden width.
+        num_classes: number of output logits.
+        num_layers: number of Linear+ReLU blocks before the final Linear.
+    """
+
+    def __init__(self, in_channels, num_classes, num_layers=3):
+        # Was super(LinearModel, self), which raises TypeError for this class.
+        super(NonLinearModel, self).__init__()
+        self.in_channels = in_channels
+        self.num_classes = num_classes
+        self.num_layers = num_layers
+
+        # nn.Sequential takes modules as positional arguments, not a list, and
+        # the block count is an int, so it has to be iterated with range().
+        self.nonlinear = nn.Sequential(*[
+            nn.Sequential(
+                nn.Linear(self.in_channels, self.in_channels),
+                nn.ReLU()
+            )
+            for _ in range(num_layers)
+        ])
+        self.linear = nn.Linear(self.in_channels, self.num_classes)
+        self.linear.weight.data.normal_(0, 0.01)
+        self.linear.bias.data.zero_()
+
+    def forward(self, x):
+        """Flatten `x` to (batch, -1) and return the class logits."""
+        x = x.view(x.size(0), -1)
+        x = self.nonlinear(x)
+        return self.linear(x)
+    
 #######################################################
 # Main
 #######################################################
@@ -370,6 +407,8 @@ def main(args):
 
     # Build model
     # path = "/misc/kcgscratch1/ChoGroup/resnick/spaceofmotion/zeping/bsn"
+    linear_cls = NonLinearModel if args.do_nonlinear else LinearModel
+    
     if args.model == "amdim":
         hparams = load_hparams_from_tags_csv('/checkpoint/cinjon/amdim/meta_tags.csv')
         # hparams = load_hparams_from_tags_csv(os.path.join(path, "meta_tags.csv"))
@@ -380,7 +419,7 @@ def main(args):
             model.load_state_dict(torch.load(_path)["state_dict"])                
         else:
             print("AMDIM not loading checkpoint") # Debug
-        linear_model = LinearModel(AMDIM_OUTPUT_DIM, args.num_classes)
+        linear_model = linear_cls(AMDIM_OUTPUT_DIM, args.num_classes)
     elif args.model == "ccc":
         model = CCCModel(None)
         if not args.not_pretrain:
@@ -393,7 +432,7 @@ def main(args):
             model.load_state_dict(base_dict)
         else:
             print("CCC not loading checkpoint") # Debug
-        linear_model = LinearModel(CCC_OUTPUT_DIM, args.num_classes).to(device)
+        linear_model = linear_cls(CCC_OUTPUT_DIM, args.num_classes) #.to(device)
     elif args.model == "corrflow":
         model = CORRFLOWModel(None)
         if not args.not_pretrain:
@@ -406,7 +445,7 @@ def main(args):
             model.load_state_dict(base_dict)
         else:
             print("CorrFlow not loading checkpoing") # Debug
-        linear_model = LinearModel(CORRFLOW_OUTPUT_DIM, args.num_classes)
+        linear_model = linear_cls(CORRFLOW_OUTPUT_DIM, args.num_classes)
     elif args.model == "resnet":
         if not args.not_pretrain:
             resnet = torchvision.models.resnet50(pretrained=True)
@@ -415,7 +454,7 @@ def main(args):
             print("ResNet not loading checkpoint") # Debug
         modules = list(resnet.children())[:-1]
         model = nn.Sequential(*modules)
-        linear_model = LinearModel(RESNET_OUTPUT_DIM, args.num_classes)
+        linear_model = linear_cls(RESNET_OUTPUT_DIM, args.num_classes)
     else:
         raise Exception("model type has to be amdim, ccc, corrflow or resnet")
 
@@ -454,8 +493,9 @@ def main(args):
 
     # Set up log dir
     now = datetime.datetime.now()
-    log_dir = "{}{:%Y%m%dT%H%M}".format(args.model, now)
-    log_dir = os.path.join("weights", log_dir)
+    log_dir = '/checkpoint/cinjon/spaceofmotion/bsn/cifar-%d-weights/%s/%s' % (args.num_classes, args.model, args.name)
+    # log_dir = "{}{:%Y%m%dT%H%M}".format(args.model, now)
+    # log_dir = os.path.join("weights", log_dir)
     if not os.path.exists(log_dir):
         os.makedirs(log_dir)
     print("Saving to {}".format(log_dir))
@@ -557,7 +597,7 @@ def main(args):
         train_acc = 0
         train_loss_sum = 0.0
         for iter, input in enumerate(train_dataloader):
-            if time.time() - start_time > args.time*3600 - 10 and comet_exp is not None:
+            if time.time() - start_time > args.time*3600 - 300 and comet_exp is not None:
                 comet_exp.end()
                 sys.exit(-1)
             
@@ -702,6 +742,10 @@ def main(args):
         val_acc = 0
         val_loss_sum = 0.0
         for iter, input in enumerate(val_dataloader):
+            if time.time() - start_time > args.time*3600 - 300 and comet_exp is not None:
+                comet_exp.end()
+                sys.exit(-1)
+                
             imgs = input[0].to(device)
             if args.model != "resnet":
                 imgs = imgs.unsqueeze(1)
diff --git a/compute.py b/compute.py
@@ -0,0 +1,32 @@
+import math
+import time
+import torch
+import torch.nn as nn
+import numpy as np
+
+if __name__ == "__main__":
+    # Flatten each row of the saved array, subtract the per-column mean, and
+    # put the transpose on device 0 as float16.
+    a = np.load("/checkpoint/cinjon/spaceofmotion/bsn/test_reps_random_amdim.npy")
+    a = a.reshape((a.shape[0], -1))
+    a = a - np.mean(a, 0, keepdims=True)
+    a = torch.from_numpy(a.T).half().to(0)
+    print(a.size())
+    # NOTE(review): b is loaded from the same file as a -- confirm intended.
+    b = np.load("/checkpoint/cinjon/spaceofmotion/bsn/test_reps_random_amdim.npy")
+    b = b.reshape((b.shape[0], -1))
+    b = b - np.mean(b, 0, keepdims=True)
+    
+    with torch.no_grad():
+        b = torch.from_numpy(b).half().to(1)
+        print(b.size())
+        # a is installed as the bias-free Linear weight, so the forward pass
+        # below evaluates b.t() @ a.t(), run through DataParallel on ids 0, 1.
+        mat_mult = nn.Linear(in_features=b.shape[0], out_features=a.shape[0], bias=False)
+        print(mat_mult.weight.size())
+        mat_mult.weight.data = a
+        mat_mult_gpu = nn.DataParallel(mat_mult, device_ids=[0, 1]).to('cuda:0')
+        result = mat_mult_gpu(b.t())
+        print(result.size())
+        # Norm of the product, computed on the CPU copy.
+        print(float(torch.norm(result.cpu())))
diff --git a/gen_pem_results_jobs.py b/gen_pem_results_jobs.py
@@ -61,7 +61,48 @@
     # CorrFLow NFC NF:
     4802: 1, 4779: 16,
     # CCC FT
-    4560: 34, 4584: 32, 4572: 4, 4575: 3, 4554: 36, 4578: 14
+    4560: 34, 4584: 32, 4572: 4, 4575: 3, 4554: 36, 4578: 14,
+    # TSN Thumos DFC
+    4896: 15, 4899: 28, 4902: 6, 4881: 9,
+    # TSN Thumos NFC Reg
+    4863: 0, 4872: 12,
+    # TSN Thumos NFC NF
+    4914: 1, 4908: 0, 4923: 2, 4905: 1, 4911: 2, # Done
+    # DFC Resnet Reg: Gym
+    4971: 24, 4956: 21, 4965: 27, 4974: 26,
+    # DFC Resnet-Rand: Gym
+    5256: 30, 5244: 20, 5247: 28, 5238: 13, 5241: 30, 5262: 31, 5253: 27, # Done
+    # NFC Resnet-Rand Reg: Gym
+    5193: 10, 5175: 31, 5187: 25, 5181: 21, 5211: 13, 5199: 30, # Done
+    # DFC Resnet NotRand: Thumos
+    4989: 8, 4980: 10, # Done
+    # DFC Resnet Rand: Thumos
+    5343: 6, 5355: 3, 5319: 0, 5325: 1, 5349: 6, # Done
+    # NFC Reg ResNet Rand: Thumos
+    5307: 5, 5271: 2, 5274: 1, 5295: 2, 5277: 3, # Done
+    # TSN Gym Rand NFC
+    5370: 23, 5388: 29, 5373: 27, 5391: 23,
+    # TSN Rand DFC:
+    5445: 30, 5442: 26, # done
+    # CorrFlow Rand DFC Gymnastics
+    5712: 16, 5733: 18, 5742: 25, 5718: 4,
+    # Corrflow Rand DFC ThumosImages
+    5823: 2, 5802: 2, 5832: 9, 5808: 2,
+    # CCC Thumos Rand dFC
+    5646: 4, 5634: 6, 5619: 13, 5640: 2, 5628: 6,  # done?
+    # Thumos Resnet Rand NL DFC:
+    6108: 6, 6123: 12, 6102: 5, 6090: 4, 6081: 4, 6114: 19,
+    # Thumos Resnet Reg NL DFC:
+    6030: 4, 6027: 25, 6015: 9, 6018: 24,
+    # Gymnastics Resnet Rand NL DFC:
+    6054: 26, 6048: 24, 6075: 32, 6069: 31, 6057: 21, 6078: 19, 6039: 21,
+    # TSN Rand DFC Thumos:
+    5538: 8, 5547: 14,
+    # Thumos Rand TSN NFC
+    5475: 4, 5478: 24,
+    # Gymnastics CCC Reg Rand:
+    5565: 18, 5586: 18, 5559: 18, 5598: 18, 5571: 28, 5580: 18
+    
 }
 
 
diff --git a/gen_pgm_results_jobs.py b/gen_pgm_results_jobs.py
@@ -24,6 +24,7 @@
 check = 0
 for tem_results_subdir in os.listdir(tem_results_dir):
     counter = int(regex.match(tem_results_subdir).groups()[0])
+
     
     print(tem_results_dir, tem_results_subdir, counter)
     job = run(find_counter=counter)
diff --git a/gen_postprocessed_results_jobs.py b/gen_postprocessed_results_jobs.py
@@ -37,7 +37,7 @@
     c2 = int(c2)
     counter = c2
 
-    print(pem_results_subdir, c1, c2)
+    print(pem_results_subdir, c1, c2, '\n')
     if c1 in fixed:
         print('Got from fixed')
         job = fixed[c1]
diff --git a/gen_tem_results_jobs.py b/gen_tem_results_jobs.py
@@ -70,16 +70,51 @@
     # TSN Thumos NFC NF
     4908: 8, 4905: 8, 4911: 8, 4926: 8, 4923: 8, 4914: 8,
     # TSN Thumos DFC
-    4881: 1, 4896: 3, 4899: 1, 4902: 3
+    4881: 1, 4896: 3, 4899: 1, 4902: 3,
+    # DFC Resnet Reg
+    4971: 8, 4956: 3, 4965: 7, 4974: 6,
+    # DFC Resnet-Rand
+    5256: 19, 5244: 14, 5247: 13, 5238: 21, 5241: 22, 5262: 10, 5253: 7,
+    # NFC Resnet-Rand Reg
+    5193: 21, 5175: 12, 5187: 18, 5181: 13, 5211: 10, 5199: 8, # In the queue.
+    # DFC Resnet NotRand:
+    4989: 2, 4980: 3,
+    # DFC Resnet Rand
+    5343: 11, 5355: 25, 5319: 16, 5325: 24, 5349: 16,
+    # NFC Reg ResNet Rand
+    5307: 17, 5271: 17, 5274: 24, 5295: 13, 5277: 25, # done
+    # TSN Gym Rand NFC:
+    5370: 7, 5388: 5, 5373: 4, 5391: 7,
+    # TSN Rand DFC 
+    5442: 3, 5445: 3, # done
+    # CorrFlow Rand DFC Gymnastics
+    5712: 7, 5733: 5, 5742: 7, 5718: 19,
+    # Corrflow Rand DFC ThumosImages
+    5823: 10, 5802: 10, 5832: 13, 5808: 18,
+    # CCC Thumos Rand dFC
+    5646: 9, 5634: 21, 5619: 4, 5640: 9, 5628: 13, # done
+    # Thumos Resnet Rand NL DFC:
+    6108: 10, 6123: 8, 6102: 16, 6090: 7, 6081: 11, 6114: 14,
+    # Thumos Resnet Reg NL DFC:
+    6030: 3, 6027: 3, 6015: 3, 6018: 3,
+    # Gymnastics Resnet Rand NL DFC:
+    6054: 10, 6048: 8, 6075: 8, 6069: 18, 6057: 9, 6078: 9, 6039: 24, # done
+    # TSN Rand DFC Thumos:
+    5538: 3, 5547: 2,
+    # Thumos Rand TSN NFC
+    5475: 1, 5478: 1,
+    # Gymnastics CCC Reg Rand:
+    5565: 4, 5586: 4, 5559: 4, 5598: 4, 5571: 4, 5580: 4
 }
 
 
 num_gpus = 8
+check = 0
 for ns, ckpt_subdir in enumerate(sorted(os.listdir(ckpt_directory))):
     counter = int(regex.match(ckpt_subdir).groups()[0])
     
     print(counter, ckpt_subdir)
-    
+
     _job = run(find_counter=counter)
     if type(_job) == tuple:
         _job = _job[1]
@@ -104,6 +139,7 @@
         _job['time'] = 10
     print(_job['dataset'], _job['representation_module'], _job['do_feat_conversion'])
     # print(sorted(_job.items()))
+    check += 1
     fb_run_batch(_job, counter, email, code_directory)
     print('\n')                
-print(ns+1)
+print(ns+1, check)
diff --git a/main.py b/main.py
@@ -305,7 +305,11 @@ def BSN_Train_TEM(opt):
             os.path.join(opt["checkpoint_path"], opt['name']))
         if opt['representation_checkpoint']:
             # print(model.representation_model.backbone.inception_5b_3x3.weight[0][0])
-            partial_load(opt['representation_checkpoint'], model)
+            if opt['do_random_model']:
+                print('DOING RANDOM MDOEL!!!')
+            else:
+                print('DOING Pretrianed modelll!!!')                
+                partial_load(opt['representation_checkpoint'], model)
             # print(model.representation_model.backbone.inception_5b_3x3.weight[0][0])
         if not opt['no_freeze']:
             for param in model.representation_model.parameters():
@@ -798,16 +802,19 @@ def main(opt):
         counter, job = tem_jobs.run(find_counter=jobid)
         print(counter, job, '\n', opt)
         opt.update(job)
-        print(opt, flush=True)
+        print(sorted(opt.items()), flush=True)
         print('\n***\n%s\n***\n' % opt['do_feat_conversion'])
         if 'debug' in mode:
             opt.update({'num_gpus': 2, 'data_workers': 12,
                         'name': 'dbg', 'counter': 0,
-                        'tem_batch_size': 1,
+                        'tem_batch_size': 1, 'do_feat_conversion': True,
                         # 'gym_image_dir': '/checkpoint/cinjon/spaceofmotion/sep052019/rawframes.426x240.12',
                         'local_comet_dir': None,
                         'dataset': 'thumosimages',
-                        'video_info': '/private/home/cinjon/Code/BSN-boundary-sensitive-network.pytorch/data/thumos14_annotations',                         'ccc_img_size': 128})
+                        'video_info': '/private/home/cinjon/Code/BSN-boundary-sensitive-network.pytorch/data/thumos14_annotations',
+                        'ccc_img_size': 128,
+                        # 'do_random_model': True
+            })
 
     if 'debugrun' not in mode:
         main(opt)
diff --git a/opts.py b/opts.py
@@ -68,6 +68,7 @@ def parse_opt():
     parser.add_argument('--ccc_img_size', type=int, default=256)
     parser.add_argument('--tsn_config', type=str, default='~/Code/BSN-boundary-sensitive-network.pytorch/representations/tsn/temp_tsn_rgb_bninception.py')
 
+    
     # PEM model settings
     parser.add_argument('--pem_feat_dim', type=int, default=32)
     parser.add_argument('--pem_hidden_dim', type=int, default=256)
@@ -143,7 +144,8 @@ def parse_opt():
     parser.add_argument('--do_augment', action='store_true')
     parser.add_argument('--do_representation', action='store_true')
     parser.add_argument('--do_feat_conversion', action='store_true')
-    parser.add_argument('--no_freeze', action='store_true', default=False)    
+    parser.add_argument('--no_freeze', action='store_true', default=False)
+    parser.add_argument('--do_random_model', action='store_true') 
     parser.add_argument('--do_gradient_checkpointing', action='store_true', default=False)    
     parser.add_argument(
         '--representation_module',
diff --git a/pem_jobs.py b/pem_jobs.py
@@ -30,8 +30,9 @@
 
 
 def run(find_counter=None):
-    counter = 950 # NOTE: adjust each time 451, 715, 750, 782, 814, 854
-    
+    counter = 1622 # NOTE: adjust each time 451, 715, 750, 782, 814, 854, 950, 1054, 1150, 1382
+
+    check = 0
     for tem_results_subdir in sorted(os.listdir(tem_results_dir)):
         # if counter - start_counter > 100:
         #     print('Stopping at %d' % counter)
@@ -98,12 +99,13 @@ def run(find_counter=None):
                                 __job['pem_lr_milestones'] = milestones
                                 __job['pem_step_gamma'] = pem_step_gamma
                                 __job['name'] = '%s-%05d' % (_job['name'], counter)
-                                
+
+                                check += 1
                                 if not find_counter:
                                     func(__job, counter, email, code_directory)
                                 elif counter == find_counter:
                                     return __job
-    print(counter) # ended w 782, 814, 854, 950, 1054
+    print(counter, check, check // 8) # ended w 782, 814, 854, 950, 1054, 1150, 1382, 1486, 1622
     
 if __name__ == '__main__':
     run()
diff --git a/representations/amdim/representation.py b/representations/amdim/representation.py
@@ -72,11 +72,11 @@ def __init__(self, opts):
         self.repr_layer1 = self.make_layer(ResidualBlock, channels, channels, 2, stride=2)
         self.repr_layer2 = self.make_layer(ResidualBlock, channels, channels, 2, stride=2)
         if opts['dataset'] == 'gymnastics':
-            self.fc_layer = nn.Linear(640, 400) # 2432
+            self.fc_layer = nn.Linear(640, 400) # 2432, 640
         elif opts['dataset'] == 'thumosimages':
-            self.fc_layer = nn.Linear(640, 400)
+            self.fc_layer = nn.Linear(640, 400) 
         elif opts['dataset'] == 'activitynet':
-            self.fc_layer = nn.Linear(2432, 400)
+            self.fc_layer = nn.Linear(640, 400)
 
     def forward(self, representation):
         # thumosimages shape representation is [bs*nf, 2560, 75]
diff --git a/representations/corrflow/representation.py b/representations/corrflow/representation.py
@@ -75,7 +75,7 @@ def __init__(self, opts):
         if opts['dataset'] == 'gymnastics':
             self.fc_layer = nn.Linear(7168, 400) # 7168, 3584
         elif opts['dataset'] == 'thumosimages':
-            self.fc_layer = nn.Linear(1920, 400) # 
+            self.fc_layer = nn.Linear(3840, 400) # 1920
         elif opts['dataset'] == 'activitynet':
             self.fc_layer = nn.Linear(2560, 400)
 
diff --git a/representations/resnet/model.py b/representations/resnet/model.py
diff --git a/representations/resnet/representation.py b/representations/resnet/representation.py
diff --git a/run_cifar_jobs.py b/run_cifar_jobs.py
diff --git a/tem_jobs.py b/tem_jobs.py