
Commit ad2d546

Update I3D detector
Clean code format and update config file for training
1 parent: fa251e1

2 files changed (+25, -56 lines)


training/config/detector/i3d.yaml (+8, -21)
@@ -1,37 +1,25 @@
-# log dir
-log_dir: /data/home/zhiyuanyan/logs/i3d
-
 # model setting
-pretrained: /data/home/zhiyuanyan/torch_ckpts/I3D_8x8_R50.pth # path to a pre-trained model, if using one
+pretrained: training/pretrained/I3D_8x8_R50.pth # path to a pre-trained model, if using one
 model_name: i3d # model name
-backbone_name: xception # backbone name
-
-#backbone setting
-backbone_config:
-  mode: original
-  num_classes: 1
-  inc: 3
-  dropout: false

 # dataset
 all_dataset: [FaceForensics++, FF-F2F, FF-DF, FF-FS, FF-NT, FaceShifter, DeepFakeDetection, Celeb-DF-v1, Celeb-DF-v2, DFDCP, DFDC, DeeperForensics-1.0, UADFV]
 train_dataset: [FaceForensics++]
 test_dataset: [Celeb-DF-v2]

 compression: c23 # compression-level for videos
-train_batchSize: 8 # training batch size
-test_batchSize: 8 # test batch size
+train_batchSize: 32 # training batch size
+test_batchSize: 32 # test batch size
 workers: 8 # number of data loading workers
 frame_num: {'train': 32, 'test': 32} # number of frames to use per video in training and testing
 resolution: 224 # resolution of output image to network
 with_mask: false # whether to include mask information in the input
 with_landmark: false # whether to include facial landmark information in the input
 video_mode: True # whether to use video-level data
-clip_size: 16 # number of frames in each clip
-
+clip_size: 8 # number of frames in each clip

 # data augmentation
-use_data_augmentation: true # Add this flag to enable/disable data augmentation
+use_data_augmentation: false # Add this flag to enable/disable data augmentation
 data_aug:
   flip_prob: 0.5
   rotate_prob: 0.5
@@ -45,8 +33,8 @@ data_aug:
   quality_upper: 100

 # mean and std for normalization
-mean: [0.5, 0.5, 0.5]
-std: [0.5, 0.5, 0.5]
+mean: [0.485, 0.456, 0.406]
+std: [0.229, 0.224, 0.225]

 # optimizer config
 optimizer:
@@ -66,7 +54,7 @@ optimizer:

 # training config
 lr_scheduler: null # learning rate scheduler
-nEpochs: 30 # number of epochs to train for
+nEpochs: 100 # number of epochs to train for
 start_epoch: 0 # manual epoch number (useful for restarts)
 save_epoch: 1 # interval epochs for saving models
 rec_iter: 100 # interval iterations for recording
@@ -83,6 +71,5 @@ losstype: null
 metric_scoring: auc # metric for evaluation (auc, acc, eer, ap)

 # cuda
-
 cuda: true # whether to use CUDA acceleration
 cudnn: true # whether to use CuDNN for convolution operations
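
For context, the updated values can be sanity-checked by loading the YAML directly. The snippet below is an illustrative sketch, not part of the commit; it assumes PyYAML and torchvision are installed and that it is run from the repository root, using the config path shown in this diff.

import yaml
from torchvision import transforms

# Load the updated config (path taken from this commit).
with open('training/config/detector/i3d.yaml', 'r') as f:
    config = yaml.safe_load(f)

# The commit switches normalization from [0.5, 0.5, 0.5] to ImageNet statistics.
normalize = transforms.Normalize(mean=config['mean'], std=config['std'])

# clip_size (now 8) also drives cfg.DATA.NUM_FRAMES in i3d_detector.py below.
print(config['clip_size'], config['train_batchSize'], config['nEpochs'])  # 8 32 100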

training/detectors/i3d_detector.py (+17, -35)
@@ -57,7 +57,6 @@
 OUTPUT_DIR: .
 """

-
 '''
 # author: Zhiyuan Yan
@@ -85,31 +84,16 @@
 }
 '''

-import os
-import datetime
 import logging
-import numpy as np
-from sklearn import metrics
-from typing import Union
-from collections import defaultdict
-
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-import torch.optim as optim
-from torch.nn import DataParallel
-from torch.utils.tensorboard import SummaryWriter
-
-from metrics.base_metrics_class import calculate_metrics_for_train
+import os
+import sys

-from .base_detector import AbstractDetector
 from detectors import DETECTOR
-from networks import BACKBONE
 from loss import LOSSFUNC
+from metrics.base_metrics_class import calculate_metrics_for_train

+from .base_detector import AbstractDetector

-import os
-import sys
 current_file_path = os.path.abspath(__file__)
 parent_dir = os.path.dirname(os.path.dirname(current_file_path))
 project_root_dir = os.path.dirname(parent_dir)
@@ -120,14 +104,13 @@
 from .utils.slowfast.models.video_model_builder import ResNet as ResNetOri
 from .utils.slowfast.config.defaults import get_cfg
 from torch import nn
-import random
-

 random_select = True
 no_time_pool = True

 logger = logging.getLogger(__name__)

+
 @DETECTOR.register_module(module_name='i3d')
 class I3DDetector(AbstractDetector):
     def __init__(self, config):
@@ -137,7 +120,7 @@ def __init__(self, config):
         cfg.NUM_GPUS = 1
         cfg.TEST.BATCH_SIZE = 1
         cfg.TRAIN.BATCH_SIZE = 1
-        cfg.DATA.NUM_FRAMES = 16
+        cfg.DATA.NUM_FRAMES = config['clip_size']
         self.resnet = ResNetOri(cfg)
         if config['pretrained'] is not None:
             print(f"loading pretrained model from {config['pretrained']}")
@@ -150,45 +133,44 @@ def __init__(self, config):
             self.resnet.load_state_dict(modified_weights, strict=True)

         self.loss_func = nn.BCELoss() # The output of the model is a probability value between 0 and 1 (haved used sigmoid)
-
+
     def build_backbone(self, config):
         pass
-
+
     def build_loss(self, config):
         # prepare the loss function
         loss_class = LOSSFUNC[config['loss_func']]
         loss_func = loss_class()
         return loss_func
-
+
     def features(self, data_dict: dict) -> torch.tensor:
-        inputs = [data_dict['image'].permute(0,2,1,3,4)]
+        inputs = [data_dict['image'].permute(0, 2, 1, 3, 4)]
         pred = self.resnet(inputs)
-        output = {}
-        output["final_output"] = pred
+        output = {"final_output": pred}
+
         return output["final_output"]

     def classifier(self, features: torch.tensor):
         pass
-
+
     def get_losses(self, data_dict: dict, pred_dict: dict) -> dict:
         label = data_dict['label'].float()
         pred = pred_dict['cls'].view(-1)
         loss = self.loss_func(pred, label)
         loss_dict = {'overall': loss}
+
         return loss_dict
-
+
     def get_train_metrics(self, data_dict: dict, pred_dict: dict) -> dict:
         label = data_dict['label']
         pred = pred_dict['cls']
-        # compute metrics for batch data
         auc, eer, acc, ap = calculate_metrics_for_train(label.detach(), pred.detach())
         metric_batch_dict = {'acc': acc, 'auc': auc, 'eer': eer, 'ap': ap}
+
         return metric_batch_dict

     def forward(self, data_dict: dict, inference=False) -> dict:
-        # get the probability
         prob = self.features(data_dict)
-        # build the prediction dict for each output
         pred_dict = {'cls': prob, 'prob': prob, 'feat': prob}
-
+
         return pred_dict
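
The main functional change here is that the backbone's temporal length now follows the config (cfg.DATA.NUM_FRAMES = config['clip_size']) instead of a hard-coded 16. The snippet below is a standalone shape check, not repository code: it assumes clips arrive as (batch, frames, channels, height, width), which is what the permute(0, 2, 1, 3, 4) in features() implies, and reuses the clip_size and resolution values from the updated YAML.

import torch

clip_size = 8      # clip_size from the updated i3d.yaml, now fed into cfg.DATA.NUM_FRAMES
resolution = 224   # resolution from the same config

# Dummy batch of 2 clips laid out as (batch, frames, channels, height, width).
image = torch.randn(2, clip_size, 3, resolution, resolution)

# I3DDetector.features() wraps the permuted tensor in a list before calling the
# SlowFast-style ResNet, which expects (batch, channels, frames, height, width).
inputs = [image.permute(0, 2, 1, 3, 4)]
print(inputs[0].shape)  # torch.Size([2, 3, 8, 224, 224])

forward() then exposes the sigmoid output under the 'cls', 'prob', and 'feat' keys, which is exactly what get_losses() and get_train_metrics() read back in this file.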
