diff --git a/environment.yml b/environment.yml index 966341f..73246f0 100644 --- a/environment.yml +++ b/environment.yml @@ -1,13 +1,22 @@ name: mvbreid +channels: + - pytorch + # - conda-forge dependencies: -- python=3.7 +- python>3.7 - numpy - Cython +- matplotlib +- tensorboardX +- opencv - h5py - Pillow - six - scipy - pip - gcc_linux-64 +- pytorch +- torchvision +- cudatoolkit=10.0 - pip: - comet_ml \ No newline at end of file diff --git a/scripts/default_parser.py b/scripts/default_parser.py index 6796c39..7979926 100644 --- a/scripts/default_parser.py +++ b/scripts/default_parser.py @@ -168,8 +168,6 @@ def init_parser(): help='cmc ranks') parser.add_argument('--rerank', action='store_true', help='use person re-ranking (by Zhong et al. CVPR2017)') - parser.add_argument('--combine-method', type=str, default='none', choices=["none", "mean"], - help='use combine method of [none | mean]') parser.add_argument('--visrank', action='store_true', help='visualize ranked results, only available in evaluation mode') @@ -181,6 +179,11 @@ def init_parser(): help='visualize k CAMs') parser.add_argument('--viscam-only', action='store_true', help='no CMC or mAP calculation, for debugging') + parser.add_argument('--combine-method', type=str, default='none', + choices=["none", "mean", "feed_forward", "self_attention"], + help='which combine method to use') + parser.add_argument('--save-embed', type=str, + help='location to save combined embeddings and pids') # ************************************************************ # Miscs diff --git a/scripts/main.py b/scripts/main.py index defef76..a21b953 100755 --- a/scripts/main.py +++ b/scripts/main.py @@ -61,7 +61,8 @@ def build_engine(args, datamanager, model, optimizer, scheduler, experiment=expe use_cpu=args.use_cpu, label_smooth=args.label_smooth, experiment=experiment, - combine_method=args.combine_method + combine_method=args.combine_method, + save_embed=args.save_embed ) else: engine = torchreid.engine.ImageTripletEngine( @@ 
-75,7 +76,8 @@ def build_engine(args, datamanager, model, optimizer, scheduler, experiment=expe use_cpu=args.use_cpu, label_smooth=args.label_smooth, experiment=experiment, - combine_method=args.combine_method + combine_method=args.combine_method, + save_embed=args.save_embed ) else: diff --git a/setup.py b/setup.py index 183a748..7dbd20c 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ def readme(): - with open('README.rst') as f: + with open('README.md') as f: content = f.read() return content diff --git a/torchreid/engine/engine.py b/torchreid/engine/engine.py index 3fc6f4c..5171c65 100644 --- a/torchreid/engine/engine.py +++ b/torchreid/engine/engine.py @@ -13,7 +13,7 @@ from torch.nn import functional as F import torchreid -from torchreid.utils import AverageMeter, visualize_ranked_results, visualize_cam, save_checkpoint, re_ranking, combine_by_id +from torchreid.utils import AverageMeter, visualize_ranked_results, visualize_cam, save_checkpoint, re_ranking, CombineMultipleImages from torchreid.losses import DeepSupervision from torchreid import metrics @@ -32,7 +32,8 @@ class Engine(object): use_cpu (bool, optional): use cpu. Default is False. """ - def __init__(self, datamanager, model, optimizer=None, scheduler=None, use_cpu=False, experiment=None, combine_method="mean"): + def __init__(self, datamanager, model, optimizer=None, scheduler=None, use_cpu=False, + experiment=None, combine_method="mean", save_embed=None): self.datamanager = datamanager self.model = model self.optimizer = optimizer @@ -41,6 +42,7 @@ def __init__(self, datamanager, model, optimizer=None, scheduler=None, use_cpu=F self.writer = None self.experiment = experiment self.combine_method = combine_method + self.save_embed = save_embed # check attributes if not isinstance(self.model, nn.Module): @@ -83,6 +85,9 @@ def run(self, arch, save_dir='log', max_epoch=0, start_epoch=0, fixbase_epoch=0, Default is False. This is only enabled when test_only=True. 
""" trainloader, testloader = self.datamanager.return_dataloaders() + gallery_cam_num = 3 + self.combine_fn = CombineMultipleImages(self.combine_method, self.model.module.feature_dim, + gallery_cam_num, trainloader, self.model) self.test_only = test_only if not test_only: @@ -263,6 +268,7 @@ def _evaluate(self, arch, epoch, dataset_name='', queryloader=None, galleryloade with self.experiment.test(): if not viscam_only: batch_time = AverageMeter() + combine_time = AverageMeter() self.model.eval() @@ -274,7 +280,7 @@ def _evaluate(self, arch, epoch, dataset_name='', queryloader=None, galleryloade imgs = imgs.cuda() end = time.time() features = self._extract_features(imgs) - batch_time.update(time.time() - end) + batch_time.update(time.time() - end, len(pids), True) features = features.data.cpu() qf.append(features) q_pids.extend(pids) @@ -293,7 +299,7 @@ def _evaluate(self, arch, epoch, dataset_name='', queryloader=None, galleryloade imgs = imgs.cuda() end = time.time() features = self._extract_features(imgs) - batch_time.update(time.time() - end) + batch_time.update(time.time() - end, len(pids), True) features = features.data.cpu() gf.append(features) g_pids.extend(pids) @@ -302,20 +308,21 @@ def _evaluate(self, arch, epoch, dataset_name='', queryloader=None, galleryloade g_pids = np.asarray(g_pids) g_camids = np.asarray(g_camids) - # gf = gf.numpy() - # unique_ids = set(g_pids) - # new_g_pids = [] - # gf_by_id = np.empty((len(unique_ids), gf.shape[-1])) - # for i, gid in enumerate(unique_ids): - # gf_by_id[i] = np.mean(gf[np.asarray(g_pids) == gid], axis=0) - # new_g_pids.append(gid) - # gf = torch.tensor(gf_by_id, dtype=torch.float) - # g_pids = np.array(new_g_pids) - - gf, g_pids = combine_by_id(gf, g_pids, self.combine_method) + end = time.time() + num_images = len(g_pids) + self.combine_fn.train() + gf, g_pids = self.combine_fn(gf, g_pids, g_camids) + if self.save_embed: + assert osp.isdir(self.save_embed) + path = osp.realpath(self.save_embed) + np.save(path + 
'/gf-' + self.combine_method + '.npy', gf) + np.save(path + '/g_pids-' + self.combine_method + '.npy', g_pids) + combine_time.update(time.time() - end, num_images, True) + # NOTE(review): removed stray no-op expression `time.time() - end` + gf = torch.tensor(gf, dtype=torch.float) print('Done, obtained {}-by-{} matrix'.format(gf.size(0), gf.size(1))) - print('Speed: {:.4f} sec/batch'.format(batch_time.avg)) + print('Speed: {:.4f} sec/image'.format(batch_time.avg + combine_time.avg)) if normalize_feature: print('Normalzing features with L2 norm ...') diff --git a/torchreid/engine/image/softmax.py b/torchreid/engine/image/softmax.py index facaacc..77b5dda 100644 --- a/torchreid/engine/image/softmax.py +++ b/torchreid/engine/image/softmax.py @@ -63,8 +63,8 @@ class ImageSoftmaxEngine(engine.Engine): """ def __init__(self, datamanager, model, optimizer, scheduler=None, use_cpu=False, - label_smooth=True, experiment=None, combine_method="mean"): - super(ImageSoftmaxEngine, self).__init__(datamanager, model, optimizer, scheduler, use_cpu, experiment, combine_method) + label_smooth=True, experiment=None, combine_method="mean", save_embed=None): + super(ImageSoftmaxEngine, self).__init__(datamanager, model, optimizer, scheduler, use_cpu, experiment, combine_method, save_embed) self.criterion = CrossEntropyLoss( num_classes=self.datamanager.num_train_pids, diff --git a/torchreid/engine/image/triplet.py b/torchreid/engine/image/triplet.py index abf5ead..0fcbf7f 100644 --- a/torchreid/engine/image/triplet.py +++ b/torchreid/engine/image/triplet.py @@ -70,8 +70,8 @@ class ImageTripletEngine(engine.Engine): def __init__(self, datamanager, model, optimizer, margin=0.3, weight_t=1, weight_x=1, scheduler=None, use_cpu=False, - label_smooth=True, experiment=None, combine_method="mean"): - super(ImageTripletEngine, 
self).__init__(datamanager, model, optimizer, scheduler, use_cpu, experiment, combine_method, save_embed) self.weight_t = weight_t self.weight_x = weight_x diff --git a/torchreid/models/osnet.py b/torchreid/models/osnet.py index bafe09c..a678d54 100644 --- a/torchreid/models/osnet.py +++ b/torchreid/models/osnet.py @@ -210,6 +210,7 @@ def __init__(self, num_classes, blocks, layers, channels, feature_dim=512, loss= assert num_blocks == len(layers) assert num_blocks == len(channels) - 1 self.loss = loss + self.feature_dim = feature_dim # convolutional backbone self.conv1 = ConvLayer(3, channels[0], 7, stride=2, padding=3, IN=IN) diff --git a/torchreid/utils/__init__.py b/torchreid/utils/__init__.py index 01e6769..7d5914b 100644 --- a/torchreid/utils/__init__.py +++ b/torchreid/utils/__init__.py @@ -7,5 +7,5 @@ from .reidtools import * from .torchtools import * from .rerank import re_ranking -from .multi_image import combine_by_id +from .multi_image import CombineMultipleImages from .model_complexity import compute_model_complexity diff --git a/torchreid/utils/avgmeter.py b/torchreid/utils/avgmeter.py index f476d82..30bde98 100644 --- a/torchreid/utils/avgmeter.py +++ b/torchreid/utils/avgmeter.py @@ -22,8 +22,11 @@ def reset(self): self.sum = 0 self.count = 0 - def update(self, val, n=1): + def update(self, val, n=1, individual=False): self.val = val - self.sum += val * n + if individual: + self.sum += val * n + else: + self.sum += val self.count += n self.avg = self.sum / self.count \ No newline at end of file diff --git a/torchreid/utils/multi_image.py b/torchreid/utils/multi_image.py index 0c04b38..892ffcb 100644 --- a/torchreid/utils/multi_image.py +++ b/torchreid/utils/multi_image.py @@ -1,16 +1,47 @@ import numpy as np import torch -__all__ = ['combine_by_id'] +from torch import nn +__all__ = ['CombineMultipleImages'] -def combine_by_id(gf, g_pids, method): + +class CombineMultipleImages: """ - transforms features of same bag to a bag embedding + Both returned 
gf and g_pids are numpy array of float32 """ - if method == "none": - print("Does not combine by id") + def __init__(self, method, embed_dim, input_count, trainloader, encoder): + self.encoder = encoder + self.trainloader = trainloader + if method == "none": + self.fn = Identity() + elif method == "mean": + self.fn = Mean() + elif method == "feed_forward": + self.fn = FeedForward(embed_dim, input_count) + elif method == "self_attention": + self.fn = SelfAttention(embed_dim, input_count) + + def train(self): + self.fn.train(self.encoder, self.trainloader) + + def __call__(self, gf, g_pids, g_camids): + return self.fn(gf, g_pids, g_camids) + + +class CombineFunction: + def train(self, encoder, dataloader): + pass + + def __call__(self, gf, g_pids, g_camids): + raise NotImplementedError + + +class Identity(CombineFunction): + def __call__(self, gf, g_pids, g_camids): return gf, g_pids - elif method == "mean": - print("Calculating mean by id ...") + + +class Mean(CombineFunction): + def __call__(self, gf, g_pids, g_camids): gf = gf.numpy() unique_ids = set(g_pids) new_g_pids = [] @@ -18,14 +49,56 @@ def combine_by_id(gf, g_pids, method): for i, gid in enumerate(unique_ids): gf_by_id[i] = np.mean(gf[np.asarray(g_pids) == gid], axis=0) new_g_pids.append(gid) - gf = torch.tensor(gf_by_id, dtype=torch.float) + gf = np.array(gf_by_id) g_pids = np.array(new_g_pids) return gf, g_pids - elif method == "self_attention": - # TODO: self attention - return distmat - elif method == "multi_head_attention": - # TODO: multi-headed attention - return distmat - else: - raise ValueError('Must be valid combine-method') + + +class FeedForward(CombineFunction): # TODO: + def __init__(self, embed_dim, input_count): + super().__init__() + self.model = FeedForwardNN(embed_dim, input_count) + + def train(self, encoder, dataloader): + for data in dataloader: + imgs = data[0] + pids = data[1] + cam_ids = data[2] + # print(len(data)) + # exit() + + def __call__(self, gf, g_pids, g_camids): + 
result = self.model(gf, g_pids, g_camids) + # Some modification on result + return result + + +class SelfAttention(CombineFunction): + def __init__(self, embed_dim, input_count): + self.model = SelfAttentionNN(embed_dim, input_count) + + def train(self, encoder, dataloader): + pass + + def __call__(self, gf, g_pids, g_camids): + result = self.model(gf, g_pids, g_camids) + # Some modification on result + return result + + +class FeedForwardNN(nn.Module): + def __init__(self, embed_dim, input_count): + super().__init__() + self.fc1 = nn.Linear(embed_dim * input_count, embed_dim * input_count) + self.fc2 = nn.Linear(embed_dim * input_count, embed_dim) + + def forward(self, x): + pass + + +class SelfAttentionNN(nn.Module): + def __init__(self, embed_dim, input_count): + super().__init__() + + def forward(self, x): + pass