diff --git a/.gitignore b/.gitignore
index 42337ed..03d0e4b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,5 @@
+try_model.py
+
 output*
 data/
 ngc/
diff --git a/lib/fcn/config.py b/lib/fcn/config.py
index fdb69d1..1146e2e 100644
--- a/lib/fcn/config.py
+++ b/lib/fcn/config.py
@@ -264,7 +264,7 @@
 __C.TRAIN.EMBEDDING_LOSS_WEIGHT_NONMATCH = 1.0
 __C.TRAIN.EMBEDDING_LOSS_WEIGHT_BACKGROUND = 1.0
 
-# region refinement network data processing
+# Region refinement network data processing
 __C.TRAIN.max_augmentation_tries = 10
 
 # Padding
@@ -436,7 +436,7 @@
 def cfg_from_file(filename):
     """Load a config file and merge it into the default options."""
     import yaml
     with open(filename, 'r') as f:
-        yaml_cfg = edict(yaml.load(f))
+        yaml_cfg = edict(yaml.load(f, Loader=yaml.Loader))
 
     _merge_a_into_b(yaml_cfg, __C)
diff --git a/lib/networks/SEG.py b/lib/networks/SEG.py
index 13ed76b..dfc24a6 100644
--- a/lib/networks/SEG.py
+++ b/lib/networks/SEG.py
@@ -129,32 +129,32 @@ def bias_parameters(self):
 
 
 def update_model(model, data):
     model_dict = model.state_dict()
-    print('model keys')
-    print('=================================================')
-    for k, v in model_dict.items():
-        print(k)
-    print('=================================================')
+    # print('model keys')
+    # print('=================================================')
+    # for k, v in model_dict.items():
+    #     print(k)
+    # print('=================================================')
 
     if data is not None:
-        print('data keys')
-        print('=================================================')
+        # print('data keys')
+        # print('=================================================')
         data_new = data.copy()
         for k, v in data.items():
-            print(k)
+            # print(k)
             # legency with the orignially trained model
             if 'module.' in k:
                 data_new[k[7:]] = v
             if 'decoder.features.' in k:
                 new_key = 'decoder.' + k[17:]
                 data_new[new_key] = v
-        print('=================================================')
+        # print('=================================================')
 
         pretrained_dict = {k: v for k, v in data_new.items() if k in model_dict and v.size() == model_dict[k].size()}
-        print('load the following keys from the pretrained model')
-        print('=================================================')
-        for k, v in pretrained_dict.items():
-            print(k)
-        print('=================================================')
+        # print('load the following keys from the pretrained model')
+        # print('=================================================')
+        # for k, v in pretrained_dict.items():
+        #     print(k)
+        # print('=================================================')
         model_dict.update(pretrained_dict)
         model.load_state_dict(model_dict)
diff --git a/lib/networks/resnet.py b/lib/networks/resnet.py
index 6e0f9d2..2980c5b 100644
--- a/lib/networks/resnet.py
+++ b/lib/networks/resnet.py
@@ -294,25 +294,25 @@ def resnet18(pretrained=False, **kwargs):
 
 
 def update_model(model, data):
     model_dict = model.state_dict()
-    print('model keys')
-    print('=================================================')
-    for k, v in model_dict.items():
-        print(k)
-    print('=================================================')
+    # print('model keys')
+    # print('=================================================')
+    # for k, v in model_dict.items():
+    #     print(k)
+    # print('=================================================')
 
     if data is not None:
-        print('data keys')
-        print('=================================================')
-        for k, v in data.items():
-            print(k)
-        print('=================================================')
+        # print('data keys')
+        # print('=================================================')
+        # for k, v in data.items():
+        #     print(k)
+        # print('=================================================')
 
         pretrained_dict = {k: v for k, v in data.items() if k in model_dict and v.size() == model_dict[k].size()}
-        print('load the following keys from the pretrained model')
-        print('=================================================')
-        for k, v in pretrained_dict.items():
-            print(k)
-        print('=================================================')
+        # print('load the following keys from the pretrained model')
+        # print('=================================================')
+        # for k, v in pretrained_dict.items():
+        #     print(k)
+        # print('=================================================')
         model_dict.update(pretrained_dict)
         model.load_state_dict(model_dict)
@@ -336,8 +336,8 @@ def resnet34(pretrained=False, **kwargs):
         # model.load_state_dict(model_zoo.load_url(model_urls['resnet34']))
         data = model_zoo.load_url(model_urls['resnet34'])
         update_model(model, data)
-    else:
-        print('=============no pretrained weights===============')
+    # else:
+    #     print('=============no pretrained weights===============')
 
     return model
diff --git a/lib/utils/mean_shift.py b/lib/utils/mean_shift.py
index 7630293..f3cd779 100644
--- a/lib/utils/mean_shift.py
+++ b/lib/utils/mean_shift.py
@@ -51,7 +51,7 @@ def connected_components(Z, epsilon, metric='cosine'):
     n, d = Z.shape
 
     K = 0
-    cluster_labels = torch.ones(n, dtype=torch.long) * -1
+    cluster_labels = torch.ones(n, dtype=torch.long, device=Z.device) * -1
 
     for i in range(n):
         if cluster_labels[i] == -1:
@@ -64,7 +64,7 @@
 
         # If at least one component already has a label, then use the mode of the label
         if torch.unique(cluster_labels[component_seeds]).shape[0] > 1:
-            temp = cluster_labels[component_seeds].numpy()
+            temp = cluster_labels[component_seeds].to(device='cpu').numpy()
             temp = temp[temp != -1]
             label = torch.tensor(get_label_mode(temp))
         else:
diff --git a/model.py b/model.py
new file mode 100644
index 0000000..96eedc3
--- /dev/null
+++ b/model.py
@@ -0,0 +1,192 @@
+#!/usr/bin/env python3
+
+# Copyright (c) 2020 NVIDIA Corporation. All rights reserved.
+# This work is licensed under the NVIDIA Source Code License - Non-commercial. Full
+# text can be found in LICENSE.md
+
+"""Wrap the unseen object instance segmentation network as a torch.nn.Module"""
+
+from typing import Optional, TypedDict
+
+import torch
+import torch.nn.parallel
+import torch.backends.cudnn as cudnn
+import torch.utils.data
+import torch.nn as nn
+
+import argparse
+import pprint
+import time, os, sys
+import os.path as osp
+import numpy as np
+import cv2
+import scipy.io
+import glob
+import json
+
+# import tools._init_paths as _init_paths
+# from lib.fcn.test_dataset import test_sample
+# from lib.fcn.config import cfg, cfg_from_file, get_output_dir
+# import lib.networks as networks
+# from lib.utils.blob import pad_im
+# from lib.utils import mask as util_
+import tools._init_paths
+from fcn.test_dataset import test_sample
+from fcn.config import cfg, cfg_from_file, get_output_dir
+import networks
+from utils.blob import pad_im
+from utils import mask as util_
+
+RGB_Image_np = np.ndarray
+DEPTH_Image_np = np.ndarray
+RGB_Image = torch.Tensor
+DEPTH_Image = torch.Tensor
+VertexMap = torch.Tensor
+
+
+class RGB_and_VertexMap(TypedDict):
+    image_color: RGB_Image
+    depth: VertexMap
+
+
+class UnseenObjectClustering(nn.Module):
+
+    def __init__(
+        self,
+        gpu_id: int = 0,  # GPU id to use
+        pretrained: str = "data/checkpoints/seg_resnet34_8s_embedding_cosine_rgbd_add_sampling_epoch_16.checkpoint.pth",  # Initialize with pretrained checkpoint
+        pretrained_crop: str = "data/checkpoints/seg_resnet34_8s_embedding_cosine_rgbd_add_crop_sampling_epoch_16.checkpoint.pth",  # Initialize with pretrained checkpoint for crops
+        camera_params_filename: Optional[str] = None,  # Path to the camera-parameters JSON file
+        cfg_file: str = "experiments/cfgs/seg_resnet34_8s_embedding_cosine_rgbd_add_tabletop.yml",  # Optional config file
+        # dataset_name: str = 'shapenet_scene_train',  # Dataset to train on
+        randomize: bool = False,  # Whether or not to randomize (do not use a fixed seed)
+        network_name: str = 'seg_resnet34_8s_embedding',  # Network architecture
+        # image_path = None,
+        visualize: bool = False,  # Whether to visualize the segmentations or not
+    ) -> None:
+
+        super(UnseenObjectClustering, self).__init__()
+
+        self.cfg = cfg
+        if cfg_file is not None:
+            cfg_from_file(cfg_file)
+
+        if len(self.cfg.TEST.CLASSES) == 0:
+            self.cfg.TEST.CLASSES = self.cfg.TRAIN.CLASSES
+            self.cfg.TRAIN.EMBEDDING_METRIC = 'cosine'
+        # print('Using config:')
+        # pprint.pprint(cfg)
+
+        if not randomize:
+            # fix the random seed (numpy) for reproducibility
+            np.random.seed(cfg.RNG_SEED)
+
+        # device
+        self.cfg.gpu_id = gpu_id
+        self.cfg.device = torch.device('cuda:{:d}'.format(self.cfg.gpu_id))
+        self.cfg.instance_id = 0
+        num_classes = 2
+        self.cfg.MODE = 'TEST'
+        # cfg.TEST.VISUALIZE = visualize
+        print('GPU device {:d}'.format(gpu_id))
+
+        # check if intrinsics are available
+        if camera_params_filename is not None and os.path.exists(camera_params_filename):
+            with open(camera_params_filename) as f:
+                self.camera_params = json.load(f)
+        else:
+            raise ValueError("The camera parameters were not given")
+
+        # prepare network
+        if pretrained:
+            network_data = torch.load(pretrained)
+            # print("=> using pre-trained network '{}'".format(pretrained))
+        else:
+            network_data = None
+            print("no pretrained network specified")
+            sys.exit()
+
+        self.network = networks.__dict__[network_name](num_classes, self.cfg.TRAIN.NUM_UNITS, network_data).cuda(device=self.cfg.device)
+        self.network = torch.nn.DataParallel(self.network, device_ids=[self.cfg.gpu_id]).cuda(device=self.cfg.device)
+        cudnn.benchmark = True
+        self.network.eval()
+
+        if pretrained_crop:
+            network_data_crop = torch.load(pretrained_crop)
+            self.network_crop = networks.__dict__[network_name](num_classes, self.cfg.TRAIN.NUM_UNITS, network_data_crop).cuda(device=self.cfg.device)
+            self.network_crop = torch.nn.DataParallel(self.network_crop, device_ids=[self.cfg.gpu_id]).cuda(device=self.cfg.device)
+            self.network_crop.eval()
+        else:
+            self.network_crop = None
+
+        # if cfg.TEST.VISUALIZE:
+        #     index_images = np.random.permutation(len(images_color))
+        # else:
+        #     index_images = range(len(images_color))
+
+    def compute_xyz(self, depth_img, fx, fy, px, py, height, width) -> np.ndarray:
+        # back-project the depth map to a per-pixel XYZ map with the pinhole camera model
+        indices = util_.build_matrix_of_indices(height, width)
+        z_e = depth_img
+        x_e = (indices[..., 1] - px) * z_e / fx
+        y_e = (indices[..., 0] - py) * z_e / fy
+        xyz_img = np.stack([x_e, y_e, z_e], axis=-1)  # Shape: [H x W x 3]
+        return xyz_img
+
+    def read_sample(self, rgb: RGB_Image_np, depth: DEPTH_Image_np, camera_params) -> RGB_and_VertexMap:
+        # bgr image
+        bgr_permute = [2, 1, 0]
+        bgr = rgb[..., bgr_permute]
+
+        if self.cfg.INPUT == 'DEPTH' or self.cfg.INPUT == 'RGBD':
+            # depth image: convert millimeters to meters
+            # depth_img = cv2.imread(filename_depth, cv2.IMREAD_ANYDEPTH)
+            depth = depth.astype(np.float32) / 1000.0
+
+            height = depth.shape[0]
+            width = depth.shape[1]
+            fx = camera_params['fx']
+            fy = camera_params['fy']
+            px = camera_params['x_offset']
+            py = camera_params['y_offset']
+            xyz_img = self.compute_xyz(depth, fx, fy, px, py, height, width)
+        else:
+            xyz_img = None
+
+        im_tensor = torch.from_numpy(bgr) / 255.0
+        pixel_mean = torch.tensor(self.cfg.PIXEL_MEANS / 255.0).float()
+        im_tensor -= pixel_mean
+        image_blob = im_tensor.permute(2, 0, 1)
+        sample = {'image_color': image_blob.unsqueeze(0)}
+
+        if self.cfg.INPUT == 'DEPTH' or self.cfg.INPUT == 'RGBD':
+            depth_blob = torch.from_numpy(xyz_img).permute(2, 0, 1)
+            sample['depth'] = depth_blob.unsqueeze(0)
+
+        return sample
+
+    def forward(
+        self,
+        rgb_img: RGB_Image_np,
+        depth_img: DEPTH_Image_np,
+    ):
+        assert rgb_img.shape[-1] == 3, "The RGB image must have 3 channels in the last dimension"
+        assert rgb_img.shape[:-1] == depth_img.shape, f"The RGB and depth images have different shapes, {rgb_img.shape} and {depth_img.shape} respectively"
+
+        single_image = False
+        if len(rgb_img.shape) == 3:
+            rgb_img = np.expand_dims(rgb_img, axis=0)
+            depth_img = np.expand_dims(depth_img, axis=0)
+            single_image = True
+
+        n_images = rgb_img.shape[0]
+        predictions = []
+
+        for i in range(n_images):
+            sample = self.read_sample(rgb_img[i], depth_img[i], self.camera_params)
+            torch.cuda.synchronize()
+            torch.cuda.empty_cache()
+            out_label, out_label_refined = test_sample(sample, self.network, self.network_crop)
+            predictions.append(out_label_refined)
+
+        if single_image:
+            return predictions[0]
+        return predictions
diff --git a/requirement.txt b/requirement.txt
index 18e03c5..0f23e6e 100644
--- a/requirement.txt
+++ b/requirement.txt
@@ -1,3 +1,4 @@
+Cython
 opencv-python == 4.2.0.34
 transforms3d
 pillow
@@ -7,5 +8,5 @@ easydict
 pyyaml
 future
 scipy
-python-pcl
+# python-pcl
 imageio