1 change: 1 addition & 0 deletions .gitignore
@@ -5,3 +5,4 @@ checkpoints
__pycache__
outputs
.vscode
~/
Owner
Is this needed?

Contributor Author
nope, forgot to remove that; I added it to work around a local git annoyance

24 changes: 24 additions & 0 deletions README.md
@@ -108,6 +108,30 @@ automatic annotations using DINO work well in many cases but can struggle with the
gripper masks. All downstream object tracking and reconstruction results are sensitive
to the segmentation quality and thus spending a bit of effort here might be worthwhile.

##### Gripper masking with fine-tuned models
Owner
I think it would make sense to also make this an option for the run_asset_generation.py pipeline. Maybe add a store_true argument to enable it.
In that case, we have to think about where to put the installation instructions. We could keep them here and refer to them when explaining that argument.
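For example, something along these lines could work (the flag name and wiring here are just a sketch, not existing code):

```python
# Hypothetical flag for run_asset_generation.py; name and wiring are a sketch,
# not part of the current script.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--use_gripper_finetuned_models",
    action="store_true",
    help="Use the fine-tuned SAM2/Grounding DINO checkpoints for gripper segmentation.",
)
args = parser.parse_args()
# The segmentation step would then load the fine-tuned checkpoints whenever
# args.use_gripper_finetuned_models is True.
```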

We provide fine-tuned SAM2 and Grounding DINO checkpoints for annotating and segmenting the gripper
used in our provided dataset. The checkpoints can be downloaded from [here](https://mitprod-my.sharepoint.com/personal/nepfaff_mit_edu/_layouts/15/onedrive.aspx?id=%2Fpersonal%2Fnepfaff%5Fmit%5Fedu%2FDocuments%2Fscalable%5Freal2sim%5Fmodel%5Fweights&ga=1).

Please put the downloaded checkpoint files in the `./checkpoints` directory.
Owner
:nit: That directory doesn't currently exist. It might make sense to create it and put a .gitignore file inside that ignores everything in the directory except the .gitignore itself (you can use ! to un-ignore it), so the directory stays tracked.
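For example, a `checkpoints/.gitignore` along these lines (a sketch of this suggestion):

```gitignore
# Ignore everything in checkpoints/ except this file, so the directory stays tracked.
*
!.gitignore
```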

We used mmdetection's implementation to fine-tune Grounding DINO. Please see the
[official mmdetection GitHub repository](https://github.com/open-mmlab/mmdetection/tree/main)
for installation instructions.
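For reference, a typical mmdetection installation looks roughly like the following; the exact package versions you need may differ:

```bash
pip install -U openmim
mim install mmengine
mim install "mmcv>=2.0.0"
mim install mmdet
```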
Owner
:nit: Could you add a bit more detail, such as which venv to install it in, so that it works when running our asset generation pipeline?


When `--txt_prompt` is set to `gripper`, the segmentation script will use the fine-tuned gripper
models for annotation and segmentation.
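For illustration only, an invocation could look like the following; the script name and any other arguments here are placeholders, so use the repository's actual segmentation entry point and its documented arguments:

```bash
# Hypothetical example; substitute the real segmentation script and its arguments.
python segmentation_script.py --txt_prompt gripper
```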

To fine-tune your own object detection model for your gripper, see [these instructions](https://github.com/open-mmlab/mmdetection/blob/main/configs/grounding_dino/README.md)
in the official mmdetection GitHub repository.
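With mmdetection installed, fine-tuning generally comes down to pointing its training entry point at a Grounding DINO config; the config below is the upstream fine-tuning example, not our gripper config:

```bash
# Run from the mmdetection repository root; replace the config with your own
# gripper fine-tuning config.
python tools/train.py configs/grounding_dino/grounding_dino_swin-t_finetune_16xb2_1x_coco.py
```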

To fine-tune your own segmentation model for your gripper, see [these training instructions](https://github.com/facebookresearch/sam2/blob/main/training/README.md)
in the official SAM2 GitHub repository.

An example of segmentation failure on the gripper with default models: \
Owner
Could you elaborate a bit here on why it can fail without fine-tuning? It could be worth mentioning that our particular gripper seems to be out of distribution for SAM2, and thus it loses track of it in long videos. This can also be addressed by re-prompting after a failure and segmenting the video in parts, but fine-tuning removes that extra step.

<img src="assets/mask_sam2_default.png" width="200">
Owner
:nit: Maybe "_failure" is more appropriate than "_default".


Gripper segmentation on the same image with custom models: \
<img src="assets/mask_sam2_custom.png" width="200">

### Submodules

#### robot_payload_id
Binary file added assets/mask_sam2_custom.png
Binary file added assets/mask_sam2_default.png
102 changes: 102 additions & 0 deletions configs/coco_detection.py
@@ -0,0 +1,102 @@
# This configuration file is taken from https://github.com/open-mmlab/mmdetection/tree/main/configs

# dataset settings
dataset_type = "CocoDataset"
data_root = "data/coco/"

# Example to use different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer from prefix (not support LMDB and Memcache yet)

# data_root = 's3://openmmlab/datasets/detection/coco/'

# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection/',
#         'data/': 's3://openmmlab/datasets/detection/'
#     }))
backend_args = None

train_pipeline = [
    dict(type="LoadImageFromFile", backend_args=backend_args),
    dict(type="LoadAnnotations", with_bbox=True),
    dict(type="Resize", scale=(1333, 800), keep_ratio=True),
    dict(type="RandomFlip", prob=0.5),
    dict(type="PackDetInputs"),
]
test_pipeline = [
    dict(type="LoadImageFromFile", backend_args=backend_args),
    dict(type="Resize", scale=(1333, 800), keep_ratio=True),
    # If you don't have a gt annotation, delete the pipeline
    dict(type="LoadAnnotations", with_bbox=True),
    dict(
        type="PackDetInputs",
        meta_keys=("img_id", "img_path", "ori_shape", "img_shape", "scale_factor"),
    ),
]
train_dataloader = dict(
    batch_size=2,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type="DefaultSampler", shuffle=True),
    batch_sampler=dict(type="AspectRatioBatchSampler"),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file="annotations/instances_train2017.json",
        data_prefix=dict(img="train2017/"),
        filter_cfg=dict(filter_empty_gt=True, min_size=32),
        pipeline=train_pipeline,
        backend_args=backend_args,
    ),
)
val_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type="DefaultSampler", shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file="annotations/instances_val2017.json",
        data_prefix=dict(img="val2017/"),
        test_mode=True,
        pipeline=test_pipeline,
        backend_args=backend_args,
    ),
)
test_dataloader = val_dataloader

val_evaluator = dict(
    type="CocoMetric",
    ann_file=data_root + "annotations/instances_val2017.json",
    metric="bbox",
    format_only=False,
    backend_args=backend_args,
)
test_evaluator = val_evaluator

# inference on test dataset and
# format the output results for submission.
# test_dataloader = dict(
#     batch_size=1,
#     num_workers=2,
#     persistent_workers=True,
#     drop_last=False,
#     sampler=dict(type='DefaultSampler', shuffle=False),
#     dataset=dict(
#         type=dataset_type,
#         data_root=data_root,
#         ann_file=data_root + 'annotations/image_info_test-dev2017.json',
#         data_prefix=dict(img='test2017/'),
#         test_mode=True,
#         pipeline=test_pipeline))
# test_evaluator = dict(
#     type='CocoMetric',
#     metric='bbox',
#     format_only=True,
#     ann_file=data_root + 'annotations/image_info_test-dev2017.json',
#     outfile_prefix='./work_dirs/coco_detection/test')
28 changes: 28 additions & 0 deletions configs/default_runtime.py
Owner
:nit: I find it a bit weird to have Python files inside a configs dir. I would normally expect yaml files or something similar there. Also, these are only for Grounding DINO fine-tuning. I'm a bit worried that people will look for general pipeline configs in this folder. Would it be possible to rename this to something more specific (e.g. finetuned_grounding_dino_utils or whatever you think is an appropriate group name for these files) and move it under scalable_real2sim/segmentation?

@@ -0,0 +1,28 @@
# This configuration file is taken from https://github.com/open-mmlab/mmdetection/tree/main/configs

default_scope = "mmdet"

default_hooks = dict(
    timer=dict(type="IterTimerHook"),
    logger=dict(type="LoggerHook", interval=50),
    param_scheduler=dict(type="ParamSchedulerHook"),
    checkpoint=dict(type="CheckpointHook", interval=1),
    sampler_seed=dict(type="DistSamplerSeedHook"),
    visualization=dict(type="DetVisualizationHook"),
)

env_cfg = dict(
    cudnn_benchmark=False,
    mp_cfg=dict(mp_start_method="fork", opencv_num_threads=0),
    dist_cfg=dict(backend="nccl"),
)

vis_backends = [dict(type="LocalVisBackend")]
visualizer = dict(
    type="DetLocalVisualizer", vis_backends=vis_backends, name="visualizer"
)
log_processor = dict(type="LogProcessor", window_size=50, by_epoch=True)

log_level = "INFO"
load_from = None
resume = False