open-mmlab · manaspgandhi · Jun 18, 2025
diff --git a/.dev_scripts/github/update_model_index.py b/.dev_scripts/github/update_model_index.py
@@ -89,7 +89,7 @@ def get_task_dict(md_file):
     Returns:
         dict: Task name of each method.
     """
-    with open(md_file, 'r') as md:
+    with open(md_file, 'r', encoding='utf-8') as md:
         lines = md.readlines()
     i = 0
     task_dict = dict()

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -29,7 +29,13 @@ repos:
     rev: v2.1.0
     hooks:
       - id: codespell
-        args: ["--skip", "*.ipynb,tools/data/hvu/label_map.json,docs/zh_cn/*", "-L", "formating,te,nd,thre,Gool,gool,lod"]
+        args:
+          [
+            "--skip",
+            "*.ipynb,tools/data/hvu/label_map.json,docs/zh_cn/*",
+            "-L",
+            "formating,te,nd,thre,Gool,gool,lod",
+          ]
   - repo: https://github.com/executablebooks/mdformat
     rev: 0.7.9
     hooks:
@@ -50,7 +56,7 @@ repos:
         files: ^configs/.*\.md$
         require_serial: true
   - repo: https://github.com/myint/docformatter
-    rev: v1.3.1
+    rev: v1.7.7
     hooks:
       - id: docformatter
         args: ["--in-place", "--wrap-descriptions", "79"]

diff --git a/apps/stylegan_projector.py b/apps/stylegan_projector.py
@@ -1,8 +1,9 @@
-r"""
-    This app is used to invert the styleGAN series synthesis network. We find
-    the matching latent vector w for given images so that we can manipulate
-    images in the latent feature space.
-    Ref: https://github.com/rosinality/stylegan2-pytorch/blob/master/projector.py # noqa
+r"""This app is used to invert the styleGAN series synthesis network.
+
+We find the matching latent vector w for given images so that we can
+manipulate images in the latent feature space.
+
+Ref: https://github.com/rosinality/stylegan2-pytorch/blob/master/projector.py # noqa
 """
 import argparse
 import os

diff --git a/configs/ada/metafile.yml b/configs/ada/metafile.yml
@@ -5,7 +5,7 @@ Collections:
   Name: ADA
   Paper:
   - https://arxiv.org/pdf/2006.06676.pdf
-  README: configs/ada/README.md
+  README: configs/ada/README.md
 Models:
 - Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/styleganv3/stylegan3_t_ada_fp16_gamma6.6_metfaces_1024_b4x8.py
   In Collection: ADA

diff --git a/configs/biggan/metafile.yml b/configs/biggan/metafile.yml
@@ -5,7 +5,7 @@ Collections:
   Name: BigGAN
   Paper:
   - https://openreview.net/forum?id=B1xsqj09Fm
-  README: configs/biggan/README.md
+  README: configs/biggan/README.md
 Models:
 - Config: https://github.com/open-mmlab/mmgeneration/blob/master/configs/biggan/biggan_cifar10_32x32_b25x2_500k.py
   In Collection: BigGAN

diff --git a/configs/cyclegan/metafile.yml b/configs/cyclegan/metafile.yml
@@ -7,7 +7,7 @@ Collections:
     Networks'
   Paper:
   - https://openaccess.thecvf.com/content_iccv_2017/html/Zhu_Unpaired_Image-To-Image_Translation_ICCV_2017_paper.html
-  README: configs/cyclegan/README.md
+  README: configs/cyclegan/README.md
 Models:
 - Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/cyclegan/cyclegan_lsgan_resnet_in_facades_b1x1_80k.py
   In Collection: 'CycleGAN: Unpaired Image-to-Image Translation Using Cycle-Consistent

diff --git a/configs/dcgan/metafile.yml b/configs/dcgan/metafile.yml
@@ -7,7 +7,7 @@ Collections:
     Networks
   Paper:
   - https://arxiv.org/abs/1511.06434
-  README: configs/dcgan/README.md
+  README: configs/dcgan/README.md
 Models:
 - Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/dcgan/dcgan_mnist-64_b128x1_Glr4e-4_Dlr1e-4_5k.py
   In Collection: Unsupervised Representation Learning with Deep Convolutional Generative

diff --git a/configs/ggan/metafile.yml b/configs/ggan/metafile.yml
@@ -5,7 +5,7 @@ Collections:
   Name: GGAN
   Paper:
   - https://arxiv.org/abs/1705.02894
-  README: configs/ggan/README.md
+  README: configs/ggan/README.md
 Models:
 - Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/ggan/ggan_celeba-cropped_dcgan-archi_lr-1e-3_64_b128x1_12m.py
   In Collection: GGAN

diff --git a/configs/improved_ddpm/metafile.yml b/configs/improved_ddpm/metafile.yml
@@ -5,7 +5,7 @@ Collections:
   Name: Improved-DDPM
   Paper:
   - https://arxiv.org/abs/2102.09672
-  README: configs/improved_ddpm/README.md
+  README: configs/improved_ddpm/README.md
 Models:
 - Config: https://github.com/open-mmlab/mmgeneration/blob/master/configs/improved_ddpm/ddpm_cosine_hybird_timestep-4k_drop0.3_cifar10_32x32_b8x16_500k.py
   In Collection: Improved-DDPM

diff --git a/configs/lsgan/metafile.yml b/configs/lsgan/metafile.yml
@@ -5,7 +5,7 @@ Collections:
   Name: LSGAN
   Paper:
   - https://openaccess.thecvf.com/content_iccv_2017/html/Mao_Least_Squares_Generative_ICCV_2017_paper.html
-  README: configs/lsgan/README.md
+  README: configs/lsgan/README.md
 Models:
 - Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/lsgan/lsgan_dcgan-archi_lr-1e-3_celeba-cropped_64_b128x1_12m.py
   In Collection: LSGAN

diff --git a/configs/pggan/metafile.yml b/configs/pggan/metafile.yml
@@ -5,7 +5,7 @@ Collections:
   Name: PGGAN
   Paper:
   - https://arxiv.org/abs/1710.10196
-  README: configs/pggan/README.md
+  README: configs/pggan/README.md
 Models:
 - Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/pggan/pggan_celeba-cropped_128_g8_12Mimgs.py
   In Collection: PGGAN

diff --git a/configs/pix2pix/metafile.yml b/configs/pix2pix/metafile.yml
@@ -5,7 +5,7 @@ Collections:
   Name: Pix2Pix
   Paper:
   - https://openaccess.thecvf.com/content_cvpr_2017/html/Isola_Image-To-Image_Translation_With_CVPR_2017_paper.html
-  README: configs/pix2pix/README.md
+  README: configs/pix2pix/README.md
 Models:
 - Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/pix2pix/pix2pix_vanilla_unet_bn_facades_b1x1_80k.py
   In Collection: Pix2Pix

diff --git a/configs/positional_encoding_in_gans/metafile.yml b/configs/positional_encoding_in_gans/metafile.yml
@@ -5,7 +5,7 @@ Collections:
   Name: Positional Encoding in GANs
   Paper:
   - https://openaccess.thecvf.com/content/CVPR2021/html/Xu_Positional_Encoding_As_Spatial_Inductive_Bias_in_GANs_CVPR_2021_paper.html
-  README: configs/positional_encoding_in_gans/README.md
+  README: configs/positional_encoding_in_gans/README.md
 Models:
 - Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/positional_encoding_in_gans/stylegan2_c2_ffhq_256_b3x8_1100k.py
   In Collection: Positional Encoding in GANs

diff --git a/configs/sagan/metafile.yml b/configs/sagan/metafile.yml
@@ -5,7 +5,7 @@ Collections:
   Name: SAGAN
   Paper:
   - https://proceedings.mlr.press/v97/zhang19d.html
-  README: configs/sagan/README.md
+  README: configs/sagan/README.md
 Models:
 - Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/sagan/sagan_32_woReLUinplace_lr-2e-4_ndisc5_cifar10_b64x1.py
   In Collection: SAGAN

diff --git a/configs/singan/metafile.yml b/configs/singan/metafile.yml
@@ -5,7 +5,7 @@ Collections:
   Name: SinGAN
   Paper:
   - https://openaccess.thecvf.com/content_ICCV_2019/html/Shaham_SinGAN_Learning_a_Generative_Model_From_a_Single_Natural_Image_ICCV_2019_paper.html
-  README: configs/singan/README.md
+  README: configs/singan/README.md
 Models:
 - Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/singan/singan_balloons.py
   In Collection: SinGAN

diff --git a/configs/sngan_proj/metafile.yml b/configs/sngan_proj/metafile.yml
@@ -5,7 +5,7 @@ Collections:
   Name: SNGAN
   Paper:
   - https://openreview.net/forum?id=B1QRgziT-
-  README: configs/sngan_proj/README.md
+  README: configs/sngan_proj/README.md
 Models:
 - Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/sngan_proj/sngan_proj_32_woReLUinplace_lr-2e-4_ndisc5_cifar10_b64x1.py
   In Collection: SNGAN

diff --git a/configs/styleganv1/metafile.yml b/configs/styleganv1/metafile.yml
@@ -5,7 +5,7 @@ Collections:
   Name: StyleGANv1
   Paper:
   - https://openaccess.thecvf.com/content_CVPR_2019/html/Karras_A_Style-Based_Generator_Architecture_for_Generative_Adversarial_Networks_CVPR_2019_paper.html
-  README: configs/styleganv1/README.md
+  README: configs/styleganv1/README.md
 Models:
 - Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/styleganv1/styleganv1_ffhq_256_g8_25Mimg.py
   In Collection: StyleGANv1

diff --git a/configs/styleganv2/metafile.yml b/configs/styleganv2/metafile.yml
@@ -5,7 +5,7 @@ Collections:
   Name: StyleGANv2
   Paper:
   - https://openaccess.thecvf.com/content_CVPR_2020/html/Karras_Analyzing_and_Improving_the_Image_Quality_of_StyleGAN_CVPR_2020_paper.html
-  README: configs/styleganv2/README.md
+  README: configs/styleganv2/README.md
 Models:
 - Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/styleganv2/stylegan2_c2_ffhq_1024_b4x8.py
   In Collection: StyleGANv2

diff --git a/configs/styleganv3/metafile.yml b/configs/styleganv3/metafile.yml
@@ -5,7 +5,7 @@ Collections:
   Name: StyleGANv3
   Paper:
   - https://nvlabs-fi-cdn.nvidia.com/stylegan3/stylegan3-paper.pdf
-  README: configs/styleganv3/README.md
+  README: configs/styleganv3/README.md
 Models:
 - Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/styleganv3/stylegan3_t_noaug_fp16_gamma32.8_ffhq_1024_b4x8.py
   In Collection: StyleGANv3

diff --git a/configs/wgan-gp/metafile.yml b/configs/wgan-gp/metafile.yml
@@ -5,7 +5,7 @@ Collections:
   Name: WGAN-GP
   Paper:
   - https://arxiv.org/abs/1704.00028
-  README: configs/wgan-gp/README.md
+  README: configs/wgan-gp/README.md
 Models:
 - Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/wgan-gp/wgangp_GN_celeba-cropped_128_b64x1_160kiter.py
   In Collection: WGAN-GP

diff --git a/mmgen/core/evaluation/metrics.py b/mmgen/core/evaluation/metrics.py
@@ -307,7 +307,7 @@ def sliced_wasserstein(distribution_a,
                        distribution_b,
                        dir_repeats=4,
                        dirs_per_repeat=128):
-    r"""sliced Wasserstein distance of two sets of patches.
+    r"""Sliced Wasserstein distance of two sets of patches.
 
     Ref: https://github.com/tkarras/progressive_growing_of_gans/blob/master/metrics/ms_ssim.py  # noqa
 
@@ -441,15 +441,15 @@ def check(self):
 
     @abstractmethod
     def prepare(self, *args, **kwargs):
-        """please implement in subclass."""
+        """Please implement in subclass."""
 
     @abstractmethod
     def feed_op(self, batch, mode):
-        """please implement in subclass."""
+        """Please implement in subclass."""
 
     @abstractmethod
     def summary(self):
-        """please implement in subclass."""
+        """Please implement in subclass."""
 
 
 @METRICS.register_module()
@@ -787,46 +787,46 @@ def summary(self):
 class PR(Metric):
     r"""Improved Precision and recall metric.
 
-        In this metric, we draw real and generated samples respectively, and
-        embed them into a high-dimensional feature space using a pre-trained
-        classifier network. We use these features to estimate the corresponding
-        manifold. We obtain the estimation by calculating pairwise Euclidean
-        distances between all feature vectors in the set and, for each feature
-        vector, construct a hypersphere with radius equal to the distance to its
-        kth nearest neighbor. Together, these hyperspheres define a volume in
-        the feature space that serves as an estimate of the true manifold.
-        Precision is quantified by querying for each generated image whether
-        the image is within the estimated manifold of real images.
-        Symmetrically, recall is calculated by querying for each real image
-        whether the image is within estimated manifold of generated image.
-
-        Ref: https://github.com/NVlabs/stylegan2-ada-pytorch/blob/main/metrics/precision_recall.py  # noqa
-
-        Note that we highly recommend that users should download the vgg16
-        script module from the following address. Then, the `vgg16_script` can
-        be set with user's local path. If not given, we will use the vgg16 from
-        pytorch model zoo. However, this may bring significant different in the
-        final results.
-
-        Tero's vgg16: https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metrics/vgg16.pt
+    In this metric, we draw real and generated samples respectively, and
+    embed them into a high-dimensional feature space using a pre-trained
+    classifier network. We use these features to estimate the corresponding
+    manifold. We obtain the estimation by calculating pairwise Euclidean
+    distances between all feature vectors in the set and, for each feature
+    vector, construct a hypersphere with radius equal to the distance to its
+    kth nearest neighbor. Together, these hyperspheres define a volume in
+    the feature space that serves as an estimate of the true manifold.
+    Precision is quantified by querying for each generated image whether
+    the image is within the estimated manifold of real images.
+    Symmetrically, recall is calculated by querying for each real image
+    whether the image is within the estimated manifold of generated images.
+
+    Ref: https://github.com/NVlabs/stylegan2-ada-pytorch/blob/main/metrics/precision_recall.py  # noqa
+
+    Note that we highly recommend that users should download the vgg16
+    script module from the following address. Then, the `vgg16_script` can
+    be set with user's local path. If not given, we will use the vgg16 from
+    pytorch model zoo. However, this may bring significant differences in the
+    final results.
+
+    Tero's vgg16: https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metrics/vgg16.pt
 
-        Args:
-            num_images (int): The number of evaluated generated samples.
-            image_shape (tuple): Image shape in order "CHW". Defaults to None.
-            num_real_need (int | None, optional): The number of real images.
-                Defaults to None.
-            full_dataset (bool, optional): Whether to use full dataset for
-                evaluation. Defaults to False.
-            k (int, optional): Kth nearest parameter. Defaults to 3.
-            bgr2rgb (bool, optional): Whether to change the order of image
-                channel. Defaults to True.
-            vgg16_script (str, optional): Path for the Tero's vgg16 module.
-                Defaults to 'work_dirs/cache/vgg16.pt'.
-            row_batch_size (int, optional): The batch size of row data.
-                Defaults to 10000.
-            col_batch_size (int, optional): The batch size of col data.
-                Defaults to 10000.
-        """
+    Args:
+        num_images (int): The number of evaluated generated samples.
+        image_shape (tuple): Image shape in order "CHW". Defaults to None.
+        num_real_need (int | None, optional): The number of real images.
+            Defaults to None.
+        full_dataset (bool, optional): Whether to use full dataset for
+            evaluation. Defaults to False.
+        k (int, optional): Kth nearest parameter. Defaults to 3.
+        bgr2rgb (bool, optional): Whether to change the order of image
+            channel. Defaults to True.
+        vgg16_script (str, optional): Path to Tero's vgg16 module.
+            Defaults to 'work_dirs/cache/vgg16.pt'.
+        row_batch_size (int, optional): The batch size of row data.
+            Defaults to 10000.
+        col_batch_size (int, optional): The batch size of col data.
+            Defaults to 10000.
+    """
     name = 'PR'
 
     def __init__(self,
@@ -1193,25 +1193,25 @@ def clear(self, clear_reals=False):
 class PPL(Metric):
     r"""Perceptual path length.
 
-        Measure the difference between consecutive images (their VGG16
-        embeddings) when interpolating between two random inputs. Drastic
-        changes mean that multiple features have changed together and that
-        they might be entangled.
+    Measure the difference between consecutive images (their VGG16
+    embeddings) when interpolating between two random inputs. Drastic
+    changes mean that multiple features have changed together and that
+    they might be entangled.
 
-        Ref: https://github.com/rosinality/stylegan2-pytorch/blob/master/ppl.py # noqa
+    Ref: https://github.com/rosinality/stylegan2-pytorch/blob/master/ppl.py # noqa
 
-        Args:
-            num_images (int): The number of evaluated generated samples.
-            image_shape (tuple, optional): Image shape in order "CHW". Defaults
-                to None.
-            crop (bool, optional): Whether crop images. Defaults to True.
-            epsilon (float, optional): Epsilon parameter for path sampling.
-                Defaults to 1e-4.
-            space (str, optional): Latent space. Defaults to 'W'.
-            sampling (str, optional): Sampling mode, whether sampling in full
-                path or endpoints. Defaults to 'end'.
-            latent_dim (int, optional): Latent dimension of input noise.
-                Defaults to 512.
+    Args:
+        num_images (int): The number of evaluated generated samples.
+        image_shape (tuple, optional): Image shape in order "CHW". Defaults
+            to None.
+        crop (bool, optional): Whether to crop images. Defaults to True.
+        epsilon (float, optional): Epsilon parameter for path sampling.
+            Defaults to 1e-4.
+        space (str, optional): Latent space. Defaults to 'W'.
+        sampling (str, optional): Sampling mode, whether sampling in full
+            path or endpoints. Defaults to 'end'.
+        latent_dim (int, optional): Latent dimension of input noise.
+            Defaults to 512.
     """
     name = 'PPL'
 
@@ -1389,11 +1389,11 @@ def __next__(self):
 
 @METRICS.register_module()
 class GaussianKLD(Metric):
-    r"""Gaussian KLD (Kullback-Leibler divergence) metric. We calculate the
-    KLD between two gaussian distribution via `mean` and `log_variance`.
-    The passed batch should be a dict instance and contain ``mean_pred``,
-    ``mean_target``, ``logvar_pred``, ``logvar_target``.
-    When call ``feed`` operation, only ``reals`` mode is needed,
+    r"""Gaussian KLD (Kullback-Leibler divergence) metric. We calculate the KLD
+    between two Gaussian distributions via `mean` and `log_variance`. The
+    passed batch should be a dict instance and contain ``mean_pred``,
+    ``mean_target``, ``logvar_pred``, ``logvar_target``. When calling the
+    ``feed`` operation, only ``reals`` mode is needed.
 
     The calculation of KLD can be formulated as:
 
@@ -1421,7 +1421,6 @@ class GaussianKLD(Metric):
             by batchsize. If ``reduction == 'sum'``, the output will be summed.
             If ``reduction == 'mean'``, the output will be divided by the
             number of elements in the output. Defaults to ``'batchmean'``.
-
     """
     name = 'GaussianKLD'