diff --git a/.dev_scripts/github/update_model_index.py b/.dev_scripts/github/update_model_index.py
index 3fc197859..ada154ebe 100755
--- a/.dev_scripts/github/update_model_index.py
+++ b/.dev_scripts/github/update_model_index.py
@@ -89,7 +89,7 @@ def get_task_dict(md_file):
     Returns:
         dict: Task name of each method.
     """
-    with open(md_file, 'r') as md:
+    with open(md_file, 'r', encoding='utf-8') as md:
         lines = md.readlines()
     i = 0
     task_dict = dict()
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 913442955..f848467d9 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -29,7 +29,13 @@ repos:
     rev: v2.1.0
     hooks:
       - id: codespell
-        args: ["--skip", "*.ipynb,tools/data/hvu/label_map.json,docs/zh_cn/*", "-L", "formating,te,nd,thre,Gool,gool,lod"]
+        args:
+          [
+            "--skip",
+            "*.ipynb,tools/data/hvu/label_map.json,docs/zh_cn/*",
+            "-L",
+            "formating,te,nd,thre,Gool,gool,lod",
+          ]
   - repo: https://github.com/executablebooks/mdformat
     rev: 0.7.9
     hooks:
@@ -50,7 +56,7 @@ repos:
         files: ^configs/.*\.md$
         require_serial: true
   - repo: https://github.com/myint/docformatter
-    rev: v1.3.1
+    rev: v1.7.7
     hooks:
       - id: docformatter
         args: ["--in-place", "--wrap-descriptions", "79"]
diff --git a/apps/stylegan_projector.py b/apps/stylegan_projector.py
index 44ac623f6..c3b9d71a2 100644
--- a/apps/stylegan_projector.py
+++ b/apps/stylegan_projector.py
@@ -1,8 +1,9 @@
-r"""
-    This app is used to invert the styleGAN series synthesis network. We find
-    the matching latent vector w for given images so that we can manipulate
-    images in the latent feature space.
-    Ref: https://github.com/rosinality/stylegan2-pytorch/blob/master/projector.py # noqa
+r"""This app is used to invert the styleGAN series synthesis network.
+
+We find the matching latent vector w for given images so that we can
+manipulate images in the latent feature space.
+
+Ref: https://github.com/rosinality/stylegan2-pytorch/blob/master/projector.py # noqa
 """
 import argparse
 import os
diff --git a/configs/ada/metafile.yml b/configs/ada/metafile.yml
index c08af1e31..f256c33bb 100644
--- a/configs/ada/metafile.yml
+++ b/configs/ada/metafile.yml
@@ -5,7 +5,7 @@ Collections:
   Name: ADA
   Paper:
   - https://arxiv.org/pdf/2006.06676.pdf
-  README: configs/ada/README.md
+  README: configs/ada/README.md
 Models:
 - Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/styleganv3/stylegan3_t_ada_fp16_gamma6.6_metfaces_1024_b4x8.py
   In Collection: ADA
diff --git a/configs/biggan/metafile.yml b/configs/biggan/metafile.yml
index a1cf91d14..59b9438e1 100644
--- a/configs/biggan/metafile.yml
+++ b/configs/biggan/metafile.yml
@@ -5,7 +5,7 @@ Collections:
   Name: BigGAN
   Paper:
   - https://openreview.net/forum?id=B1xsqj09Fm
-  README: configs/biggan/README.md
+  README: configs/biggan/README.md
 Models:
 - Config: https://github.com/open-mmlab/mmgeneration/blob/master/configs/biggan/biggan_cifar10_32x32_b25x2_500k.py
   In Collection: BigGAN
diff --git a/configs/cyclegan/metafile.yml b/configs/cyclegan/metafile.yml
index 454d50d49..8abfbe27a 100644
--- a/configs/cyclegan/metafile.yml
+++ b/configs/cyclegan/metafile.yml
@@ -7,7 +7,7 @@ Collections:
     Networks'
   Paper:
   - https://openaccess.thecvf.com/content_iccv_2017/html/Zhu_Unpaired_Image-To-Image_Translation_ICCV_2017_paper.html
-  README: configs/cyclegan/README.md
+  README: configs/cyclegan/README.md
 Models:
 - Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/cyclegan/cyclegan_lsgan_resnet_in_facades_b1x1_80k.py
   In Collection: 'CycleGAN: Unpaired Image-to-Image Translation Using Cycle-Consistent
diff --git a/configs/dcgan/metafile.yml b/configs/dcgan/metafile.yml
index 5e2b8f3bb..388025ab5 100644
--- a/configs/dcgan/metafile.yml
+++ b/configs/dcgan/metafile.yml
@@ -7,7 +7,7 @@ Collections:
     Networks
   Paper:
   - https://arxiv.org/abs/1511.06434
-  README: configs/dcgan/README.md
+  README: configs/dcgan/README.md
 Models:
 - Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/dcgan/dcgan_mnist-64_b128x1_Glr4e-4_Dlr1e-4_5k.py
   In Collection: Unsupervised Representation Learning with Deep Convolutional Generative
diff --git a/configs/ggan/metafile.yml b/configs/ggan/metafile.yml
index d576b281c..0454c6c0a 100644
--- a/configs/ggan/metafile.yml
+++ b/configs/ggan/metafile.yml
@@ -5,7 +5,7 @@ Collections:
   Name: GGAN
   Paper:
   - https://arxiv.org/abs/1705.02894
-  README: configs/ggan/README.md
+  README: configs/ggan/README.md
 Models:
 - Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/ggan/ggan_celeba-cropped_dcgan-archi_lr-1e-3_64_b128x1_12m.py
   In Collection: GGAN
diff --git a/configs/improved_ddpm/metafile.yml b/configs/improved_ddpm/metafile.yml
index c5e224e56..a132a3721 100644
--- a/configs/improved_ddpm/metafile.yml
+++ b/configs/improved_ddpm/metafile.yml
@@ -5,7 +5,7 @@ Collections:
   Name: Improved-DDPM
   Paper:
   - https://arxiv.org/abs/2102.09672
-  README: configs/improved_ddpm/README.md
+  README: configs/improved_ddpm/README.md
 Models:
 - Config: https://github.com/open-mmlab/mmgeneration/blob/master/configs/improved_ddpm/ddpm_cosine_hybird_timestep-4k_drop0.3_cifar10_32x32_b8x16_500k.py
   In Collection: Improved-DDPM
diff --git a/configs/lsgan/metafile.yml b/configs/lsgan/metafile.yml
index 5e611471f..c24b11cc2 100644
--- a/configs/lsgan/metafile.yml
+++ b/configs/lsgan/metafile.yml
@@ -5,7 +5,7 @@ Collections:
   Name: LSGAN
   Paper:
   - https://openaccess.thecvf.com/content_iccv_2017/html/Mao_Least_Squares_Generative_ICCV_2017_paper.html
-  README: configs/lsgan/README.md
+  README: configs/lsgan/README.md
 Models:
 - Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/lsgan/lsgan_dcgan-archi_lr-1e-3_celeba-cropped_64_b128x1_12m.py
   In Collection: LSGAN
diff --git a/configs/pggan/metafile.yml b/configs/pggan/metafile.yml
index 1331595e6..6001b8458 100644
--- a/configs/pggan/metafile.yml
+++ b/configs/pggan/metafile.yml
@@ -5,7 +5,7 @@ Collections:
   Name: PGGAN
   Paper:
   - https://arxiv.org/abs/1710.10196
-  README: configs/pggan/README.md
+  README: configs/pggan/README.md
 Models:
 - Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/pggan/pggan_celeba-cropped_128_g8_12Mimgs.py
   In Collection: PGGAN
diff --git a/configs/pix2pix/metafile.yml b/configs/pix2pix/metafile.yml
index 074b27b08..1b82a9eff 100644
--- a/configs/pix2pix/metafile.yml
+++ b/configs/pix2pix/metafile.yml
@@ -5,7 +5,7 @@ Collections:
   Name: Pix2Pix
   Paper:
   - https://openaccess.thecvf.com/content_cvpr_2017/html/Isola_Image-To-Image_Translation_With_CVPR_2017_paper.html
-  README: configs/pix2pix/README.md
+  README: configs/pix2pix/README.md
 Models:
 - Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/pix2pix/pix2pix_vanilla_unet_bn_facades_b1x1_80k.py
   In Collection: Pix2Pix
diff --git a/configs/positional_encoding_in_gans/metafile.yml b/configs/positional_encoding_in_gans/metafile.yml
index 824980436..f72f94a2d 100644
--- a/configs/positional_encoding_in_gans/metafile.yml
+++ b/configs/positional_encoding_in_gans/metafile.yml
@@ -5,7 +5,7 @@ Collections:
   Name: Positional Encoding in GANs
   Paper:
   - https://openaccess.thecvf.com/content/CVPR2021/html/Xu_Positional_Encoding_As_Spatial_Inductive_Bias_in_GANs_CVPR_2021_paper.html
-  README: configs/positional_encoding_in_gans/README.md
+  README: configs/positional_encoding_in_gans/README.md
 Models:
 - Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/positional_encoding_in_gans/stylegan2_c2_ffhq_256_b3x8_1100k.py
   In Collection: Positional Encoding in GANs
diff --git a/configs/sagan/metafile.yml b/configs/sagan/metafile.yml
index 270534df4..5e20d8dc3 100644
--- a/configs/sagan/metafile.yml
+++ b/configs/sagan/metafile.yml
@@ -5,7 +5,7 @@ Collections:
   Name: SAGAN
   Paper:
   - https://proceedings.mlr.press/v97/zhang19d.html
-  README: configs/sagan/README.md
+  README: configs/sagan/README.md
 Models:
 - Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/sagan/sagan_32_woReLUinplace_lr-2e-4_ndisc5_cifar10_b64x1.py
   In Collection: SAGAN
diff --git a/configs/singan/metafile.yml b/configs/singan/metafile.yml
index 237ffbcaa..de59ca074 100644
--- a/configs/singan/metafile.yml
+++ b/configs/singan/metafile.yml
@@ -5,7 +5,7 @@ Collections:
   Name: SinGAN
   Paper:
   - https://openaccess.thecvf.com/content_ICCV_2019/html/Shaham_SinGAN_Learning_a_Generative_Model_From_a_Single_Natural_Image_ICCV_2019_paper.html
-  README: configs/singan/README.md
+  README: configs/singan/README.md
 Models:
 - Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/singan/singan_balloons.py
   In Collection: SinGAN
diff --git a/configs/sngan_proj/metafile.yml b/configs/sngan_proj/metafile.yml
index 074a18823..31bc661ef 100644
--- a/configs/sngan_proj/metafile.yml
+++ b/configs/sngan_proj/metafile.yml
@@ -5,7 +5,7 @@ Collections:
   Name: SNGAN
   Paper:
   - https://openreview.net/forum?id=B1QRgziT-
-  README: configs/sngan_proj/README.md
+  README: configs/sngan_proj/README.md
 Models:
 - Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/sngan_proj/sngan_proj_32_woReLUinplace_lr-2e-4_ndisc5_cifar10_b64x1.py
   In Collection: SNGAN
diff --git a/configs/styleganv1/metafile.yml b/configs/styleganv1/metafile.yml
index ce7164fc6..01bec0562 100644
--- a/configs/styleganv1/metafile.yml
+++ b/configs/styleganv1/metafile.yml
@@ -5,7 +5,7 @@ Collections:
   Name: StyleGANv1
   Paper:
   - https://openaccess.thecvf.com/content_CVPR_2019/html/Karras_A_Style-Based_Generator_Architecture_for_Generative_Adversarial_Networks_CVPR_2019_paper.html
-  README: configs/styleganv1/README.md
+  README: configs/styleganv1/README.md
 Models:
 - Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/styleganv1/styleganv1_ffhq_256_g8_25Mimg.py
   In Collection: StyleGANv1
diff --git a/configs/styleganv2/metafile.yml b/configs/styleganv2/metafile.yml
index bc7a2c98d..7935617ca 100644
--- a/configs/styleganv2/metafile.yml
+++ b/configs/styleganv2/metafile.yml
@@ -5,7 +5,7 @@ Collections:
   Name: StyleGANv2
   Paper:
   - https://openaccess.thecvf.com/content_CVPR_2020/html/Karras_Analyzing_and_Improving_the_Image_Quality_of_StyleGAN_CVPR_2020_paper.html
-  README: configs/styleganv2/README.md
+  README: configs/styleganv2/README.md
 Models:
 - Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/styleganv2/stylegan2_c2_ffhq_1024_b4x8.py
   In Collection: StyleGANv2
diff --git a/configs/styleganv3/metafile.yml b/configs/styleganv3/metafile.yml
index c88414b12..63217a904 100755
--- a/configs/styleganv3/metafile.yml
+++ b/configs/styleganv3/metafile.yml
@@ -5,7 +5,7 @@ Collections:
   Name: StyleGANv3
   Paper:
   - https://nvlabs-fi-cdn.nvidia.com/stylegan3/stylegan3-paper.pdf
-  README: configs/styleganv3/README.md
+  README: configs/styleganv3/README.md
 Models:
 - Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/styleganv3/stylegan3_t_noaug_fp16_gamma32.8_ffhq_1024_b4x8.py
   In Collection: StyleGANv3
diff --git a/configs/wgan-gp/metafile.yml b/configs/wgan-gp/metafile.yml
index d7075f6d3..205de9764 100644
--- a/configs/wgan-gp/metafile.yml
+++ b/configs/wgan-gp/metafile.yml
@@ -5,7 +5,7 @@ Collections:
   Name: WGAN-GP
   Paper:
   - https://arxiv.org/abs/1704.00028
-  README: configs/wgan-gp/README.md
+  README: configs/wgan-gp/README.md
 Models:
 - Config: https://github.com/open-mmlab/mmgeneration/tree/master/configs/wgan-gp/wgangp_GN_celeba-cropped_128_b64x1_160kiter.py
   In Collection: WGAN-GP
diff --git a/mmgen/core/evaluation/metrics.py b/mmgen/core/evaluation/metrics.py
index 40e06b669..362a481b2 100644
--- a/mmgen/core/evaluation/metrics.py
+++ b/mmgen/core/evaluation/metrics.py
@@ -307,7 +307,7 @@ def sliced_wasserstein(distribution_a,
                        distribution_b,
                        dir_repeats=4,
                        dirs_per_repeat=128):
-    r"""sliced Wasserstein distance of two sets of patches.
+    r"""Sliced Wasserstein distance of two sets of patches.
 
     Ref: https://github.com/tkarras/progressive_growing_of_gans/blob/master/metrics/ms_ssim.py  # noqa
 
@@ -441,15 +441,15 @@ def check(self):
 
     @abstractmethod
     def prepare(self, *args, **kwargs):
-        """please implement in subclass."""
+        """Please implement in subclass."""
 
     @abstractmethod
     def feed_op(self, batch, mode):
-        """please implement in subclass."""
+        """Please implement in subclass."""
 
     @abstractmethod
     def summary(self):
-        """please implement in subclass."""
+        """Please implement in subclass."""
 
 
 @METRICS.register_module()
@@ -787,46 +787,46 @@ def summary(self):
 class PR(Metric):
     r"""Improved Precision and recall metric.
 
-        In this metric, we draw real and generated samples respectively, and
-        embed them into a high-dimensional feature space using a pre-trained
-        classifier network. We use these features to estimate the corresponding
-        manifold. We obtain the estimation by calculating pairwise Euclidean
-        distances between all feature vectors in the set and, for each feature
-        vector, construct a hypersphere with radius equal to the distance to its
-        kth nearest neighbor. Together, these hyperspheres define a volume in
-        the feature space that serves as an estimate of the true manifold.
-        Precision is quantified by querying for each generated image whether
-        the image is within the estimated manifold of real images.
-        Symmetrically, recall is calculated by querying for each real image
-        whether the image is within estimated manifold of generated image.
-
-        Ref: https://github.com/NVlabs/stylegan2-ada-pytorch/blob/main/metrics/precision_recall.py  # noqa
-
-        Note that we highly recommend that users should download the vgg16
-        script module from the following address. Then, the `vgg16_script` can
-        be set with user's local path. If not given, we will use the vgg16 from
-        pytorch model zoo. However, this may bring significant different in the
-        final results.
-
-        Tero's vgg16: https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metrics/vgg16.pt
+    In this metric, we draw real and generated samples respectively, and
+    embed them into a high-dimensional feature space using a pre-trained
+    classifier network. We use these features to estimate the corresponding
+    manifold. We obtain the estimation by calculating pairwise Euclidean
+    distances between all feature vectors in the set and, for each feature
+    vector, construct a hypersphere with radius equal to the distance to its
+    kth nearest neighbor. Together, these hyperspheres define a volume in
+    the feature space that serves as an estimate of the true manifold.
+    Precision is quantified by querying for each generated image whether
+    the image is within the estimated manifold of real images.
+    Symmetrically, recall is calculated by querying for each real image
+    whether the image is within the estimated manifold of generated images.
+
+    Ref: https://github.com/NVlabs/stylegan2-ada-pytorch/blob/main/metrics/precision_recall.py  # noqa
+
+    Note that we highly recommend that users should download the vgg16
+    script module from the following address. Then, the `vgg16_script` can
+    be set with user's local path. If not given, we will use the vgg16 from
+    pytorch model zoo. However, this may bring significant differences in the
+    final results.
+
+    Tero's vgg16: https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metrics/vgg16.pt
 
-        Args:
-            num_images (int): The number of evaluated generated samples.
-            image_shape (tuple): Image shape in order "CHW". Defaults to None.
-            num_real_need (int | None, optional): The number of real images.
-                Defaults to None.
-            full_dataset (bool, optional): Whether to use full dataset for
-                evaluation. Defaults to False.
-            k (int, optional): Kth nearest parameter. Defaults to 3.
-            bgr2rgb (bool, optional): Whether to change the order of image
-                channel. Defaults to True.
-            vgg16_script (str, optional): Path for the Tero's vgg16 module.
-                Defaults to 'work_dirs/cache/vgg16.pt'.
-            row_batch_size (int, optional): The batch size of row data.
-                Defaults to 10000.
-            col_batch_size (int, optional): The batch size of col data.
-                Defaults to 10000.
-        """
+    Args:
+        num_images (int): The number of evaluated generated samples.
+        image_shape (tuple): Image shape in order "CHW". Defaults to None.
+        num_real_need (int | None, optional): The number of real images.
+            Defaults to None.
+        full_dataset (bool, optional): Whether to use full dataset for
+            evaluation. Defaults to False.
+        k (int, optional): Kth nearest parameter. Defaults to 3.
+        bgr2rgb (bool, optional): Whether to change the order of image
+            channel. Defaults to True.
+        vgg16_script (str, optional): Path for the Tero's vgg16 module.
+            Defaults to 'work_dirs/cache/vgg16.pt'.
+        row_batch_size (int, optional): The batch size of row data.
+            Defaults to 10000.
+        col_batch_size (int, optional): The batch size of col data.
+            Defaults to 10000.
+    """
     name = 'PR'
 
     def __init__(self,
@@ -1193,25 +1193,25 @@ def clear(self, clear_reals=False):
 class PPL(Metric):
     r"""Perceptual path length.
 
-        Measure the difference between consecutive images (their VGG16
-        embeddings) when interpolating between two random inputs. Drastic
-        changes mean that multiple features have changed together and that
-        they might be entangled.
+    Measure the difference between consecutive images (their VGG16
+    embeddings) when interpolating between two random inputs. Drastic
+    changes mean that multiple features have changed together and that
+    they might be entangled.
 
-        Ref: https://github.com/rosinality/stylegan2-pytorch/blob/master/ppl.py # noqa
+    Ref: https://github.com/rosinality/stylegan2-pytorch/blob/master/ppl.py # noqa
 
-        Args:
-            num_images (int): The number of evaluated generated samples.
-            image_shape (tuple, optional): Image shape in order "CHW". Defaults
-                to None.
-            crop (bool, optional): Whether crop images. Defaults to True.
-            epsilon (float, optional): Epsilon parameter for path sampling.
-                Defaults to 1e-4.
-            space (str, optional): Latent space. Defaults to 'W'.
-            sampling (str, optional): Sampling mode, whether sampling in full
-                path or endpoints. Defaults to 'end'.
-            latent_dim (int, optional): Latent dimension of input noise.
-                Defaults to 512.
+    Args:
+        num_images (int): The number of evaluated generated samples.
+        image_shape (tuple, optional): Image shape in order "CHW". Defaults
+            to None.
+        crop (bool, optional): Whether crop images. Defaults to True.
+        epsilon (float, optional): Epsilon parameter for path sampling.
+            Defaults to 1e-4.
+        space (str, optional): Latent space. Defaults to 'W'.
+        sampling (str, optional): Sampling mode, whether sampling in full
+            path or endpoints. Defaults to 'end'.
+        latent_dim (int, optional): Latent dimension of input noise.
+            Defaults to 512.
     """
     name = 'PPL'
 
@@ -1389,11 +1389,11 @@ def __next__(self):
 
 @METRICS.register_module()
 class GaussianKLD(Metric):
-    r"""Gaussian KLD (Kullback-Leibler divergence) metric. We calculate the
-    KLD between two gaussian distribution via `mean` and `log_variance`.
-    The passed batch should be a dict instance and contain ``mean_pred``,
-    ``mean_target``, ``logvar_pred``, ``logvar_target``.
-    When call ``feed`` operation, only ``reals`` mode is needed,
+    r"""Gaussian KLD (Kullback-Leibler divergence) metric. We calculate the KLD
+    between two Gaussian distributions via `mean` and `log_variance`. The
+    passed batch should be a dict instance and contain ``mean_pred``,
+    ``mean_target``, ``logvar_pred``, ``logvar_target``. When calling the
+    ``feed`` operation, only ``reals`` mode is needed.
 
     The calculation of KLD can be formulated as:
 
@@ -1421,7 +1421,6 @@ class GaussianKLD(Metric):
             by batchsize. If ``reduction == 'sum'``, the output will be summed.
             If ``reduction == 'mean'``, the output will be divided by the
             number of elements in the output. Defaults to ``'batchmean'``.
-
     """
     name = 'GaussianKLD'
 
diff --git a/mmgen/core/runners/fp16_utils.py b/mmgen/core/runners/fp16_utils.py
index d9ed90f27..de62f2834 100644
--- a/mmgen/core/runners/fp16_utils.py
+++ b/mmgen/core/runners/fp16_utils.py
@@ -17,13 +17,12 @@
 
 
 def nan_to_num(x, nan=0.0, posinf=None, neginf=None, *, out=None):
-    r"""Replaces :literal:`NaN`, positive infinity, and negative infinity
-    values in :attr:`input` with the values specified by :attr:`nan`,
-    :attr:`posinf`, and :attr:`neginf`, respectively. By default,
-    :literal:`NaN`s are replaced with zero, positive infinity is replaced with
-    the greatest finite value representable by :attr:`input`'s dtype, and
-    negative infinity is replaced with the least finite value representable by
-    :attr:`input`'s dtype.
+    r"""Replaces :literal:`NaN`, positive infinity, and negative infinity values
+    in :attr:`input` with the values specified by :attr:`nan`, :attr:`posinf`,
+    and :attr:`neginf`, respectively. By default, :literal:`NaN`s are replaced
+    with zero, positive infinity is replaced with the greatest finite value
+    representable by :attr:`input`'s dtype, and negative infinity is replaced
+    with the least finite value representable by :attr:`input`'s dtype.
 
     .. note::
 
diff --git a/mmgen/models/architectures/arcface/helpers.py b/mmgen/models/architectures/arcface/helpers.py
index 1a5b9c72b..efc6ab76e 100644
--- a/mmgen/models/architectures/arcface/helpers.py
+++ b/mmgen/models/architectures/arcface/helpers.py
@@ -20,7 +20,7 @@ def forward(self, input):
 
 
 def l2_norm(input, axis=1):
-    """l2 normalization.
+    """L2 normalization.
 
     Args:
         input (torch.Tensor): The input tensor.
diff --git a/mmgen/models/architectures/ddpm/modules.py b/mmgen/models/architectures/ddpm/modules.py
index 39a4b4a49..b5662a1a2 100644
--- a/mmgen/models/architectures/ddpm/modules.py
+++ b/mmgen/models/architectures/ddpm/modules.py
@@ -37,6 +37,7 @@ def forward(self, x, y):
     @ACTIVATION_LAYERS.register_module()
     class SiLU(nn.Module):
         r"""Applies the Sigmoid Linear Unit (SiLU) function, element-wise.
+
         The SiLU function is also known as the swish function.
         Args:
             input (bool, optional): Use inplace operation or not.
diff --git a/mmgen/models/architectures/fid_inception.py b/mmgen/models/architectures/fid_inception.py
index b4ec10544..ff23036e8 100644
--- a/mmgen/models/architectures/fid_inception.py
+++ b/mmgen/models/architectures/fid_inception.py
@@ -158,8 +158,8 @@ def forward(self, inp):
 def fid_inception_v3(load_ckpt=True):
     """Build pretrained Inception model for FID computation.
 
-    The Inception model for FID computation uses a different set of weights
-    and has a slightly different structure than torchvision's Inception.
+    The Inception model for FID computation uses a different set of weights and
+    has a slightly different structure than torchvision's Inception.
 
     This method first constructs torchvision's Inception and then patches the
     necessary parts that are different in the FID Inception model.
diff --git a/mmgen/models/architectures/lpips/networks_basic.py b/mmgen/models/architectures/lpips/networks_basic.py
index 26662d514..c4df93df6 100755
--- a/mmgen/models/architectures/lpips/networks_basic.py
+++ b/mmgen/models/architectures/lpips/networks_basic.py
@@ -166,7 +166,7 @@ def __init__(self, chn_in, chn_out=1, use_dropout=False):
 
 
 class Dist2LogitLayer(nn.Module):
-    """takes 2 distances, puts through fc layers, spits out value between [0,
+    """Takes 2 distances, puts through fc layers, spits out value between [0,
     1] (if use_sigmoid is True)"""
 
     def __init__(self, chn_mid=32, use_sigmoid=True):
diff --git a/mmgen/models/architectures/lpips/perceptual_loss.py b/mmgen/models/architectures/lpips/perceptual_loss.py
index ea8d1639d..10b051353 100644
--- a/mmgen/models/architectures/lpips/perceptual_loss.py
+++ b/mmgen/models/architectures/lpips/perceptual_loss.py
@@ -10,7 +10,7 @@
 class PerceptualLoss(torch.nn.Module):
     r"""LPIPS metric with VGG using our perceptually-learned weights.
 
-        Ref: https://github.com/rosinality/stylegan2-pytorch/blob/master/lpips/__init__.py # noqa
+    Ref: https://github.com/rosinality/stylegan2-pytorch/blob/master/lpips/__init__.py # noqa
     """
 
     def __init__(self,
diff --git a/mmgen/models/architectures/lpips/pretrained_networks.py b/mmgen/models/architectures/lpips/pretrained_networks.py
index 3c72ccc22..0ed3499f8 100755
--- a/mmgen/models/architectures/lpips/pretrained_networks.py
+++ b/mmgen/models/architectures/lpips/pretrained_networks.py
@@ -8,7 +8,7 @@
 class vgg16(torch.nn.Module):
     r"""VGG16 feature extractor for LPIPS metric.
 
-        Ref : https://github.com/richzhang/PerceptualSimilarity/blob/master/lpips/pretrained_networks.py # noqa
+    Ref : https://github.com/richzhang/PerceptualSimilarity/blob/master/lpips/pretrained_networks.py # noqa
     """
 
     def __init__(self, requires_grad=False, pretrained=True):
diff --git a/mmgen/models/architectures/sngan_proj/modules.py b/mmgen/models/architectures/sngan_proj/modules.py
index d99e96d4e..38f9be8e5 100644
--- a/mmgen/models/architectures/sngan_proj/modules.py
+++ b/mmgen/models/architectures/sngan_proj/modules.py
@@ -205,7 +205,7 @@ def init_weights(self):
 
 @MODULES.register_module()
 class SNGANDiscResBlock(nn.Module):
-    """resblock used in discriminator of sngan / proj-gan.
+    """Resblock used in discriminator of sngan / proj-gan.
 
     args:
         in_channels (int): input channels.
diff --git a/mmgen/models/architectures/stylegan/ada/upfirdn2d.py b/mmgen/models/architectures/stylegan/ada/upfirdn2d.py
index 51ead27ca..6ac383880 100644
--- a/mmgen/models/architectures/stylegan/ada/upfirdn2d.py
+++ b/mmgen/models/architectures/stylegan/ada/upfirdn2d.py
@@ -40,6 +40,7 @@ def _get_filter_size(f):
 
 def upsample2d(x, f, up=2, padding=0, flip_filter=False, gain=1, impl='cuda'):
     r"""Upsample a batch of 2D images using the given 2D FIR filter.
+
     By default, the result is padded so that its shape is a multiple of the
     input.
     User-specified padding is applied on top of that, with negative values
@@ -93,6 +94,7 @@ def setup_filter(f,
                  gain=1,
                  separable=None):
     r"""Convenience function to setup 2D FIR filter for `upfirdn2d()`.
+
     Args:
         f:           Torch tensor, numpy array, or python list of the shape
                      `[filter_height, filter_width]` (non-separable),
@@ -144,6 +146,7 @@ def downsample2d(x,
                  gain=1,
                  impl='cuda'):
     r"""Downsample a batch of 2D images using the given 2D FIR filter.
+
     By default, the result is padded so that its shape is a fraction of the
     input.
     User-specified padding is applied on top of that, with negative values
diff --git a/mmgen/models/architectures/stylegan/generator_discriminator_v1.py b/mmgen/models/architectures/stylegan/generator_discriminator_v1.py
index ad5ff0906..c7bf53887 100644
--- a/mmgen/models/architectures/stylegan/generator_discriminator_v1.py
+++ b/mmgen/models/architectures/stylegan/generator_discriminator_v1.py
@@ -142,7 +142,7 @@ def train(self, mode=True):
         return super(StyleGANv1Generator, self).train(mode)
 
     def make_injected_noise(self):
-        """make noises that will be injected into feature maps.
+        """Make noises that will be injected into feature maps.
 
         Returns:
             list[Tensor]: List of layer-wise noise tensor.
diff --git a/mmgen/models/architectures/stylegan/generator_discriminator_v2.py b/mmgen/models/architectures/stylegan/generator_discriminator_v2.py
index 51ee9f786..d5b669a56 100644
--- a/mmgen/models/architectures/stylegan/generator_discriminator_v2.py
+++ b/mmgen/models/architectures/stylegan/generator_discriminator_v2.py
@@ -239,7 +239,7 @@ def train(self, mode=True):
         return super(StyleGANv2Generator, self).train(mode)
 
     def make_injected_noise(self):
-        """make noises that will be injected into feature maps.
+        """Make noises that will be injected into feature maps.
 
         Returns:
             list[Tensor]: List of layer-wise noise tensor.
diff --git a/mmgen/models/diffusions/base_diffusion.py b/mmgen/models/diffusions/base_diffusion.py
index 5b32e48eb..a5cb2eed5 100644
--- a/mmgen/models/diffusions/base_diffusion.py
+++ b/mmgen/models/diffusions/base_diffusion.py
@@ -500,6 +500,7 @@ def DDPM_sample(self,
                     show_pbar=False,
                     **kwargs):
         """DDPM sample from random noise.
+
         Args:
             model (torch.nn.Module): Denoising model used to sample images.
             noise (torch.Tensor | callable | None): You can directly give a
@@ -577,7 +578,7 @@ def prepare_diffusion_vars(self):
         """Prepare for variables used in the diffusion process."""
         self.betas = self.get_betas()
         self.alphas = 1.0 - self.betas
-        self.alphas_bar = np.cumproduct(self.alphas, axis=0)
+        self.alphas_bar = np.cumprod(self.alphas, axis=0)
         self.alphas_bar_prev = np.append(1.0, self.alphas_bar[:-1])
         self.alphas_bar_next = np.append(self.alphas_bar[1:], 0.0)
 
@@ -706,8 +707,8 @@ def q_posterior_mean_variance(self,
                                   t,
                                   need_var=True,
                                   logvar=False):
-        r"""Get mean and variance of diffusion posterior
-            `q(x_{t-1} | x_t, x_0)`.
+        r"""Get mean and variance of diffusion posterior `q(x_{t-1} | x_t,
+        x_0)`.
 
         Args:
             x_0 (torch.tensor): The original image before diffusion, shape as
@@ -751,8 +752,8 @@ def p_mean_variance(self,
                         t,
                         clip_denoised=True,
                         denoised_fn=None):
-        r"""Get mean, variance, log variance of denoising process
-        `p(x_{t-1} | x_{t})` and predicted `x_0`.
+        r"""Get mean, variance, log variance of denoising process `p(x_{t-1} |
+        x_{t})` and predicted `x_0`.
 
         Args:
             denoising_output (dict[torch.Tensor]): The output from denoising
diff --git a/mmgen/models/losses/ddpm_loss.py b/mmgen/models/losses/ddpm_loss.py
index e2e441fcf..14779225e 100644
--- a/mmgen/models/losses/ddpm_loss.py
+++ b/mmgen/models/losses/ddpm_loss.py
@@ -366,16 +366,16 @@ def loss_name(self):
 
 @MODULES.register_module()
 class DDPMVLBLoss(DDPMLoss):
-    """Variational lower-bound loss for DDPM-based models.
-    In this loss, we calculate VLB of different timesteps with different
-    method. In detail, ``DiscretizedGaussianLogLikelihoodLoss`` is used at
-    timesteps = 0 and ``GaussianKLDLoss`` at other timesteps.
-    To control the data flow for loss calculation, users should define
-    ``data_info`` and ``data_info_t_0`` for ``GaussianKLDLoss`` and
-    ``DiscretizedGaussianLogLikelihoodLoss`` respectively. If not passed
-    ``_default_data_info`` and ``_default_data_info_t_0`` would be used.
-    To be noted that, we only penalize 'variance' in this loss term, and
-    tensors in output dict corresponding to 'mean' would be detached.
+    """Variational lower-bound loss for DDPM-based models. In this loss, we
+    calculate VLB of different timesteps with different methods. In detail,
+    ``DiscretizedGaussianLogLikelihoodLoss`` is used at timesteps = 0 and
+    ``GaussianKLDLoss`` at other timesteps. To control the data flow for loss
+    calculation, users should define ``data_info`` and ``data_info_t_0`` for
+    ``GaussianKLDLoss`` and ``DiscretizedGaussianLogLikelihoodLoss``
+    respectively. If not passed, ``_default_data_info`` and
+    ``_default_data_info_t_0`` would be used. Note that we only penalize
+    'variance' in this loss term, and tensors in output dict corresponding to
+    'mean' would be detached.
 
     Additionally, we support another log collection function called
     ``name_log_collection``. In this collection method, we would directly
diff --git a/mmgen/models/losses/gan_loss.py b/mmgen/models/losses/gan_loss.py
index b8f4f1ff7..557aa2f8b 100644
--- a/mmgen/models/losses/gan_loss.py
+++ b/mmgen/models/losses/gan_loss.py
@@ -45,7 +45,7 @@ def __init__(self,
                 f'GAN type {self.gan_type} is not implemented.')
 
     def _wgan_loss(self, input, target):
-        """wgan loss.
+        """WGAN loss.
 
         Args:
             input (Tensor): Input tensor.
diff --git a/mmgen/models/losses/pixelwise_loss.py b/mmgen/models/losses/pixelwise_loss.py
index d936569c7..80bbed50a 100644
--- a/mmgen/models/losses/pixelwise_loss.py
+++ b/mmgen/models/losses/pixelwise_loss.py
@@ -41,8 +41,8 @@ def mse_loss(pred, target):
 @weighted_loss
 def gaussian_kld(mean_target, mean_pred, logvar_target, logvar_pred, base='e'):
     r"""Calculate KLD (Kullback-Leibler divergence) of two gaussian
-    distribution.
-    To be noted that in this function, KLD is calcuated in base `e`.
+    distribution. To be noted that in this function, KLD is calculated in base
+    `e`.
 
     .. math::
         :nowrap:
@@ -83,7 +83,8 @@ def gaussian_kld(mean_target, mean_pred, logvar_target, logvar_pred, base='e'):
 
 
 def approx_gaussian_cdf(x):
-    r"""Approximate the cumulative distribution function of the gaussian distribution.
+    r"""Approximate the cumulative distribution function of the gaussian
+    distribution.
 
     Refers to:
     Approximations to the Cumulative Normal Function and its Inverse for Use on a Pocket Calculator  # noqa
@@ -102,7 +103,6 @@ def approx_gaussian_cdf(x):
 
     Returns:
         torch.Tensor: Calculated cumulative distribution.
-
     """
     factor = np.sqrt(2.0 / np.pi)
     y = factor * (x + 0.044715 * torch.pow(x, 3))
diff --git a/mmgen/models/translation_models/base_translation_model.py b/mmgen/models/translation_models/base_translation_model.py
index c07a9a8bb..ef70a2cb4 100644
--- a/mmgen/models/translation_models/base_translation_model.py
+++ b/mmgen/models/translation_models/base_translation_model.py
@@ -114,12 +114,12 @@ def is_domain_reachable(self, domain):
         return domain in self._reachable_domains
 
     def get_other_domains(self, domain):
-        """get other domains."""
+        """Get other domains."""
         return list(set(self._related_domains) - set([domain]))
 
     @abstractmethod
     def _get_target_generator(self, domain):
-        """get target generator."""
+        """Get target generator."""
 
     def translation(self, image, target_domain=None, **kwargs):
         """Translation Image to target style.
diff --git a/mmgen/models/translation_models/static_translation_gan.py b/mmgen/models/translation_models/static_translation_gan.py
index 7c3da37bb..1c9cfef01 100644
--- a/mmgen/models/translation_models/static_translation_gan.py
+++ b/mmgen/models/translation_models/static_translation_gan.py
@@ -125,7 +125,7 @@ def get_module(self, module):
         return module
 
     def _get_target_generator(self, domain):
-        """get target generator."""
+        """Get target generator."""
         assert self.is_domain_reachable(
             domain
         ), f'{domain} domain is not reachable, available domain list is\
@@ -134,7 +134,7 @@ def _get_target_generator(self, domain):
         return self.get_module(self.generators)[domain]
 
     def _get_target_discriminator(self, domain):
-        """get target discriminator."""
+        """Get target discriminator."""
         assert self.is_domain_reachable(
             domain
         ), f'{domain} domain is not reachable, available domain list is\
diff --git a/mmgen/ops/stylegan3/ops/bias_act.py b/mmgen/ops/stylegan3/ops/bias_act.py
index 38c7b6e26..4baf33c39 100644
--- a/mmgen/ops/stylegan3/ops/bias_act.py
+++ b/mmgen/ops/stylegan3/ops/bias_act.py
@@ -133,12 +133,12 @@ def bias_act(x,
              gain=None,
              clamp=None,
              impl='cuda'):
-    r"""Fused bias and activation function.
-    Adds bias `b` to activation tensor `x`, evaluates activation function
-    `act`, and scales the result by `gain`. Each of the steps is optional.
-    In most cases, the fused op is considerably more efficient than performing
-    the same calculation using standard PyTorch ops. It supports first and
-    second order gradients, but not third order gradients.
+    r"""Fused bias and activation function. Adds bias `b` to activation tensor
+    `x`, evaluates activation function `act`, and scales the result by `gain`.
+    Each of the steps is optional. In most cases, the fused op is considerably
+    more efficient than performing the same calculation using standard PyTorch
+    ops. It supports first and second order gradients, but not third order
+    gradients.
 
     Args:
         x:      Input activation tensor. Can be of any shape.
diff --git a/mmgen/ops/stylegan3/ops/upfirdn2d.py b/mmgen/ops/stylegan3/ops/upfirdn2d.py
index 63cb764b4..2a733d8cc 100644
--- a/mmgen/ops/stylegan3/ops/upfirdn2d.py
+++ b/mmgen/ops/stylegan3/ops/upfirdn2d.py
@@ -32,7 +32,7 @@ def _init():
 
 
 def _parse_scaling(scaling):
-    """parse scaling into list [x, y]"""
+    """Parse scaling into list [x, y]."""
     if isinstance(scaling, int):
         scaling = [scaling, scaling]
     assert isinstance(scaling, (list, tuple))
@@ -43,7 +43,7 @@ def _parse_scaling(scaling):
 
 
 def _parse_padding(padding):
-    """parse padding into list [padx0, padx1, pady0, pady1]"""
+    """Parse padding into list [padx0, padx1, pady0, pady1]."""
     if isinstance(padding, int):
         padding = [padding, padding]
     assert isinstance(padding, (list, tuple))
@@ -56,7 +56,7 @@ def _parse_padding(padding):
 
 
 def _get_filter_size(f):
-    """get width and height of filter kernel."""
+    """Get width and height of filter kernel."""
     if f is None:
         return 1, 1
     assert isinstance(f, torch.Tensor) and f.ndim in [1, 2]
diff --git a/model-index.yml b/model-index.yml
index d058a7b3a..c0aa9b72c 100644
--- a/model-index.yml
+++ b/model-index.yml
@@ -1,18 +1,18 @@
 Import:
-- configs/ada/metafile.yml
-- configs/biggan/metafile.yml
-- configs/cyclegan/metafile.yml
-- configs/dcgan/metafile.yml
-- configs/ggan/metafile.yml
-- configs/improved_ddpm/metafile.yml
-- configs/lsgan/metafile.yml
-- configs/pggan/metafile.yml
-- configs/pix2pix/metafile.yml
-- configs/positional_encoding_in_gans/metafile.yml
-- configs/sagan/metafile.yml
-- configs/singan/metafile.yml
-- configs/sngan_proj/metafile.yml
-- configs/styleganv1/metafile.yml
-- configs/styleganv2/metafile.yml
-- configs/styleganv3/metafile.yml
-- configs/wgan-gp/metafile.yml
+- configs/ada/metafile.yml
+- configs/biggan/metafile.yml
+- configs/cyclegan/metafile.yml
+- configs/dcgan/metafile.yml
+- configs/ggan/metafile.yml
+- configs/improved_ddpm/metafile.yml
+- configs/lsgan/metafile.yml
+- configs/pggan/metafile.yml
+- configs/pix2pix/metafile.yml
+- configs/positional_encoding_in_gans/metafile.yml
+- configs/sagan/metafile.yml
+- configs/singan/metafile.yml
+- configs/sngan_proj/metafile.yml
+- configs/styleganv1/metafile.yml
+- configs/styleganv2/metafile.yml
+- configs/styleganv3/metafile.yml
+- configs/wgan-gp/metafile.yml