diff --git a/metadata-ingestion/scripts/docs_config_table.py b/metadata-ingestion/scripts/docs_config_table.py index 3c73d11eac1cc3..67e60bb3d517c2 100644 --- a/metadata-ingestion/scripts/docs_config_table.py +++ b/metadata-ingestion/scripts/docs_config_table.py @@ -364,8 +364,7 @@ def should_hide_field(schema_field, current_source: str, schema_dict: Dict[str, properties = def_schema.get("properties", {}) if field_name in properties: field_schema = properties[field_name] - schema_extra = field_schema.get("schema_extra", {}) - supported_sources = schema_extra.get("supported_sources") + supported_sources = field_schema.get("supported_sources", {}) if supported_sources and current_source: return current_source.lower() not in [s.lower() for s in supported_sources] diff --git a/metadata-ingestion/setup.cfg b/metadata-ingestion/setup.cfg index 4cc8b1c79eb3f6..ae11023d80faf7 100644 --- a/metadata-ingestion/setup.cfg +++ b/metadata-ingestion/setup.cfg @@ -1,7 +1,8 @@ [mypy] plugins = ./tests/test_helpers/sqlalchemy_mypy_plugin.py, - pydantic.mypy + pydantic.mypy, + pydantic.v1.mypy exclude = ^(venv/|build/|dist/|examples/transforms/setup.py) ignore_missing_imports = yes namespace_packages = no diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index a4190617b80479..14393dfe03317c 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -18,10 +18,7 @@ "typing_extensions>=4.5.0", # Actual dependencies. "typing-inspect", - # pydantic 1.8.2 is incompatible with mypy 0.910. - # See https://github.com/samuelcolvin/pydantic/pull/3175#issuecomment-995382910. - # pydantic 1.10.3 is incompatible with typing-extensions 4.1.1 - https://github.com/pydantic/pydantic/issues/4885 - "pydantic>=1.10.0,!=1.10.3", + "pydantic>=2.4.0", "mixpanel>=4.9.0", # Airflow depends on fairly old versions of sentry-sdk, so we want to be loose with our constraints. "sentry-sdk", @@ -58,12 +55,6 @@ "ruamel.yaml", } -pydantic_no_v2 = { - # pydantic 2 makes major, backwards-incompatible changes - https://github.com/pydantic/pydantic/issues/4887 - # Tags sources that require the pydantic v2 API. - "pydantic<2", -} - rest_common = {"requests", "requests_file"} kafka_common = { @@ -119,7 +110,7 @@ dbt_common = { *sqlglot_lib, - "more_itertools", + "more-itertools", } cachetools_lib = { @@ -507,10 +498,7 @@ # It's technically wrong for packages to depend on setuptools. However, it seems mlflow does it anyways. "setuptools", }, - "datahub-debug": { - "dnspython==2.7.0", - "requests" - }, + "datahub-debug": {"dnspython==2.7.0", "requests"}, "mode": {"requests", "python-liquid", "tenacity>=8.0.1"} | sqlglot_lib, "mongodb": {"pymongo[srv]>=3.11", "packaging"}, "mssql": sql_common | mssql_common, @@ -729,7 +717,6 @@ if plugin for dependency in plugins[plugin] ), - *pydantic_no_v2, } dev_requirements = { @@ -964,30 +951,7 @@ extras_require={ "base": list(framework_common), **{ - plugin: list( - framework_common - | ( - # While pydantic v2 support is experimental, require that all plugins - # continue to use v1. This will ensure that no ingestion recipes break. - pydantic_no_v2 - if plugin - not in { - "airflow", - "datahub-rest", - "datahub-kafka", - "sync-file-emitter", - "sql-parser", - # Some sources have been manually tested for compatibility with pydantic v2. 
- "iceberg", - "feast", - "bigquery-slim", - "snowflake-slim", - "mysql", # tested in smoke-test - } - else set() - ) - | dependencies - ) + plugin: list(framework_common | dependencies) for (plugin, dependencies) in plugins.items() }, "all": list( diff --git a/metadata-ingestion/src/datahub/api/entities/dataset/dataset.py b/metadata-ingestion/src/datahub/api/entities/dataset/dataset.py index 0c1eb820a1fcc1..59c6d41eb5a09a 100644 --- a/metadata-ingestion/src/datahub/api/entities/dataset/dataset.py +++ b/metadata-ingestion/src/datahub/api/entities/dataset/dataset.py @@ -17,6 +17,7 @@ import yaml from pydantic import ( BaseModel, + ConfigDict, Field, StrictStr, root_validator, @@ -66,33 +67,17 @@ StructuredPropertyUrn, TagUrn, ) -from datahub.pydantic.compat import ( - PYDANTIC_VERSION, -) from datahub.specific.dataset import DatasetPatchBuilder from datahub.utilities.urns.dataset_urn import DatasetUrn -logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) class StrictModel(BaseModel): - """ - Base model with strict validation. - Compatible with both Pydantic v1 and v2. - """ - - if PYDANTIC_VERSION >= 2: - # Pydantic v2 config - model_config = { - "validate_assignment": True, - "extra": "forbid", - } - else: - # Pydantic v1 config - class Config: - validate_assignment = True - extra = "forbid" + model_config = ConfigDict( + validate_assignment=True, + extra="forbid", + ) # Define type aliases for the complex types @@ -134,7 +119,7 @@ def urn_strip(urn: str) -> str: class SchemaFieldSpecification(StrictModel): id: Optional[str] = None - urn: Optional[str] = None + urn: Optional[str] = Field(default=None, validate_default=True) structured_properties: Optional[StructuredProperties] = None type: Optional[str] = None nativeDataType: Optional[str] = None @@ -288,66 +273,33 @@ def _from_datahub_type( return "record" raise ValueError(f"Type {input_type} is not a valid primitive type") - if PYDANTIC_VERSION < 2: - - def dict(self, **kwargs): - """Custom dict method for Pydantic v1 to handle YAML serialization properly.""" - exclude = kwargs.pop("exclude", None) or set() - - # If description and doc are identical, exclude doc from the output - if self.description == self.doc and self.description is not None: - exclude.add("doc") - - # if nativeDataType and type are identical, exclude nativeDataType from the output - if self.nativeDataType == self.type and self.nativeDataType is not None: - exclude.add("nativeDataType") + def model_dump(self, **kwargs): + """Custom model_dump method for Pydantic v2 to handle YAML serialization properly.""" + exclude = kwargs.pop("exclude", None) or set() - # if the id is the same as the urn's fieldPath, exclude id from the output + # If description and doc are identical, exclude doc from the output + if self.description == self.doc and self.description is not None: + exclude.add("doc") - if self.urn: - field_urn = SchemaFieldUrn.from_string(self.urn) - if Dataset._simplify_field_path(field_urn.field_path) == self.id: - exclude.add("urn") + # if nativeDataType and type are identical, exclude nativeDataType from the output + if self.nativeDataType == self.type and self.nativeDataType is not None: + exclude.add("nativeDataType") - kwargs.pop("exclude_defaults", None) + # if the id is the same as the urn's fieldPath, exclude id from the output + if self.urn: + field_urn = SchemaFieldUrn.from_string(self.urn) + if Dataset._simplify_field_path(field_urn.field_path) == self.id: + exclude.add("urn") - self.structured_properties = ( - 
StructuredPropertiesHelper.simplify_structured_properties_list( - self.structured_properties - ) + self.structured_properties = ( + StructuredPropertiesHelper.simplify_structured_properties_list( + self.structured_properties ) - - return super().dict(exclude=exclude, exclude_defaults=True, **kwargs) - - else: - # For v2, implement model_dump with similar logic as dict - def model_dump(self, **kwargs): - """Custom model_dump method for Pydantic v2 to handle YAML serialization properly.""" - exclude = kwargs.pop("exclude", None) or set() - - # If description and doc are identical, exclude doc from the output - if self.description == self.doc and self.description is not None: - exclude.add("doc") - - # if nativeDataType and type are identical, exclude nativeDataType from the output - if self.nativeDataType == self.type and self.nativeDataType is not None: - exclude.add("nativeDataType") - - # if the id is the same as the urn's fieldPath, exclude id from the output - if self.urn: - field_urn = SchemaFieldUrn.from_string(self.urn) - if Dataset._simplify_field_path(field_urn.field_path) == self.id: - exclude.add("urn") - - self.structured_properties = ( - StructuredPropertiesHelper.simplify_structured_properties_list( - self.structured_properties - ) + ) + if hasattr(super(), "model_dump"): + return super().model_dump( # type: ignore + exclude=exclude, exclude_defaults=True, **kwargs ) - if hasattr(super(), "model_dump"): - return super().model_dump( # type: ignore - exclude=exclude, exclude_defaults=True, **kwargs - ) class SchemaSpecification(BaseModel): @@ -380,9 +332,9 @@ class Dataset(StrictModel): id: Optional[str] = None platform: Optional[str] = None env: str = "PROD" - urn: Optional[str] = None + urn: Optional[str] = Field(default=None, validate_default=True) description: Optional[str] = None - name: Optional[str] = None + name: Optional[str] = Field(default=None, validate_default=True) schema_metadata: Optional[SchemaSpecification] = Field(default=None, alias="schema") downstreams: Optional[List[str]] = None properties: Optional[Dict[str, str]] = None @@ -940,80 +892,42 @@ def from_datahub( downstreams=downstreams if config.include_downstreams else None, ) - if PYDANTIC_VERSION < 2: + def model_dump(self, **kwargs): + """Custom model_dump method to handle YAML serialization properly.""" + exclude = kwargs.pop("exclude", None) or set() - def dict(self, **kwargs): - """Custom dict method for Pydantic v1 to handle YAML serialization properly.""" - exclude = kwargs.pop("exclude", set()) + # If id and name are identical, exclude name from the output + if self.id == self.name and self.id is not None: + exclude.add("name") - # If id and name are identical, exclude name from the output - if self.id == self.name and self.id is not None: - exclude.add("name") - - # if subtype and subtypes are identical or subtypes is a singleton list, exclude subtypes from the output - if self.subtypes and len(self.subtypes) == 1: - self.subtype = self.subtypes[0] - exclude.add("subtypes") + # if subtype and subtypes are identical or subtypes is a singleton list, exclude subtypes from the output + if self.subtypes and len(self.subtypes) == 1: + self.subtype = self.subtypes[0] + exclude.add("subtypes") + if hasattr(super(), "model_dump"): + result = super().model_dump(exclude=exclude, **kwargs) # type: ignore + else: result = super().dict(exclude=exclude, **kwargs) - # Custom handling for schema_metadata/schema - if self.schema_metadata and "schema" in result: - schema_data = result["schema"] - - # Handle fields 
if they exist - if "fields" in schema_data and isinstance(schema_data["fields"], list): - # Process each field using its custom dict method - processed_fields = [] - if self.schema_metadata and self.schema_metadata.fields: - for field in self.schema_metadata.fields: - if field: - # Use dict method for Pydantic v1 - processed_field = field.dict(**kwargs) - processed_fields.append(processed_field) - - # Replace the fields in the result with the processed ones - schema_data["fields"] = processed_fields - - return result - else: - - def model_dump(self, **kwargs): - """Custom model_dump method for Pydantic v2 to handle YAML serialization properly.""" - exclude = kwargs.pop("exclude", None) or set() - - # If id and name are identical, exclude name from the output - if self.id == self.name and self.id is not None: - exclude.add("name") - - # if subtype and subtypes are identical or subtypes is a singleton list, exclude subtypes from the output - if self.subtypes and len(self.subtypes) == 1: - self.subtype = self.subtypes[0] - exclude.add("subtypes") - - if hasattr(super(), "model_dump"): - result = super().model_dump(exclude=exclude, **kwargs) # type: ignore - else: - result = super().dict(exclude=exclude, **kwargs) - - # Custom handling for schema_metadata/schema - if self.schema_metadata and "schema" in result: - schema_data = result["schema"] + # Custom handling for schema_metadata/schema + if self.schema_metadata and "schema" in result: + schema_data = result["schema"] - # Handle fields if they exist - if "fields" in schema_data and isinstance(schema_data["fields"], list): - # Process each field using its custom model_dump method - processed_fields = [] - if self.schema_metadata and self.schema_metadata.fields: - for field in self.schema_metadata.fields: - if field: - processed_field = field.model_dump(**kwargs) - processed_fields.append(processed_field) + # Handle fields if they exist + if "fields" in schema_data and isinstance(schema_data["fields"], list): + # Process each field using its custom model_dump method + processed_fields = [] + if self.schema_metadata and self.schema_metadata.fields: + for field in self.schema_metadata.fields: + if field: + processed_field = field.model_dump(**kwargs) + processed_fields.append(processed_field) - # Replace the fields in the result with the processed ones - schema_data["fields"] = processed_fields + # Replace the fields in the result with the processed ones + schema_data["fields"] = processed_fields - return result + return result def to_yaml( self, @@ -1025,13 +939,7 @@ def to_yaml( Returns True if file was written, False if no changes were detected. 
""" # Create new model data - # Create new model data - choose dict() or model_dump() based on Pydantic version - if PYDANTIC_VERSION >= 2: - new_data = self.model_dump( - exclude_none=True, exclude_unset=True, by_alias=True - ) - else: - new_data = self.dict(exclude_none=True, exclude_unset=True, by_alias=True) + new_data = self.model_dump(exclude_none=True, exclude_unset=True, by_alias=True) # Set up ruamel.yaml for preserving comments yaml_handler = YAML(typ="rt") # round-trip mode diff --git a/metadata-ingestion/src/datahub/api/entities/external/restricted_text.py b/metadata-ingestion/src/datahub/api/entities/external/restricted_text.py index 99e5534c65a4c7..4eb0b17a513650 100644 --- a/metadata-ingestion/src/datahub/api/entities/external/restricted_text.py +++ b/metadata-ingestion/src/datahub/api/entities/external/restricted_text.py @@ -13,20 +13,8 @@ from typing import Any, ClassVar, Optional, Set, Union -# Check Pydantic version and import accordingly -try: - from pydantic import VERSION - - PYDANTIC_V2 = int(VERSION.split(".")[0]) >= 2 -except (ImportError, AttributeError): - # Fallback for older versions that don't have VERSION - PYDANTIC_V2 = False - -if PYDANTIC_V2: - from pydantic import GetCoreSchemaHandler # type: ignore[attr-defined] - from pydantic_core import core_schema -else: - from pydantic.validators import str_validator +from pydantic import GetCoreSchemaHandler +from pydantic_core import core_schema class RestrictedTextConfig: @@ -179,69 +167,24 @@ def with_config( truncation_suffix=truncation_suffix, ) - # Pydantic v2 methods - if PYDANTIC_V2: - - @classmethod - def _validate( - cls, - __input_value: Union[str, "RestrictedText"], - _: core_schema.ValidationInfo, - ) -> "RestrictedText": - """Validate and create a RestrictedText instance.""" - if isinstance(__input_value, RestrictedText): - return __input_value - return cls(__input_value) - - @classmethod - def __get_pydantic_core_schema__( - cls, source: type[Any], handler: GetCoreSchemaHandler - ) -> core_schema.CoreSchema: - """Get the Pydantic core schema for this type.""" - return core_schema.with_info_after_validator_function( - cls._validate, - core_schema.str_schema(), - field_name=cls.__name__, - ) - - # Pydantic v1 methods - else: - - @classmethod - def __get_validators__(cls): - """Pydantic v1 validator method.""" - yield cls.validate - - @classmethod - def validate(cls, v, field=None): - """Validate and create a RestrictedText instance for Pydantic v1.""" - if isinstance(v, RestrictedText): - return v - - if not isinstance(v, str): - # Let pydantic handle the string validation - v = str_validator(v) - - # Create instance - instance = cls(v) - - # Check if there's a field default that contains configuration - if ( - field - and hasattr(field, "default") - and isinstance(field.default, RestrictedTextConfig) - ): - config = field.default - instance._configure( - max_length=config.max_length, - forbidden_chars=config.forbidden_chars, - replacement_char=config.replacement_char, - truncation_suffix=config.truncation_suffix, - ) - - return instance - - @classmethod - def __modify_schema__(cls, field_schema): - """Modify the JSON schema for Pydantic v1.""" - field_schema.update(type="string", examples=["example string"]) + @classmethod + def _validate( + cls, + __input_value: Union[str, "RestrictedText"], + _: core_schema.ValidationInfo, + ) -> "RestrictedText": + """Validate and create a RestrictedText instance.""" + if isinstance(__input_value, RestrictedText): + return __input_value + return cls(__input_value) 
+ + @classmethod + def __get_pydantic_core_schema__( + cls, source_type: Any, handler: GetCoreSchemaHandler + ) -> core_schema.CoreSchema: + """Get the Pydantic core schema for this type.""" + return core_schema.with_info_after_validator_function( + cls._validate, + core_schema.str_schema(), + field_name=cls.__name__, + ) diff --git a/metadata-ingestion/src/datahub/cli/quickstart_versioning.py b/metadata-ingestion/src/datahub/cli/quickstart_versioning.py index 9739af5127f4d1..16bf7559d45ef8 100644 --- a/metadata-ingestion/src/datahub/cli/quickstart_versioning.py +++ b/metadata-ingestion/src/datahub/cli/quickstart_versioning.py @@ -21,7 +21,7 @@ class QuickstartExecutionPlan(BaseModel): composefile_git_ref: str docker_tag: str - mysql_tag: Optional[str] + mysql_tag: Optional[str] = None def _is_it_a_version(version: str) -> bool: diff --git a/metadata-ingestion/src/datahub/configuration/_config_enum.py b/metadata-ingestion/src/datahub/configuration/_config_enum.py index 190a006b077d9f..3c9349382bc4ea 100644 --- a/metadata-ingestion/src/datahub/configuration/_config_enum.py +++ b/metadata-ingestion/src/datahub/configuration/_config_enum.py @@ -1,10 +1,8 @@ from enum import Enum +from typing import Any -import pydantic -import pydantic.types -import pydantic.validators - -from datahub.configuration.pydantic_migration_helpers import PYDANTIC_VERSION_2 +from pydantic import GetCoreSchemaHandler +from pydantic_core import CoreSchema, core_schema class ConfigEnum(Enum): @@ -17,25 +15,13 @@ def _generate_next_value_( # type: ignore # From https://stackoverflow.com/a/44785241/5004662. return name - if PYDANTIC_VERSION_2: - # if TYPE_CHECKING: - # from pydantic import GetCoreSchemaHandler - - @classmethod - def __get_pydantic_core_schema__(cls, source_type, handler): # type: ignore - from pydantic_core import core_schema - - return core_schema.no_info_before_validator_function( - cls.validate, handler(source_type) - ) - - else: - - @classmethod - def __get_validators__(cls) -> "pydantic.types.CallableGenerator": - # We convert the text to uppercase before attempting to match it to an enum value. - yield cls.validate - yield pydantic.validators.enum_member_validator + @classmethod + def __get_pydantic_core_schema__( + cls, source_type: Any, handler: GetCoreSchemaHandler + ) -> CoreSchema: + return core_schema.no_info_before_validator_function( + cls.validate, handler(source_type) + ) @classmethod def validate(cls, v): # type: ignore[no-untyped-def] diff --git a/metadata-ingestion/src/datahub/configuration/common.py b/metadata-ingestion/src/datahub/configuration/common.py index aae894f1c5370e..bca4162ae70432 100644 --- a/metadata-ingestion/src/datahub/configuration/common.py +++ b/metadata-ingestion/src/datahub/configuration/common.py @@ -14,14 +14,12 @@ runtime_checkable, ) -import pydantic from cached_property import cached_property -from pydantic import BaseModel, Extra, ValidationError +from pydantic import BaseModel, ValidationError from pydantic.fields import Field from typing_extensions import Protocol, Self from datahub.configuration._config_enum import ConfigEnum as ConfigEnum -from datahub.configuration.pydantic_migration_helpers import PYDANTIC_VERSION_2 from datahub.utilities.dedup_list import deduplicate_list REDACT_KEYS = { @@ -97,35 +95,22 @@ def _schema_extra(schema: Dict[str, Any], model: Type["ConfigModel"]) -> None: for key in remove_fields: del schema["properties"][key] - # This is purely to suppress pydantic's warnings, since this class is used everywhere. 
- if PYDANTIC_VERSION_2: - extra = "forbid" - ignored_types = (cached_property,) - json_schema_extra = _schema_extra - else: - extra = Extra.forbid - underscore_attrs_are_private = True - keep_untouched = ( - cached_property, - ) # needed to allow cached_property to work. See https://github.com/samuelcolvin/pydantic/issues/1241 for more info. - schema_extra = _schema_extra + extra = "forbid" + ignored_types = (cached_property,) + json_schema_extra = _schema_extra @classmethod def parse_obj_allow_extras(cls, obj: Any) -> Self: - if PYDANTIC_VERSION_2: - try: - with unittest.mock.patch.dict( - cls.model_config, # type: ignore - {"extra": "allow"}, - clear=False, - ): - cls.model_rebuild(force=True) # type: ignore - return cls.parse_obj(obj) - finally: + try: + with unittest.mock.patch.dict( + cls.model_config, # type: ignore + {"extra": "allow"}, + clear=False, + ): cls.model_rebuild(force=True) # type: ignore - else: - with unittest.mock.patch.object(cls.Config, "extra", pydantic.Extra.allow): return cls.parse_obj(obj) + finally: + cls.model_rebuild(force=True) # type: ignore class PermissiveConfigModel(ConfigModel): @@ -135,10 +120,7 @@ class PermissiveConfigModel(ConfigModel): # It is usually used for argument bags that are passed through to third-party libraries. class Config: - if PYDANTIC_VERSION_2: # noqa: SIM108 - extra = "allow" - else: - extra = Extra.allow + extra = "allow" class TransformerSemantics(ConfigEnum): diff --git a/metadata-ingestion/src/datahub/configuration/import_resolver.py b/metadata-ingestion/src/datahub/configuration/import_resolver.py index 19627c7b8c9569..6708121adc7d0e 100644 --- a/metadata-ingestion/src/datahub/configuration/import_resolver.py +++ b/metadata-ingestion/src/datahub/configuration/import_resolver.py @@ -12,4 +12,4 @@ def _pydantic_resolver(v: Union[T, str]) -> T: def pydantic_resolve_key(field: str) -> classmethod: - return pydantic.validator(field, pre=True, allow_reuse=True)(_pydantic_resolver) + return pydantic.validator(field, pre=True, allow_reuse=True)(_pydantic_resolver) # type: ignore diff --git a/metadata-ingestion/src/datahub/configuration/pydantic_migration_helpers.py b/metadata-ingestion/src/datahub/configuration/pydantic_migration_helpers.py index bd931abe2e84d1..a6e5ef7067ddfb 100644 --- a/metadata-ingestion/src/datahub/configuration/pydantic_migration_helpers.py +++ b/metadata-ingestion/src/datahub/configuration/pydantic_migration_helpers.py @@ -1,40 +1,11 @@ -import pydantic.version -from packaging.version import Version - -PYDANTIC_VERSION_2: bool -if Version(pydantic.version.VERSION) >= Version("2.0"): - PYDANTIC_VERSION_2 = True -else: - PYDANTIC_VERSION_2 = False - - -# This can be used to silence deprecation warnings while we migrate. 
-if PYDANTIC_VERSION_2: - from pydantic import PydanticDeprecatedSince20 # type: ignore -else: - - class PydanticDeprecatedSince20(Warning): # type: ignore - pass - - -if PYDANTIC_VERSION_2: - from pydantic import BaseModel as GenericModel - from pydantic.v1 import ( # type: ignore - BaseModel as v1_BaseModel, - Extra as v1_Extra, - Field as v1_Field, - root_validator as v1_root_validator, - validator as v1_validator, - ) -else: - from pydantic import ( # type: ignore - BaseModel as v1_BaseModel, - Extra as v1_Extra, - Field as v1_Field, - root_validator as v1_root_validator, - validator as v1_validator, - ) - from pydantic.generics import GenericModel # type: ignore +from pydantic import PydanticDeprecatedSince20 +from pydantic.v1 import ( # type: ignore + BaseModel as v1_BaseModel, + Extra as v1_Extra, + Field as v1_Field, + root_validator as v1_root_validator, + validator as v1_validator, +) class v1_ConfigModel(v1_BaseModel): @@ -48,10 +19,10 @@ class Config: underscore_attrs_are_private = True +# TODO: Remove the warning ignore on PydanticDeprecatedSince20. + __all__ = [ - "PYDANTIC_VERSION_2", "PydanticDeprecatedSince20", - "GenericModel", "v1_ConfigModel", "v1_Field", "v1_root_validator", diff --git a/metadata-ingestion/src/datahub/configuration/validate_field_deprecation.py b/metadata-ingestion/src/datahub/configuration/validate_field_deprecation.py index 6134c4dab48174..bfc5423ae9382f 100644 --- a/metadata-ingestion/src/datahub/configuration/validate_field_deprecation.py +++ b/metadata-ingestion/src/datahub/configuration/validate_field_deprecation.py @@ -31,4 +31,4 @@ def _validate_deprecated(cls: Type, values: dict) -> dict: # https://github.com/pydantic/pydantic/blob/v1.10.9/pydantic/main.py#L264 # This hack ensures that multiple field deprecated do not overwrite each other. _validate_deprecated.__name__ = f"{_validate_deprecated.__name__}_{field}" - return pydantic.root_validator(pre=True, allow_reuse=True)(_validate_deprecated) + return pydantic.root_validator(pre=True, allow_reuse=True)(_validate_deprecated) # type: ignore diff --git a/metadata-ingestion/src/datahub/configuration/validate_field_removal.py b/metadata-ingestion/src/datahub/configuration/validate_field_removal.py index 6b8f0df0588e68..ace3dbf94b925e 100644 --- a/metadata-ingestion/src/datahub/configuration/validate_field_removal.py +++ b/metadata-ingestion/src/datahub/configuration/validate_field_removal.py @@ -25,4 +25,4 @@ def _validate_field_removal(cls: Type, values: dict) -> dict: # https://github.com/pydantic/pydantic/blob/v1.10.9/pydantic/main.py#L264 # This hack ensures that multiple field removals do not overwrite each other. _validate_field_removal.__name__ = f"{_validate_field_removal.__name__}_{field}" - return pydantic.root_validator(pre=True, allow_reuse=True)(_validate_field_removal) + return pydantic.root_validator(pre=True, allow_reuse=True)(_validate_field_removal) # type: ignore diff --git a/metadata-ingestion/src/datahub/configuration/validate_field_rename.py b/metadata-ingestion/src/datahub/configuration/validate_field_rename.py index de2a16e9bf247d..a1ad06ed1cb2bd 100644 --- a/metadata-ingestion/src/datahub/configuration/validate_field_rename.py +++ b/metadata-ingestion/src/datahub/configuration/validate_field_rename.py @@ -49,6 +49,4 @@ def _validate_field_rename(cls: Type, values: dict) -> dict: # validator with pre=True gets all the values that were passed in. 
# Given that a renamed field doesn't show up in the fields list, we can't use # the field-level validator, even with a different field name. - return pydantic.root_validator(pre=True, skip_on_failure=True, allow_reuse=True)( - _validate_field_rename - ) + return pydantic.root_validator(pre=True, allow_reuse=True)(_validate_field_rename) # type: ignore diff --git a/metadata-ingestion/src/datahub/configuration/validate_multiline_string.py b/metadata-ingestion/src/datahub/configuration/validate_multiline_string.py index 0baaf4f0264b99..034d9dfdd5cc20 100644 --- a/metadata-ingestion/src/datahub/configuration/validate_multiline_string.py +++ b/metadata-ingestion/src/datahub/configuration/validate_multiline_string.py @@ -28,4 +28,4 @@ def _validate_field( # https://github.com/pydantic/pydantic/blob/v1.10.9/pydantic/main.py#L264 # This hack ensures that multiple field deprecated do not overwrite each other. _validate_field.__name__ = f"{_validate_field.__name__}_{field}" - return pydantic.validator(field, pre=True, allow_reuse=True)(_validate_field) + return pydantic.validator(field, pre=True, allow_reuse=True)(_validate_field) # type: ignore diff --git a/metadata-ingestion/src/datahub/ingestion/glossary/datahub_classifier.py b/metadata-ingestion/src/datahub/ingestion/glossary/datahub_classifier.py index ba03083854e785..a6e92493a07c37 100644 --- a/metadata-ingestion/src/datahub/ingestion/glossary/datahub_classifier.py +++ b/metadata-ingestion/src/datahub/ingestion/glossary/datahub_classifier.py @@ -7,7 +7,6 @@ from pydantic.fields import Field from datahub.configuration.common import ConfigModel -from datahub.configuration.pydantic_migration_helpers import PYDANTIC_VERSION_2 from datahub.ingestion.glossary.classifier import Classifier from datahub.utilities.str_enum import StrEnum @@ -51,10 +50,7 @@ class ValuesFactorConfig(ConfigModel): class PredictionFactorsAndWeights(ConfigModel): class Config: - if PYDANTIC_VERSION_2: - populate_by_name = True - else: - allow_population_by_field_name = True + populate_by_name = True Name: float = Field(alias="name") Description: float = Field(alias="description") @@ -64,10 +60,7 @@ class Config: class InfoTypeConfig(ConfigModel): class Config: - if PYDANTIC_VERSION_2: - populate_by_name = True - else: - allow_population_by_field_name = True + populate_by_name = True Prediction_Factors_and_Weights: PredictionFactorsAndWeights = Field( description="Factors and their weights to consider when predicting info types", diff --git a/metadata-ingestion/src/datahub/ingestion/run/pipeline_config.py b/metadata-ingestion/src/datahub/ingestion/run/pipeline_config.py index 4982bef370d92b..067dff70cc990f 100644 --- a/metadata-ingestion/src/datahub/ingestion/run/pipeline_config.py +++ b/metadata-ingestion/src/datahub/ingestion/run/pipeline_config.py @@ -85,7 +85,9 @@ class PipelineConfig(ConfigModel): source: SourceConfig sink: Optional[DynamicTypedConfig] = None transformers: Optional[List[DynamicTypedConfig]] = None - flags: FlagsConfig = Field(default=FlagsConfig(), hidden_from_docs=True) + flags: FlagsConfig = Field( + default=FlagsConfig(), json_schema_extra={"hidden_from_docs": True} + ) reporting: List[ReporterConfig] = [] run_id: str = DEFAULT_RUN_ID datahub_api: Optional[DatahubClientConfig] = None diff --git a/metadata-ingestion/src/datahub/ingestion/source/abs/config.py b/metadata-ingestion/src/datahub/ingestion/source/abs/config.py index 583876a8dda6c0..0df1644ddcffa2 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/abs/config.py +++ 
b/metadata-ingestion/src/datahub/ingestion/source/abs/config.py @@ -151,7 +151,7 @@ def platform_not_empty(cls, platform: Any, values: dict) -> str: raise ValueError("platform must not be empty") return platform - @pydantic.root_validator() + @pydantic.root_validator(skip_on_failure=True) def ensure_profiling_pattern_is_passed_to_profiling( cls, values: Dict[str, Any] ) -> Dict[str, Any]: diff --git a/metadata-ingestion/src/datahub/ingestion/source/abs/datalake_profiler_config.py b/metadata-ingestion/src/datahub/ingestion/source/abs/datalake_profiler_config.py index d12ff7415faefc..58e930eb6e809c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/abs/datalake_profiler_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/abs/datalake_profiler_config.py @@ -72,7 +72,7 @@ class DataLakeProfilerConfig(ConfigModel): description="Whether to profile for the sample values for all columns.", ) - @pydantic.root_validator() + @pydantic.root_validator(skip_on_failure=True) def ensure_field_level_settings_are_normalized( cls: "DataLakeProfilerConfig", values: Dict[str, Any] ) -> Dict[str, Any]: diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/tag_entities.py b/metadata-ingestion/src/datahub/ingestion/source/aws/tag_entities.py index b122388851d2ea..e3f858ef7032d2 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/tag_entities.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/tag_entities.py @@ -37,7 +37,7 @@ class LakeFormationTagPlatformResourceId(BaseModel, ExternalEntityId): tag_key: str tag_value: Optional[str] = None - platform_instance: Optional[str] + platform_instance: Optional[str] = None catalog: Optional[str] = None exists_in_lake_formation: bool = False persisted: bool = False @@ -227,7 +227,7 @@ class LakeFormationTagPlatformResource(BaseModel, ExternalEntity): datahub_urns: LinkedResourceSet managed_by_datahub: bool id: LakeFormationTagPlatformResourceId - allowed_values: Optional[List[str]] + allowed_values: Optional[List[str]] = None def get_id(self) -> ExternalEntityId: return self.id diff --git a/metadata-ingestion/src/datahub/ingestion/source/azure/azure_common.py b/metadata-ingestion/src/datahub/ingestion/source/azure/azure_common.py index 46de4e09d7ee5b..546ae04e991556 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/azure/azure_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/azure/azure_common.py @@ -81,7 +81,7 @@ def get_credentials( ) return self.sas_token if self.sas_token is not None else self.account_key - @root_validator() + @root_validator(skip_on_failure=True) def _check_credential_values(cls, values: Dict) -> Dict: if ( values.get("account_key") diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py index 29d13da550a0cc..560d301445cc62 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py @@ -184,7 +184,7 @@ class BigQueryFilterConfig(SQLFilterConfig): # NOTE: `schema_pattern` is added here only to hide it from docs. 
schema_pattern: AllowDenyPattern = Field( default=AllowDenyPattern.allow_all(), - hidden_from_docs=True, + json_schema_extra={"hidden_from_docs": True}, ) @root_validator(pre=False, skip_on_failure=True) @@ -321,7 +321,7 @@ class BigQueryV2Config( ) number_of_datasets_process_in_batch: int = Field( - hidden_from_docs=True, + json_schema_extra={"hidden_from_docs": True}, default=10000, description="Number of table queried in batch when getting metadata. This is a low level config property " "which should be touched with care.", @@ -440,13 +440,13 @@ def have_table_data_read_permission(self) -> bool: ) run_optimized_column_query: bool = Field( - hidden_from_docs=True, + json_schema_extra={"hidden_from_docs": True}, default=False, description="Run optimized column query to get column information. This is an experimental feature and may not work for all cases.", ) file_backed_cache_size: int = Field( - hidden_from_docs=True, + json_schema_extra={"hidden_from_docs": True}, default=2000, description="Maximum number of entries for the in-memory caches of FileBacked data structures.", ) @@ -459,7 +459,7 @@ def have_table_data_read_permission(self) -> bool: schema_resolution_batch_size: int = Field( default=100, description="The number of tables to process in a batch when resolving schema from DataHub.", - hidden_from_schema=True, + json_schema_extra={"hidden_from_docs": True}, ) max_threads_dataset_parallelism: int = Field( @@ -518,6 +518,6 @@ def validate_bigquery_audit_metadata_datasets( def get_table_pattern(self, pattern: List[str]) -> str: return "|".join(pattern) if pattern else "" - platform_instance_not_supported_for_bigquery = pydantic_removed_field( + platform_instance_not_supported_for_bigquery: classmethod = pydantic_removed_field( "platform_instance" ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries_extractor.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries_extractor.py index 0f9471219c6590..a7bff0ee643dc9 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries_extractor.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries_extractor.py @@ -91,7 +91,7 @@ class BigQueryQueriesExtractorConfig(BigQueryBaseConfig): description="Local path to store the audit log.", # TODO: For now, this is simply an advanced config to make local testing easier. # Eventually, we will want to store date-specific files in the directory and use it as a cache. - hidden_from_docs=True, + json_schema_extra={"hidden_from_docs": True}, ) user_email_pattern: AllowDenyPattern = Field( diff --git a/metadata-ingestion/src/datahub/ingestion/source/data_lake_common/path_spec.py b/metadata-ingestion/src/datahub/ingestion/source/data_lake_common/path_spec.py index d09bdb07449776..2a7544e1c4953c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/data_lake_common/path_spec.py +++ b/metadata-ingestion/src/datahub/ingestion/source/data_lake_common/path_spec.py @@ -62,8 +62,8 @@ class SortKey(ConfigModel): date_format: Optional[str] = Field( default=None, - type=str, description="The date format to use when sorting. This is used to parse the date from the key. 
The format should follow the java [SimpleDateFormat](https://docs.oracle.com/javase/8/docs/api/java/text/SimpleDateFormat.html) format.", + validate_default=True, ) @pydantic.validator("date_format", always=True) @@ -110,7 +110,7 @@ class Config: # This is not used yet, but will be used in the future to sort the partitions sort_key: Optional[SortKey] = Field( - hidden_from_docs=True, + json_schema_extra={"hidden_from_docs": True}, default=None, description="Sort key to use when sorting the partitions. This is useful when the partitions are not sorted in the order of the data. The key can be a compound key based on the path_spec variables.", ) @@ -260,7 +260,7 @@ def get_folder_named_vars( ) -> Union[None, parse.Result, parse.Match]: return self.compiled_folder_include.parse(path) - @pydantic.root_validator() + @pydantic.root_validator(skip_on_failure=True) def validate_no_double_stars(cls, values: Dict) -> Dict: if "include" not in values: return values @@ -476,6 +476,7 @@ def glob_include(self): return glob_include @pydantic.root_validator(skip_on_failure=True) + @classmethod def validate_path_spec(cls, values: Dict) -> Dict[str, Any]: # validate that main fields are populated required_fields = ["include", "file_types", "default_extension"] diff --git a/metadata-ingestion/src/datahub/ingestion/source/datahub/config.py b/metadata-ingestion/src/datahub/ingestion/source/datahub/config.py index d5f41cc2eb5a41..265dc5a607aad7 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/datahub/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/datahub/config.py @@ -101,13 +101,13 @@ class DataHubSourceConfig(StatefulIngestionConfigBase): pull_from_datahub_api: bool = Field( default=False, description="Use the DataHub API to fetch versioned aspects.", - hidden_from_docs=True, + json_schema_extra={"hidden_from_docs": True}, ) max_workers: int = Field( default=5 * (os.cpu_count() or 4), description="Number of worker threads to use for datahub api ingestion.", - hidden_from_docs=True, + json_schema_extra={"hidden_from_docs": True}, ) urn_pattern: AllowDenyPattern = Field(default=AllowDenyPattern()) @@ -119,7 +119,7 @@ class DataHubSourceConfig(StatefulIngestionConfigBase): ) structured_properties_template_cache_invalidation_interval: int = Field( - hidden_from_docs=True, + json_schema_extra={"hidden_from_docs": True}, default=60, description="Interval in seconds to invalidate the structured properties template cache.", ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_config.py b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_config.py index a5a35f9750d07a..5c57c59af2b99b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_config.py @@ -102,7 +102,7 @@ class ProfileConfig(GEProfilingBaseConfig): ) include_field_median_value: bool = Field( default=False, - hidden_from_docs=True, + json_schema_extra={"hidden_from_docs": True}, description="Median causes a number of issues in Dremio.", ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/ge_profiling_config.py b/metadata-ingestion/src/datahub/ingestion/source/ge_profiling_config.py index 7b9bd9dd097709..5d6b0dc56905c0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/ge_profiling_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/ge_profiling_config.py @@ -125,7 +125,7 @@ class GEProfilingConfig(GEProfilingBaseConfig): description="Profile table only if it has 
been updated since these many number of days. " "If set to `null`, no constraint of last modified time for tables to profile. " "Supported only in `snowflake` and `BigQuery`.", - schema_extra={"supported_sources": ["snowflake", "bigquery"]}, + json_schema_extra={"supported_sources": ["snowflake", "bigquery"]}, ) profile_table_size_limit: Optional[int] = Field( @@ -133,7 +133,7 @@ class GEProfilingConfig(GEProfilingBaseConfig): description="Profile tables only if their size is less than specified GBs. If set to `null`, " "no limit on the size of tables to profile. Supported only in `Snowflake`, `BigQuery` and " "`Databricks`. Supported for `Oracle` based on calculated size from gathered stats.", - schema_extra={ + json_schema_extra={ "supported_sources": ["snowflake", "bigquery", "unity-catalog", "oracle"] }, ) @@ -143,14 +143,14 @@ class GEProfilingConfig(GEProfilingBaseConfig): description="Profile tables only if their row count is less than specified count. " "If set to `null`, no limit on the row count of tables to profile. Supported only in " "`Snowflake`, `BigQuery`. Supported for `Oracle` based on gathered stats.", - schema_extra={"supported_sources": ["snowflake", "bigquery", "oracle"]}, + json_schema_extra={"supported_sources": ["snowflake", "bigquery", "oracle"]}, ) profile_table_row_count_estimate_only: bool = Field( default=False, description="Use an approximate query for row count. This will be much faster but slightly " "less accurate. Only supported for Postgres and MySQL. ", - schema_extra={"supported_sources": ["postgres", "mysql"]}, + json_schema_extra={"supported_sources": ["postgres", "mysql"]}, ) # The query combiner enables us to combine multiple queries into a single query, @@ -167,32 +167,32 @@ class GEProfilingConfig(GEProfilingBaseConfig): default=True, description="Whether to profile partitioned tables. Only BigQuery and Aws Athena supports this. " "If enabled, latest partition data is used for profiling.", - schema_extra={"supported_sources": ["athena", "bigquery"]}, + json_schema_extra={"supported_sources": ["athena", "bigquery"]}, ) partition_datetime: Optional[datetime.datetime] = Field( default=None, description="If specified, profile only the partition which matches this datetime. " "If not specified, profile the latest partition. Only Bigquery supports this.", - schema_extra={"supported_sources": ["bigquery"]}, + json_schema_extra={"supported_sources": ["bigquery"]}, ) use_sampling: bool = Field( default=True, description="Whether to profile column level stats on sample of table. Only BigQuery and Snowflake support this. " "If enabled, profiling is done on rows sampled from table. Sampling is not done for smaller tables. ", - schema_extra={"supported_sources": ["bigquery", "snowflake"]}, + json_schema_extra={"supported_sources": ["bigquery", "snowflake"]}, ) sample_size: int = Field( default=10000, description="Number of rows to be sampled from table for column level profiling." "Applicable only if `use_sampling` is set to True.", - schema_extra={"supported_sources": ["bigquery", "snowflake"]}, + json_schema_extra={"supported_sources": ["bigquery", "snowflake"]}, ) profile_external_tables: bool = Field( default=False, description="Whether to profile external tables. 
Only Snowflake and Redshift supports this.", - schema_extra={"supported_sources": ["redshift", "snowflake"]}, + json_schema_extra={"supported_sources": ["redshift", "snowflake"]}, ) tags_to_ignore_sampling: Optional[List[str]] = pydantic.Field( diff --git a/metadata-ingestion/src/datahub/ingestion/source/hex/hex.py b/metadata-ingestion/src/datahub/ingestion/source/hex/hex.py index 808a1af767ed05..b02ce3f089ce18 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/hex/hex.py +++ b/metadata-ingestion/src/datahub/ingestion/source/hex/hex.py @@ -68,7 +68,7 @@ class HexSourceConfig( ) include_components: bool = Field( default=True, - desciption="Include Hex Components in the ingestion", + description="Include Hex Components in the ingestion", ) page_size: int = Field( default=HEX_API_PAGE_SIZE_DEFAULT, diff --git a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg.py b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg.py index c97840950af2aa..d84b5fa7f6875c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg.py +++ b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg.py @@ -524,11 +524,11 @@ def _get_dataset_properties_aspect( custom_properties["format-version"] = str(table.metadata.format_version) custom_properties["partition-spec"] = str(self._get_partition_aspect(table)) last_modified: Optional[int] = table.metadata.last_updated_ms - if table.current_snapshot(): - custom_properties["snapshot-id"] = str(table.current_snapshot().snapshot_id) - custom_properties["manifest-list"] = table.current_snapshot().manifest_list + if current_snapshot := table.current_snapshot(): + custom_properties["snapshot-id"] = str(current_snapshot.snapshot_id) + custom_properties["manifest-list"] = current_snapshot.manifest_list if not last_modified: - last_modified = int(table.current_snapshot().timestamp_ms) + last_modified = int(current_snapshot.timestamp_ms) if "created-at" in custom_properties: try: dt = dateutil_parser.isoparse(custom_properties["created-at"]) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py index 0f8d86a2cbd295..b3168b2e4adae5 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py @@ -6,6 +6,7 @@ import pydantic from looker_sdk.sdk.api40.models import DBConnection from pydantic import Field, validator +from pydantic_core import core_schema from datahub.configuration import ConfigModel from datahub.configuration.common import AllowDenyPattern, ConfigurationError @@ -31,23 +32,29 @@ class NamingPattern(ConfigModel): pattern: str @classmethod - def __get_validators__(cls): - yield cls.pydantic_accept_raw_pattern - yield cls.validate - yield cls.pydantic_validate_pattern - - @classmethod - def pydantic_accept_raw_pattern(cls, v): - if isinstance(v, (NamingPattern, dict)): - return v - assert isinstance(v, str), "pattern must be a string" - return {"pattern": v} - - @classmethod - def pydantic_validate_pattern(cls, v): - assert isinstance(v, NamingPattern) - assert v.validate_pattern(cls.REQUIRE_AT_LEAST_ONE_VAR) - return v + def __get_pydantic_core_schema__( + cls, source_type: Any, handler: Any + ) -> core_schema.CoreSchema: + """Pydantic v2 core schema for NamingPattern validation.""" + + def validate_string_input(value: Any) -> Union[NamingPattern, dict]: + if isinstance(value, (cls, dict)): + return value 
+ if isinstance(value, str): + return {"pattern": value} + raise ValueError("pattern must be a string") + + def validate_pattern_rules(instance: "NamingPattern") -> "NamingPattern": + instance.validate_pattern(cls.REQUIRE_AT_LEAST_ONE_VAR) + return instance + + return core_schema.chain_schema( + [ + core_schema.no_info_plain_validator_function(validate_string_input), + handler(cls), + core_schema.no_info_plain_validator_function(validate_pattern_rules), + ] + ) @classmethod def allowed_docstring(cls) -> str: @@ -136,7 +143,7 @@ class LookerCommonConfig(EnvConfigMixin, PlatformInstanceConfigMixin): # TODO: This shouldn't be part of the config. "looker", description="Default platform name.", - hidden_from_docs=True, + json_schema_extra={"hidden_from_docs": True}, ) extract_column_level_lineage: bool = Field( True, diff --git a/metadata-ingestion/src/datahub/ingestion/source/mode.py b/metadata-ingestion/src/datahub/ingestion/source/mode.py index a2f012c1fa0a65..7411c3c2c4a867 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/mode.py +++ b/metadata-ingestion/src/datahub/ingestion/source/mode.py @@ -203,7 +203,7 @@ class ModeConfig( items_per_page: int = Field( default=DEFAULT_API_ITEMS_PER_PAGE, description="Number of items per page for paginated API requests.", - hidden_from_docs=True, + json_schema_extra={"hidden_from_docs": True}, ) @validator("connect_uri") diff --git a/metadata-ingestion/src/datahub/ingestion/source/neo4j/neo4j_source.py b/metadata-ingestion/src/datahub/ingestion/source/neo4j/neo4j_source.py index 64f6b130e0fe79..59b51fed26996f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/neo4j/neo4j_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/neo4j/neo4j_source.py @@ -57,7 +57,6 @@ ) log = logging.getLogger(__name__) -logging.basicConfig(level=logging.INFO) _type_mapping: Dict[Union[Type, str], Type] = { "list": UnionTypeClass, diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py index 3805d744ccb526..d318c0bfcf8e16 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py @@ -292,12 +292,12 @@ class PowerBiDashboardSourceConfig( StatefulIngestionConfigBase, DatasetSourceConfigMixin, IncrementalLineageConfigMixin ): platform_name: str = pydantic.Field( - default=Constant.PLATFORM_NAME, hidden_from_docs=True + default=Constant.PLATFORM_NAME, json_schema_extra={"hidden_from_docs": True} ) platform_urn: str = pydantic.Field( default=builder.make_data_platform_urn(platform=Constant.PLATFORM_NAME), - hidden_from_docs=True, + json_schema_extra={"hidden_from_docs": True}, ) # Organization Identifier @@ -306,7 +306,7 @@ class PowerBiDashboardSourceConfig( workspace_id: Optional[str] = pydantic.Field( default=None, description="[deprecated] Use workspace_id_pattern instead", - hidden_from_docs=True, + json_schema_extra={"hidden_from_docs": True}, ) # PowerBi workspace identifier workspace_id_pattern: AllowDenyPattern = pydantic.Field( @@ -333,7 +333,7 @@ class PowerBiDashboardSourceConfig( "DataHub supported datasources." "You can configured platform instance for dataset lineage. 
" "See Quickstart Recipe for mapping", - hidden_from_docs=True, + json_schema_extra={"hidden_from_docs": True}, ) ) # PowerBI datasource's server to platform instance mapping @@ -531,7 +531,7 @@ class PowerBiDashboardSourceConfig( metadata_api_timeout: int = pydantic.Field( default=30, description="timeout in seconds for Metadata Rest Api.", - hidden_from_docs=True, + json_schema_extra={"hidden_from_docs": True}, ) @root_validator(skip_on_failure=True) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server_domain.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server_domain.py index b30f7317ca2371..f7260a194f8360 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server_domain.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server_domain.py @@ -27,10 +27,8 @@ class CatalogItem(BaseModel): is_favorite: bool = Field(alias="IsFavorite") user_info: Any = Field(None, alias="UserInfo") display_name: Optional[str] = Field(None, alias="DisplayName") - has_data_sources: bool = Field(default=False, alias="HasDataSources") - data_sources: Optional[List["DataSource"]] = Field( - default_factory=list, alias="DataSources" - ) + has_data_sources: bool = Field(False, alias="HasDataSources") + data_sources: Optional[List["DataSource"]] = Field(None, alias="DataSources") @validator("display_name", always=True) def validate_diplay_name(cls, value, values): diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py index 80b834edb3940d..dcb816bbd8b7e8 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py @@ -97,7 +97,7 @@ class RedshiftConfig( scheme: str = Field( default="redshift+redshift_connector", description="", - hidden_from_docs=True, + json_schema_extra={"hidden_from_docs": True}, ) _database_alias_removed = pydantic_removed_field("database_alias") diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/datashares.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/datashares.py index a4e7d509fda5e4..652f3b8a21bd02 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/datashares.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/datashares.py @@ -26,7 +26,7 @@ class OutboundSharePlatformResource(BaseModel): namespace: str - platform_instance: Optional[str] + platform_instance: Optional[str] = None env: str source_database: str share_name: str diff --git a/metadata-ingestion/src/datahub/ingestion/source/slack/slack.py b/metadata-ingestion/src/datahub/ingestion/source/slack/slack.py index c0798aec350850..1cd65df7bd09c8 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/slack/slack.py +++ b/metadata-ingestion/src/datahub/ingestion/source/slack/slack.py @@ -203,38 +203,31 @@ class SlackSourceConfig( description="Bot token for the Slack workspace. Needs `users:read`, `users:read.email`, `users.profile:read`, and `team:read` scopes.", ) enrich_user_metadata: bool = Field( - type=bool, - default=True, + True, description="When enabled, will enrich provisioned DataHub users' metadata with information from Slack.", ) ingest_users: bool = Field( - type=bool, - default=True, + True, description="Whether to ingest users. 
When set to true, will ingest all users in the Slack workspace (as platform resources) to simplify user enrichment after they are provisioned on DataHub.", ) api_requests_per_min: int = Field( - type=int, - default=10, + 10, description="Number of API requests per minute. Low-level config. Do not tweak unless you are facing any issues.", ) ingest_public_channels: bool = Field( - type=bool, - default=False, + False, description="Whether to ingest public channels. If set to true needs `channels:read` scope.", ) channels_iteration_limit: int = Field( - type=int, - default=200, + 200, description="Limit the number of channels to be ingested in a iteration. Low-level config. Do not tweak unless you are facing any issues.", ) channel_min_members: int = Field( - type=int, - default=2, + 2, description="Ingest channels with at least this many members.", ) should_ingest_archived_channels: bool = Field( - type=bool, - default=False, + False, description="Whether to ingest archived channels.", ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py index 195f8e3eefd3fc..927dfd7af4abf3 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py @@ -262,7 +262,7 @@ class SnowflakeV2Config( ) structured_properties_template_cache_invalidation_interval: int = Field( - hidden_from_docs=True, + json_schema_extra={"hidden_from_docs": True}, default=60, description="Interval in seconds to invalidate the structured properties template cache.", ) @@ -332,7 +332,7 @@ class SnowflakeV2Config( # Allows empty containers to be ingested before datasets are added, avoiding permission errors warn_no_datasets: bool = Field( - hidden_from_docs=True, + json_schema_extra={"hidden_from_docs": True}, default=False, description="If True, warns when no datasets are found during ingestion. 
If False, ingestion fails when no datasets are found.", ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py index 63077f7f1a35ca..af29d9e01f7301 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py @@ -72,7 +72,7 @@ class ColumnUpstreamJob(BaseModel): class ColumnUpstreamLineage(BaseModel): - column_name: Optional[str] + column_name: Optional[str] = None upstreams: List[ColumnUpstreamJob] = Field(default_factory=list) @@ -91,9 +91,9 @@ class Query(BaseModel): class UpstreamLineageEdge(BaseModel): DOWNSTREAM_TABLE_NAME: str DOWNSTREAM_TABLE_DOMAIN: str - UPSTREAM_TABLES: Optional[List[UpstreamTableNode]] - UPSTREAM_COLUMNS: Optional[List[ColumnUpstreamLineage]] - QUERIES: Optional[List[Query]] + UPSTREAM_TABLES: Optional[List[UpstreamTableNode]] = None + UPSTREAM_COLUMNS: Optional[List[ColumnUpstreamLineage]] = None + QUERIES: Optional[List[Query]] = None _json_upstream_tables = pydantic_parse_json("UPSTREAM_TABLES") _json_upstream_columns = pydantic_parse_json("UPSTREAM_COLUMNS") diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_queries.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_queries.py index 3075b48dc393d9..bec3f83c1dc2e7 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_queries.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_queries.py @@ -109,7 +109,7 @@ class SnowflakeQueriesExtractorConfig(ConfigModel): description="Local path to store the audit log.", # TODO: For now, this is simply an advanced config to make local testing easier. # Eventually, we will want to store date-specific files in the directory and use it as a cache. 
- hidden_from_docs=True, + json_schema_extra={"hidden_from_docs": True}, ) include_lineage: bool = True diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py b/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py index 2e1c7d0ed6ee56..17a1b441ff1c46 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py @@ -127,7 +127,11 @@ class ClickHouseConfig( ): # defaults host_port: str = Field(default="localhost:8123", description="ClickHouse host URL.") - scheme: str = Field(default="clickhouse", description="", hidden_from_docs=True) + scheme: str = Field( + default="clickhouse", + description="", + json_schema_extra={"hidden_from_docs": True}, + ) password: pydantic.SecretStr = Field( default=pydantic.SecretStr(""), description="password" ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/hive.py b/metadata-ingestion/src/datahub/ingestion/source/sql/hive.py index 562e9192e0a28a..895c5a30073c21 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/hive.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/hive.py @@ -651,10 +651,12 @@ def get_view_definition_patched(self, connection, view_name, schema=None, **kw): class HiveConfig(TwoTierSQLAlchemyConfig): # defaults - scheme: str = Field(default="hive", hidden_from_docs=True) + scheme: str = Field(default="hive", json_schema_extra={"hidden_from_docs": True}) # Overriding as table location lineage is richer implementation here than with include_table_location_lineage - include_table_location_lineage: bool = Field(default=False, hidden_from_docs=True) + include_table_location_lineage: bool = Field( + default=False, json_schema_extra={"hidden_from_docs": True} + ) emit_storage_lineage: bool = Field( default=False, diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/hive_metastore.py b/metadata-ingestion/src/datahub/ingestion/source/sql/hive_metastore.py index 6d5977fbb07594..fab011ed48a789 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/hive_metastore.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/hive_metastore.py @@ -99,7 +99,11 @@ class HiveMetastore(BasicSQLAlchemyConfig): default="localhost:3306", description="Host URL and port to connect to. 
Example: localhost:3306", ) - scheme: str = Field(default="mysql+pymysql", description="", hidden_from_docs=True) + scheme: str = Field( + default="mysql+pymysql", + description="", + json_schema_extra={"hidden_from_docs": True}, + ) database_pattern: AllowDenyPattern = Field( default=AllowDenyPattern.allow_all(), @@ -124,7 +128,7 @@ class HiveMetastore(BasicSQLAlchemyConfig): ) include_view_lineage: bool = Field( - default=False, description="", hidden_from_docs=True + default=False, description="", json_schema_extra={"hidden_from_docs": True} ) include_catalog_name_in_ids: bool = Field( diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py index d0c9d724056a69..bf8ba5332aedc0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py @@ -75,7 +75,11 @@ class SQLServerConfig(BasicSQLAlchemyConfig): # defaults host_port: str = Field(default="localhost:1433", description="MSSQL host URL.") - scheme: str = Field(default="mssql+pytds", description="", hidden_from_docs=True) + scheme: str = Field( + default="mssql+pytds", + description="", + json_schema_extra={"hidden_from_docs": True}, + ) # TODO: rename to include_procedures ? include_stored_procedures: bool = Field( diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/presto.py b/metadata-ingestion/src/datahub/ingestion/source/sql/presto.py index eb5292fda125d2..4adff61c10660d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/presto.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/presto.py @@ -87,7 +87,9 @@ def _get_full_table( # type: ignore class PrestoConfig(TrinoConfig): # defaults - scheme: str = Field(default="presto", description="", hidden_from_docs=True) + scheme: str = Field( + default="presto", description="", json_schema_extra={"hidden_from_docs": True} + ) @platform_name("Presto", doc_order=1) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py b/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py index d7ea886a86b388..01ba24c3ab536c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py @@ -221,7 +221,9 @@ class ConnectorDetail(PlatformInstanceConfigMixin, EnvConfigMixin): class TrinoConfig(BasicSQLAlchemyConfig): # defaults - scheme: str = Field(default="trino", description="", hidden_from_docs=True) + scheme: str = Field( + default="trino", description="", json_schema_extra={"hidden_from_docs": True} + ) database: str = Field(description="database (catalog)") catalog_to_connector_details: Dict[str, ConnectorDetail] = Field( diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/two_tier_sql_source.py b/metadata-ingestion/src/datahub/ingestion/source/sql/two_tier_sql_source.py index fa29e33dc421ab..6270145486477f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/two_tier_sql_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/two_tier_sql_source.py @@ -31,7 +31,7 @@ class TwoTierSQLAlchemyConfig(BasicSQLAlchemyConfig): # The superclass contains a `schema_pattern` field, so we need this here # to override the documentation. 
default=AllowDenyPattern.allow_all(), - hidden_from_docs=True, + json_schema_extra={"hidden_from_docs": True}, description="Deprecated in favour of database_pattern.", ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql_queries.py b/metadata-ingestion/src/datahub/ingestion/source/sql_queries.py index f3e8e774e4388f..66ae5f78c15448 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql_queries.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql_queries.py @@ -56,7 +56,7 @@ class SqlQueriesSourceConfig(PlatformInstanceConfigMixin, EnvConfigMixin): use_schema_resolver: bool = Field( description="Read SchemaMetadata aspects from DataHub to aid in SQL parsing. Turn off only for testing.", default=True, - hidden_from_docs=True, + json_schema_extra={"hidden_from_docs": True}, ) default_db: Optional[str] = Field( description="The default database to use for unqualified table names", diff --git a/metadata-ingestion/src/datahub/ingestion/source/state/stateful_ingestion_base.py b/metadata-ingestion/src/datahub/ingestion/source/state/stateful_ingestion_base.py index 994854836ae379..c261dea7236710 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/state/stateful_ingestion_base.py +++ b/metadata-ingestion/src/datahub/ingestion/source/state/stateful_ingestion_base.py @@ -11,7 +11,6 @@ ConfigurationError, DynamicTypedConfig, ) -from datahub.configuration.pydantic_migration_helpers import GenericModel from datahub.configuration.time_window_config import BaseTimeWindowConfig from datahub.configuration.validate_field_rename import pydantic_renamed_field from datahub.ingestion.api.common import PipelineContext @@ -58,22 +57,22 @@ class StatefulIngestionConfig(ConfigModel): max_checkpoint_state_size: pydantic.PositiveInt = Field( default=2**24, # 16 MB description="The maximum size of the checkpoint state in bytes. Default is 16MB", - hidden_from_docs=True, + json_schema_extra={"hidden_from_docs": True}, ) state_provider: Optional[DynamicTypedStateProviderConfig] = Field( default=None, description="The ingestion state provider configuration.", - hidden_from_docs=True, + json_schema_extra={"hidden_from_docs": True}, ) ignore_old_state: bool = Field( default=False, description="If set to True, ignores the previous checkpoint state.", - hidden_from_docs=True, + json_schema_extra={"hidden_from_docs": True}, ) ignore_new_state: bool = Field( default=False, description="If set to True, ignores the current checkpoint state.", - hidden_from_docs=True, + json_schema_extra={"hidden_from_docs": True}, ) @pydantic.root_validator(skip_on_failure=True) @@ -89,7 +88,7 @@ def validate_config(cls, values: Dict[str, Any]) -> Dict[str, Any]: CustomConfig = TypeVar("CustomConfig", bound=StatefulIngestionConfig) -class StatefulIngestionConfigBase(GenericModel, Generic[CustomConfig]): +class StatefulIngestionConfigBase(ConfigModel, Generic[CustomConfig]): """ Base configuration class for stateful ingestion for source configs to inherit from. 
""" diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py index 232cf0613beaae..0856635b60daa4 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py @@ -659,7 +659,7 @@ def projects_backward_compatibility(cls, values: Dict) -> Dict: return values - @root_validator() + @root_validator(skip_on_failure=True) def validate_config_values(cls, values: Dict) -> Dict: tags_for_hidden_assets = values.get("tags_for_hidden_assets") ingest_tags = values.get("ingest_tags") diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/config.py b/metadata-ingestion/src/datahub/ingestion/source/unity/config.py index 4e797436262cfd..5fcc6f8cb1ddd4 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/config.py @@ -251,7 +251,7 @@ class UnityCatalogSourceConfig( lineage_max_workers: int = pydantic.Field( default=5 * (os.cpu_count() or 4), description="Number of worker threads to use for column lineage thread pool executor. Set to 1 to disable.", - hidden_from_docs=True, + json_schema_extra={"hidden_from_docs": True}, ) include_usage_statistics: bool = Field( diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/tag_entities.py b/metadata-ingestion/src/datahub/ingestion/source/unity/tag_entities.py index 992ba5a6d8110a..c01a01d5d997a7 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/tag_entities.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/tag_entities.py @@ -36,7 +36,7 @@ class UnityCatalogTagPlatformResourceId(BaseModel, ExternalEntityId): tag_key: str tag_value: Optional[str] = None - platform_instance: Optional[str] + platform_instance: Optional[str] = None exists_in_unity_catalog: bool = False persisted: bool = False @@ -218,7 +218,7 @@ class UnityCatalogTagPlatformResource(BaseModel, ExternalEntity): datahub_urns: LinkedResourceSet managed_by_datahub: bool id: UnityCatalogTagPlatformResourceId - allowed_values: Optional[List[str]] + allowed_values: Optional[List[str]] = None def get_id(self) -> ExternalEntityId: return self.id diff --git a/metadata-ingestion/src/datahub/ingestion/source/usage/starburst_trino_usage.py b/metadata-ingestion/src/datahub/ingestion/source/usage/starburst_trino_usage.py index 81a88f84179bfd..53139e1590b926 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/usage/starburst_trino_usage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/usage/starburst_trino_usage.py @@ -60,7 +60,7 @@ class TrinoConnectorInfo(BaseModel): partitionIds: List[str] - truncated: Optional[bool] + truncated: Optional[bool] = None class TrinoAccessedMetadata(BaseModel): @@ -80,7 +80,7 @@ class TrinoJoinedAccessEvent(BaseModel): table: Optional[str] = None accessed_metadata: List[TrinoAccessedMetadata] starttime: datetime = Field(alias="create_time") - endtime: Optional[datetime] = Field(alias="end_time") + endtime: Optional[datetime] = Field(None, alias="end_time") class EnvBasedSourceBaseConfig: diff --git a/metadata-ingestion/src/datahub/ingestion/source/usage/usage_common.py b/metadata-ingestion/src/datahub/ingestion/source/usage/usage_common.py index 02ddc1735c0d11..6293f3a7b0e55e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/usage/usage_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/usage/usage_common.py @@ -200,7 +200,9 @@ 
class BaseUsageConfig(BaseTimeWindowConfig): "Total character limit for all queries in a single usage aspect." " Queries will be truncated to length `queries_character_limit / top_n_queries`." ), - hidden_from_docs=True, # Don't want to encourage people to break elasticsearch + json_schema_extra={ + "hidden_from_docs": True + }, # Don't want to encourage people to break elasticsearch ) top_n_queries: pydantic.PositiveInt = Field( diff --git a/metadata-ingestion/src/datahub/pydantic/__init__.py b/metadata-ingestion/src/datahub/pydantic/__init__.py deleted file mode 100644 index e69de29bb2d1d6..00000000000000 diff --git a/metadata-ingestion/src/datahub/pydantic/compat.py b/metadata-ingestion/src/datahub/pydantic/compat.py deleted file mode 100644 index f6c1208ae85afd..00000000000000 --- a/metadata-ingestion/src/datahub/pydantic/compat.py +++ /dev/null @@ -1,58 +0,0 @@ -import functools -from typing import Any, Callable, Optional, TypeVar, cast - -# Define a type variable for the decorator -F = TypeVar("F", bound=Callable[..., Any]) - - -# Check which Pydantic version is installed -def get_pydantic_version() -> int: - """Determine if Pydantic v1 or v2 is installed.""" - try: - import pydantic - - version = pydantic.__version__ - return 1 if version.startswith("1.") else 2 - except (ImportError, AttributeError): - # Default to v1 if we can't determine version - return 1 - - -PYDANTIC_VERSION = get_pydantic_version() - - -# Create compatibility layer for dict-like methods -def compat_dict_method(v1_method: Optional[Callable] = None) -> Callable: - """ - Decorator to make a dict method work with both Pydantic v1 and v2. - - In v1: Uses the decorated method (typically dict) - In v2: Redirects to model_dump with appropriate parameter mapping - """ - - def decorator(func: F) -> F: - @functools.wraps(func) - def wrapper(self, *args, **kwargs): - if PYDANTIC_VERSION >= 2: - # Map v1 parameters to v2 parameters - # exclude -> exclude - # exclude_unset -> exclude_unset - # exclude_defaults -> exclude_defaults - # exclude_none -> exclude_none - # by_alias -> by_alias - model_dump_kwargs = kwargs.copy() - - # Handle the 'exclude' parameter differently between versions - exclude = kwargs.get("exclude", set()) - if isinstance(exclude, (set, dict)): - model_dump_kwargs["exclude"] = exclude - - return self.model_dump(**model_dump_kwargs) - return func(self, *args, **kwargs) - - return cast(F, wrapper) - - # Allow use as both @compat_dict_method and @compat_dict_method() - if v1_method is None: - return decorator - return decorator(v1_method) diff --git a/metadata-ingestion/src/datahub/sdk/search_filters.py b/metadata-ingestion/src/datahub/sdk/search_filters.py index d395e7b0090142..9a67c2962b115a 100644 --- a/metadata-ingestion/src/datahub/sdk/search_filters.py +++ b/metadata-ingestion/src/datahub/sdk/search_filters.py @@ -15,7 +15,6 @@ import pydantic from datahub.configuration.common import ConfigModel -from datahub.configuration.pydantic_migration_helpers import PYDANTIC_VERSION_2 from datahub.ingestion.graph.client import flexible_entity_type_to_graphql from datahub.ingestion.graph.filters import ( FilterOperator, @@ -34,12 +33,7 @@ class _BaseFilter(ConfigModel): class Config: - # We can't wrap this in a TYPE_CHECKING block because the pydantic plugin - # doesn't recognize it properly. So unfortunately we'll need to live - # with the deprecation warning w/ pydantic v2. 
- allow_population_by_field_name = True - if PYDANTIC_VERSION_2: - populate_by_name = True + populate_by_name = True @abc.abstractmethod def compile(self) -> _OrFilters: @@ -319,21 +313,13 @@ def dfs(self) -> Iterator[_BaseFilter]: # Required to resolve forward references to "Filter" -if PYDANTIC_VERSION_2: - _And.model_rebuild() # type: ignore - _Or.model_rebuild() # type: ignore - _Not.model_rebuild() # type: ignore -else: - _And.update_forward_refs() - _Or.update_forward_refs() - _Not.update_forward_refs() +_And.model_rebuild() +_Or.model_rebuild() +_Not.model_rebuild() def load_filters(obj: Any) -> Filter: - if PYDANTIC_VERSION_2: - return pydantic.TypeAdapter(Filter).validate_python(obj) # type: ignore - else: - return pydantic.parse_obj_as(Filter, obj) # type: ignore + return pydantic.TypeAdapter(Filter).validate_python(obj) # We need FilterDsl for two reasons: diff --git a/metadata-ingestion/src/datahub/sql_parsing/_models.py b/metadata-ingestion/src/datahub/sql_parsing/_models.py index d586e7d6d9045b..97da2880476bdb 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/_models.py +++ b/metadata-ingestion/src/datahub/sql_parsing/_models.py @@ -4,7 +4,6 @@ import sqlglot from pydantic import BaseModel -from datahub.configuration.pydantic_migration_helpers import PYDANTIC_VERSION_2 from datahub.metadata.schema_classes import SchemaFieldDataTypeClass @@ -16,10 +15,8 @@ class _ParserBaseModel( }, ): def json(self, *args: Any, **kwargs: Any) -> str: - if PYDANTIC_VERSION_2: - return super().model_dump_json(*args, **kwargs) # type: ignore - else: - return super().json(*args, **kwargs) + # TODO: Remove this method and migrate to model_dump_json. + return super().model_dump_json(*args, **kwargs) @functools.total_ordering diff --git a/metadata-ingestion/src/datahub/utilities/lossy_collections.py b/metadata-ingestion/src/datahub/utilities/lossy_collections.py index 31d6d0eb842d04..66051d23535447 100644 --- a/metadata-ingestion/src/datahub/utilities/lossy_collections.py +++ b/metadata-ingestion/src/datahub/utilities/lossy_collections.py @@ -1,7 +1,8 @@ import random -from typing import Dict, Generic, Iterable, Iterator, List, Set, TypeVar, Union +from typing import Any, Dict, Generic, Iterable, Iterator, List, Set, TypeVar, Union -from datahub.configuration.pydantic_migration_helpers import PYDANTIC_VERSION_2 +from pydantic import GetCoreSchemaHandler +from pydantic_core import CoreSchema, core_schema T = TypeVar("T") _KT = TypeVar("_KT") @@ -47,15 +48,11 @@ def __repr__(self) -> str: def __str__(self) -> str: return repr(self) - if PYDANTIC_VERSION_2: - # With pydantic 2, it doesn't recognize that this is a list subclass, - # so we need to make it explicit. - - @classmethod - def __get_pydantic_core_schema__(cls, source_type, handler): # type: ignore - from pydantic_core import core_schema - - return core_schema.no_info_after_validator_function(cls, handler(list)) + @classmethod + def __get_pydantic_core_schema__( + cls, source_type: Any, handler: GetCoreSchemaHandler + ) -> CoreSchema: + return core_schema.no_info_after_validator_function(cls, handler(list)) def as_obj(self) -> List[Union[T, str]]: from datahub.ingestion.api.report import Report
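
Note on the recurring `hidden_from_docs` change above: pydantic v2 deprecates arbitrary extra keyword arguments on `Field(...)`, so per-field metadata now goes through `json_schema_extra`, from where it is merged into the generated JSON schema and can be read back by downstream tooling. A minimal sketch of the pattern, using a hypothetical `ExampleSourceConfig` model that is not part of this PR:

from pydantic import BaseModel, Field

class ExampleSourceConfig(BaseModel):
    # Hypothetical stand-in for the config classes touched above.
    scheme: str = Field(
        default="example",
        description="",
        json_schema_extra={"hidden_from_docs": True},
    )

# The extra keys are merged into the field's entry in the generated schema.
schema = ExampleSourceConfig.model_json_schema()
assert schema["properties"]["scheme"]["hidden_from_docs"] is True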
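
The `= None` additions above (`column_name`, `UPSTREAM_TABLES`, `allowed_values`, `truncated`, and so on) exist because pydantic v2 no longer treats a bare `Optional[...]` annotation as having an implicit default: such a field is required unless a default is given. A small illustration with hypothetical models:

from typing import List, Optional

from pydantic import BaseModel, ValidationError

class EdgeWithDefault(BaseModel):
    # Optional *and* defaulted: may be omitted, as in the models above.
    upstream_tables: Optional[List[str]] = None

class EdgeWithoutDefault(BaseModel):
    # Optional but required in v2: a value (possibly None) must be passed.
    upstream_tables: Optional[List[str]]

EdgeWithDefault()  # fine
try:
    EdgeWithoutDefault()
except ValidationError as exc:
    print(exc)  # upstream_tables: Field required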
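
The `@root_validator()` → `@root_validator(skip_on_failure=True)` change in tableau.py reflects a v2 rule: the deprecated v1-style `root_validator` with `pre=False` must set `skip_on_failure=True`, otherwise the class definition raises a `PydanticUserError`. A hedged sketch with a made-up model and check, loosely modeled on the real validator:

from typing import List

from pydantic import BaseModel, root_validator

class ExampleTableauConfig(BaseModel):
    # Hypothetical model; the real validator lives in the Tableau source config.
    ingest_tags: bool = False
    tags_for_hidden_assets: List[str] = []

    @root_validator(skip_on_failure=True)
    def validate_config_values(cls, values):
        # With skip_on_failure=True the validator is skipped when field-level
        # validation has already failed, which is what v2 requires for a
        # v1-style root_validator with pre=False.
        if values.get("tags_for_hidden_assets") and not values.get("ingest_tags"):
            raise ValueError("tags_for_hidden_assets requires ingest_tags")
        return values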
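
The `__get_pydantic_core_schema__` hook kept in lossy_collections.py is needed because pydantic v2 does not automatically validate arbitrary `list` subclasses. A minimal, self-contained sketch of the same idea, using a hypothetical `TruncatingList` instead of `LossyList`:

from typing import Any, List, TypeVar

from pydantic import BaseModel, GetCoreSchemaHandler
from pydantic_core import CoreSchema, core_schema

T = TypeVar("T")

class TruncatingList(List[T]):
    # Hypothetical list subclass that pydantic v2 would otherwise reject.

    @classmethod
    def __get_pydantic_core_schema__(
        cls, source_type: Any, handler: GetCoreSchemaHandler
    ) -> CoreSchema:
        # Validate the input as a plain list, then re-wrap it in this subclass.
        return core_schema.no_info_after_validator_function(cls, handler(list))

class ExampleReport(BaseModel):
    samples: TruncatingList[int] = TruncatingList()

report = ExampleReport(samples=[1, 2, 3])
assert isinstance(report.samples, TruncatingList)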
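
Finally, the search_filters.py hunk swaps v1's `pydantic.parse_obj_as` for v2's `TypeAdapter` and `update_forward_refs()` for `model_rebuild()`. A rough standalone example; the union and field names below are invented and do not match the real filter DSL:

from typing import List, Union

from pydantic import BaseModel, TypeAdapter

class ExactFilter(BaseModel):
    field: str
    value: str

class AndFilter(BaseModel):
    and_: List["Filter"]

Filter = Union[AndFilter, ExactFilter]

# Forward references to "Filter" are resolved explicitly in v2.
AndFilter.model_rebuild()

# TypeAdapter validates non-BaseModel types such as unions.
parsed = TypeAdapter(Filter).validate_python({"field": "platform", "value": "snowflake"})
assert isinstance(parsed, ExactFilter)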