Merge branch 'py39-typing-compat' into v2_main

thoughtspot · Jan 8, 2025 · 06f054a · 06f054a
2 parents 0a8af6a + 8bc1697
commit 06f054a
Show file tree

Hide file tree

Showing 15 changed files with 149 additions and 128 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -17,6 +17,8 @@ repos:
       - id: pyupgrade
         # SEE pyproject.toml FOR py{min-support}-plus.
         args: [--py39-plus, --keep-runtime-typing]
+        # THIS FILE NEEDS TO STAY python27 COMPATIBLE "ABOVE" __main__ AND py{min-support} "BELOW" __main__.
+        exclude: ^examples/
 
   - repo: https://github.com/charliermarsh/ruff-pre-commit
     rev: v0.8.6

diff --git a/_generate/__main__.py b/_generate/__main__.py
@@ -73,8 +73,10 @@ def _clean_edoc_proto() -> None:
         # REMOVE THE IMPORT STATEMENT SINCE WE ARE LOCALIZING OR STRIPPING THE PROTO
         text = re.sub(rf'^import "{preprocessor.import_name}";$', _const.VOID, text, flags=re.MULTILINE)
 
-        # STRIP OFF THE PACKAGE IDENTITY (and optional path separator)
-        text = re.sub(rf"(?<=\s){preprocessor.package}\.?", preprocessor.replace, text, flags=re.MULTILINE | re.DOTALL)
+        # STRIP OFF THE PACKAGE IDENTITY (when not followed by an underscore, with an optional path separator)
+        # fmt: off
+        text = re.sub(rf"(?<=\s){preprocessor.package}(?!_)\.?", preprocessor.replace, text, flags=re.MULTILINE | re.DOTALL)  # noqa: E501
+        # fmt: on
 
         # DIVIDE THE edoc.proto INTO 3 PARTS, INJECT THE LOCAL PROTO, STICK IT BACK TOGETHER
         imports, package_info, edoc_contents = text.partition(SCRIPTABILITY_PACKAGE_INFO)

diff --git a/_generate/_clean.py b/_generate/_clean.py
@@ -44,7 +44,7 @@ class ProtobufPreprocessor:
     ),
     ProtobufPreprocessor(
         import_name=r"common/common.proto",
-        package=r"common.(?!proto_validation)",
+        package=r"common(?!.proto_validation)",
         local=_proto_local.PROTO_COMMON,
     ),
     ProtobufPreprocessor(

diff --git a/pyproject.toml b/pyproject.toml
@@ -106,7 +106,8 @@ strict_equality = true
 strict_concatenate = true
 exclude = '''
 (?x)(
-    ^_scriptability.py$  # IGNORE AUTO-GENERATED FILES
+      _scriptability.py$  # IGNORE AUTO-GENERATED FILES
+    | _compat.py$         # IGNORE COMPAT FILES
 )
 '''
 
@@ -116,7 +117,6 @@ target-version = "py39"
 line-length = 120
 src = ["src/thoughtspot_tml"]
 exclude = [
-
     # PROJECT SPECIFIC IGNORES
     "__init__.py",        # ignore __init__.py
     "__project__.py",     # ignore project metadata

diff --git a/src/thoughtspot_tml/__init__.py b/src/thoughtspot_tml/__init__.py
@@ -1,5 +1,7 @@
 from thoughtspot_tml.__project__ import __version__
 
+from thoughtspot_tml._tml import TML
+
 from thoughtspot_tml.tml import Connection
 from thoughtspot_tml.tml import Table, View, SQLView, Worksheet, Model
 from thoughtspot_tml.tml import Answer, Liveboard, Cohort
@@ -14,6 +16,7 @@
 
 __all__ = (
     "__version__",
+    "TML",
     "Connection",
     "Table",
     "View",

diff --git a/src/thoughtspot_tml/__project__.py b/src/thoughtspot_tml/__project__.py
@@ -1 +1 @@
-__version__ = "2.2.0"
+__version__ = "2.2.1"
diff --git a/src/thoughtspot_tml/_scriptability.py b/src/thoughtspot_tml/_scriptability.py
diff --git a/src/thoughtspot_tml/_tml.py b/src/thoughtspot_tml/_tml.py
@@ -2,33 +2,50 @@
 
 from collections.abc import Collection
 from dataclasses import asdict, dataclass, fields, is_dataclass
-from typing import TYPE_CHECKING, get_args, get_origin
+from typing import TYPE_CHECKING, ForwardRef, Optional, get_args, get_origin
 import functools as ft
 import json
 import keyword
 import pathlib
 import re
 import warnings
 
-import yaml
-
 from thoughtspot_tml import _scriptability, _yaml
 from thoughtspot_tml._compat import Self
 from thoughtspot_tml.exceptions import TMLDecodeError, TMLExtensionWarning
 
 if TYPE_CHECKING:
     from typing import Any
 
+    from thoughtspot_tml.types import GUID
+
 RE_CAMEL_CASE = re.compile(r"[A-Z]?[a-z]+|[A-Z]{2,}(?=[A-Z][a-z]|\d|\W|$)|\d+")
 
 
 def attempt_resolve_type(type_hint: Any) -> Any:
     """Resolves string type hints to actual types."""
+    # IF IT'S A ForwardRef, RESOLVE IT.
+    # Further Reading:
+    #   https://docs.python.org/3/library/typing.html#typing.ForwardRef
+    if isinstance(type_hint, ForwardRef):
+        return type_hint.__forward_value__
+
+    # IF IT'S A STRING, ATTEMPT TO LOOK IT UP IN _scriptability.py
     if isinstance(type_hint, str):
         return getattr(_scriptability, type_hint.replace("_scriptability.", ""), type_hint)
     return type_hint
 
 
+def origin_or_fallback(type_hint: Any, *, default: Any) -> Any:
+    """
+    Get the unsubscripted version of a type, with optional fallback.
+
+    Further Reading:
+      https://docs.python.org/3/library/typing.html#typing.get_origin
+    """
+    return get_origin(type_hint) or default
+
+
 def recursive_complex_attrs_to_dataclasses(instance: Any) -> None:
     """
     Convert all fields of type `dataclass` into an instance of the
@@ -56,25 +73,22 @@ def recursive_complex_attrs_to_dataclasses(instance: Any) -> None:
         # NOTE: this falls back to the original type_hint when it can't be resolved.
         field_type = attempt_resolve_type(field.type)
 
+        # ORIGIN TYPES ARE THE X in X[a, b, c] hints.. plain, unsubscripted types have no
+        # origin (get_origin returns None), so we fall back to the hint itself.
+        origin_type = origin_or_fallback(field_type, default=field_type)
+
         # RECURSE INTO RESOLVED _scriptability.py HINTS
         if RESOLVED_TYPEHINT_HAS_CHILDREN(hint=field_type, expr=value):
             new_value = field_type(**value)
             recursive_complex_attrs_to_dataclasses(new_value)
 
         # list IS USED TO DENOTE THAT A TML OBJECT CAN CONTAIN MULTIPLE HOMOGENOUS
         # CHILDREN SO WE TAKE JUST THE FIRST ELEMENT AND ATTEMPT TO RESOLVE IT.
-        elif get_origin(field_type) is list:
-            new_value = []
+        elif origin_type is list:
             homo_type = next(iter(get_args(field_type)))
             item_type = attempt_resolve_type(homo_type)
 
-            # OLD ... will keep this around JUST IN CASE.
-            #
-            # item_type = attempt_resolve_type(
-            #     get_args(field_type)[0].__forward_value__
-            #     if isinstance(get_args(field_type)[0], typing.ForwardRef)
-            #     else get_args(field_type)[0]
-            # )
+            new_value = []
 
             for item in value:
                 # RECURSE INTO RESOLVED _scriptability.py HINTS
@@ -84,10 +98,16 @@ def recursive_complex_attrs_to_dataclasses(instance: Any) -> None:
 
                 new_value.append(item)
 
-        # IF OUR VALUE IS EMPTY, WE'RE GOING TO DROP IT.
-        elif get_origin(field_type) is dict and not value:
+        # IF OUR VALUE IS EMPTY, IT IS OPTIONAL AND SO WE'RE GOING TO DROP IT.
+        elif origin_type is dict and not value:
             new_value = None
 
+        # DEV NOTE: @boonhapus, 2025/01/08
+        #   Q. WHY NO (origin_type is dict and value) LIKE WE HAVE FOR LISTS?
+        #   A. Currently the edoc spec does not maintain complex mapping types. If we
+        #      need to support them, we'll need to add them at this priority (below
+        #      empty dicts -- so we continue to support optionality).
+
         # SIMPLE TYPES DO NOT NEED RECURSION.
         else:
             continue
@@ -141,6 +161,8 @@ class TML:
     Base object for ThoughtSpot TML.
     """
 
+    guid: Optional[GUID]
+
     @property
     def tml_type_name(self) -> str:
         """Return the type name of the TML object."""
@@ -149,6 +171,11 @@ def tml_type_name(self) -> str:
         snakes = "_".join(camels)
         return snakes.lower()
 
+    @property
+    def name(self) -> str:
+        """This should be implemented in child classes."""
+        raise NotImplementedError
+
     def __post_init__(self):
         recursive_complex_attrs_to_dataclasses(self)
 
@@ -180,14 +207,10 @@ def loads(cls, tml_document: str) -> Self:
         TMLDecodeError, when the document string cannot be parsed or receives extra data
         """
         try:
-            document = cls._loads(tml_document)
-        except (yaml.scanner.ScannerError, yaml.parser.ParserError, yaml.reader.ReaderError) as e:
-            raise TMLDecodeError(cls, message=str(e), problem_mark=getattr(e, "problem_mark", None)) from None  # type: ignore[arg-type]
-
-        try:
-            instance = cls(**document)
-        except TypeError as e:
-            raise TMLDecodeError(cls, data=document, message=str(e)) from None  # type: ignore[arg-type]
+            data = cls._loads(tml_document)
+            instance = cls(**data)
+        except Exception as e:
+            raise TMLDecodeError(cls, exc=e, document=tml_document) from None
 
         return instance
 
@@ -211,7 +234,8 @@ def load(cls, path: pathlib.Path) -> Self:
         try:
             instance = cls.loads(path.read_text(encoding="utf-8"))
         except TMLDecodeError as e:
-            e.path = path
+            # INTERCEPT AND INJECT THE FILEPATH.
+            e.filepath = path
             raise e from None
 
         return instance

diff --git a/src/thoughtspot_tml/_yaml.py b/src/thoughtspot_tml/_yaml.py
@@ -1,14 +1,12 @@
 from __future__ import annotations
 
-from typing import Any, Dict
+from typing import Any
 import re
 
 import yaml
 
 from thoughtspot_tml import _compat
 
-NEARLY_INFINITY = 999999999  # This used to be math.inf, but C has no concept of infinity. ;)
-
 # TML column ids typically take the form..
 #
 #   LOGICAL_TABLE_NAME_#::LOGICAL_COLUMN_NAME
@@ -40,7 +38,7 @@
 # fmt: on
 
 
-def _double_quote_when_special_char(dumper: yaml.Dumper, data: str) -> yaml.ScalarNode:
+def _double_quote_when_special_char(dumper: yaml.Dumper | yaml.CDumper, data: str) -> yaml.ScalarNode:
     """
     Double quote the string when any condition is met.
 
@@ -68,7 +66,7 @@ def _double_quote_when_special_char(dumper: yaml.Dumper, data: str) -> yaml.Scal
 yaml.Loader.yaml_implicit_resolvers.pop("=")
 
 
-def load(document: str) -> Dict[str, Any]:
+def load(document: str) -> dict[str, Any]:
     """
     Load a TML object.
     """
@@ -80,7 +78,7 @@ def load(document: str) -> Dict[str, Any]:
         return yaml.load(document, Loader=yaml.SafeLoader)
 
 
-def dump(document: Dict[str, Any]) -> str:
+def dump(document: dict[str, Any]) -> str:
     """
     Dump a TML object as YAML.
 
@@ -94,15 +92,17 @@ def dump(document: Dict[str, Any]) -> str:
 
     We'll attempt to reproduce them in Python.
     """
+    NEARLY_INFINITY = 999999999  # This used to be math.inf, but C has no concept of infinity. ;)
+
     options = {
         "width": NEARLY_INFINITY,
         "default_flow_style": False,
         "sort_keys": False,
         "allow_unicode": True,
     }
     try:
-        return yaml.dump(document, Dumper=_compat.Dumper, **options)
+        return yaml.dump(document, Dumper=_compat.Dumper, **options)  # type: ignore[call-overload]
 
     # FALL BACK TO THE SLOWER PYTHON DUMPER IF WE CAN'T FULLY PARSE UNICODE
     except UnicodeEncodeError:
-        return yaml.dump(document, Dumper=yaml.SafeDumper, **options)
+        return yaml.dump(document, Dumper=yaml.SafeDumper, **options)  # type: ignore[call-overload]
diff --git a/src/thoughtspot_tml/exceptions.py b/src/thoughtspot_tml/exceptions.py
@@ -1,14 +1,13 @@
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Any, Dict, Optional, Type
-import dataclasses
+from typing import TYPE_CHECKING, Optional
+
+from yaml import error
 
 if TYPE_CHECKING:
     from collections.abc import Iterable
     from pathlib import Path
 
-    from yaml import error
-
     from thoughtspot_tml.types import GUID, TMLObject
 
 
@@ -41,50 +40,40 @@ class TMLDecodeError(TMLError):
     Raised when a TML object cannot be instantiated from input data.
     """
 
-    def __init__(
-        self,
-        tml_cls: Type[TMLObject],
-        *,
-        message: Optional[str] = None,
-        data: Optional[Dict[str, Any]] = None,
-        path: Optional[Path] = None,
-        problem_mark: Optional[error.Mark] = None,
-    ):  # pragma: no cover
+    def __init__(self, tml_cls: type[TMLObject], *, exc: Exception, document: str, filepath: Optional[Path] = None):
         self.tml_cls = tml_cls
-        self.message = message
-        self.data = data
-        self.path = path
-        self.problem_mark = problem_mark
+        self.parent_exc = exc
+        self.document = document
+        self.filepath = filepath
 
-    def __str__(self) -> str:
-        lines = []
-        class_name = self.tml_cls.__name__
+    def with_filepath(self, filepath) -> TMLDecodeError:
+        """Add the file which generated the exception."""
+        self.filepath = filepath
+        return self
 
-        if self.message is not None:
-            lines.append(self.message)
+    def __str__(self) -> str:
+        lines: list[str] = []
 
-        if self.data is not None:
-            lines.append(f"supplied data does not produce a valid TML ({class_name}) document")
-            fields = {f.name for f in dataclasses.fields(self.tml_cls)}
-            data = set(self.data)
+        if isinstance(self.parent_exc, TypeError):
+            _, _, attribute = str(self.parent_exc).partition(" unexpected keyword argument ")
+            lines.append(f"Unrecognized attribute in the TML spec: {attribute}")
 
-            if data.difference(fields):
-                extra = ", ".join([f"'{arg}'" for arg in data.difference(fields)])
-                lines.append(f"\ngot extra data: {extra}")
+        if self.filepath is not None:
+            lines.append("\n")
+            lines.append(f"File '{self.filepath}' may not be a valid {self.tml_cls.__name__} file")
 
-        if self.path is not None:
-            lines.append(f"'{self.path}' is not a valid TML ({class_name}) file")
+        if isinstance(self.parent_exc, error.MarkedYAMLError):
+            if mark := self.parent_exc.problem_mark:
+                lines.append("\n")
+                lines.append(f"Syntax error on line {mark.line + 1}, around column {mark.column + 1}")
 
-        if self.problem_mark is not None:
-            err_line = self.problem_mark.line + 1
-            err_column = self.problem_mark.column + 1
-            snippet = self.problem_mark.get_snippet()
-            lines.append(f"\nsyntax error on line {err_line}, around column {err_column}")
+                if snippet := mark.get_snippet():
+                    lines.append(snippet)
 
-            if snippet is not None:
-                lines.append(snippet)
+        if not lines:
+            lines.append(str(self.parent_exc))
 
-        return "\n".join(lines)
+        return "\n".join(lines).strip()
 
 
 class TMLDisambiguationError(TMLError):