MaterializeInc · petrosagg · Apr 2, 2025 · Apr 2, 2025 · Apr 2, 2025 · Apr 2, 2025
@@ -23,7 +23,14 @@ class ParserState(Enum):
 
 
 HEADER_SEPARATOR_RE = re.compile(r"\|?(\s*-+\s*)(\|\s*-+\s*){2}\|?")
-TABLE_RE = re.compile(r"(?:\|?[\s`\[\]]*([\w_ ]+)[\s`\[\]]*)")
+# Field names are enclosed in backticks
+FIELD_NAME_RE = re.compile(r"`(.*)`")
+# Field types are enclosed in backticks and optionally in square brackets
+FIELD_TYPE_RE = re.compile(r"\[?`(.*)`\]?")
+# Documentation links are not preserved in the SQL comments. We capture the
+# text of [..](..) and [..][..] type links and keep only the link text.
+DOC_LINK_TYPE1_RE = re.compile(r"\[([^\]]+)\]\([^)]+\)")
+DOC_LINK_TYPE2_RE = re.compile(r"\[([^\]]+)\]\[[^]]+\]")
 RELATION_MARKER_RE = re.compile(r"RELATION_SPEC (\w+)\.(\w+)")
 UNDOCUMENTED_RELATION_MARKER = re.compile(r"RELATION_SPEC_UNDOCUMENTED (\w+)\.(\w+)")
 
@@ -41,19 +48,25 @@ class ParserState(Enum):
 
 mode cockroach
 
+simple conn=mz_system,user=mz_system
+ALTER SYSTEM SET unsafe_enable_unstable_dependencies = true
+----
+COMPLETE 0
+
 statement ok
 CREATE VIEW objects AS
   SELECT
     schema.name AS schema,
     objects.name AS object,
     columns.position,
     columns.name,
-    columns.type
+    columns.type,
+    comments.comment
   FROM
-    mz_catalog.mz_columns AS columns,
-    mz_catalog.mz_objects AS objects,
-    mz_catalog.mz_schemas AS schema
-  WHERE columns.id = objects.id AND objects.schema_id = schema.id
+    mz_catalog.mz_columns AS columns
+    JOIN mz_catalog.mz_objects AS objects ON columns.id = objects.id
+    JOIN mz_catalog.mz_schemas AS schema ON objects.schema_id = schema.id
+    LEFT JOIN mz_internal.mz_comments AS comments ON columns.id = comments.id AND columns.position = comments.object_sub_id
 
 statement ok
 CREATE INDEX objects_idx ON objects(schema, object)
@@ -80,9 +93,9 @@ def main() -> None:
             if marker_match:
                 schema = marker_match.group(1)
                 object_name = marker_match.group(2)
-                print("query ITT")
+                print("query TTT")
                 print(
-                    f"SELECT position, name, type FROM objects WHERE schema = '{schema}' AND object = '{object_name}' ORDER BY position"
+                    f"SELECT name, type, comment FROM objects WHERE schema = '{schema}' AND object = '{object_name}' ORDER BY position"
                 )
                 print("----")
                 state = ParserState.HEADER
@@ -92,10 +105,15 @@ def main() -> None:
             if HEADER_SEPARATOR_RE.match(line):
                 state = ParserState.FIELDS
         elif state == ParserState.FIELDS:
-            table_match = TABLE_RE.findall(line)
-            if table_match and len(table_match) >= 2:
-                field = table_match[0]
-                type_name = table_match[1]
+            line = line.strip()
+            if line.startswith("|"):
+                line = line[1:]
+            fields = [field.strip() for field in line.split("|")]
+            if len(fields) >= 3:
+                field = FIELD_NAME_RE.search(fields[0]).group(1)
+                type_name = FIELD_TYPE_RE.search(fields[1]).group(1)
+                documentation = DOC_LINK_TYPE1_RE.sub(r"\1", fields[2])
+                documentation = DOC_LINK_TYPE2_RE.sub(r"\1", documentation)
                 # We currently cannot determine the type of lists from the catalog.
                 if type_name == "mz_aclitem array":
                     type_name = "mz_aclitem[]"
@@ -106,7 +124,8 @@ def main() -> None:
                 elif "array" in type_name:
                     type_name = "array"
                 type_name = type_name.replace(" ", "␠")
-                print("  ".join([str(position), field, type_name]))
+                documentation = documentation.replace(" ", "␠")
+                print("  ".join([field, type_name, documentation]))
                 position += 1
             else:
                 print()

@@ -52,7 +52,7 @@ granted the [`mz_monitor` role](/manage/access-control/manage-roles#builtin-role
 <!-- RELATION_SPEC mz_internal.mz_recent_activity_log -->
 | Field                      | Type                         | Meaning                                                                                                                                                                                                                                                                       |
 |----------------------------|------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| `execution_id`             | [`uuid`]                     | An ID that is unique for each executed statement.                                                                                                                                                                                                                             |
+| `execution_id`             | [`uuid`]                     | An ID that is unique for each executed statement. |
 | `sample_rate`              | [`double precision`]         | The actual rate at which the statement was sampled.                                                                                                                                                                                                                           |
 | `cluster_id`               | [`text`]                     | The ID of the cluster the statement execution was directed to. Corresponds to [mz_clusters.id](/sql/system-catalog/mz_catalog/#mz_clusters).                                                                                                      |
 | `application_name`         | [`text`]                     | The value of the `application_name` configuration parameter at execution time.                                                                                                                                                                                                |
@@ -129,7 +129,7 @@ the most recent status for each AWS PrivateLink connection in the system.
 | `id` | [`text`] | The ID of the connection. Corresponds to [`mz_catalog.mz_connections.id`](../mz_catalog#mz_connections). |
 | `name` | [`text`] | The name of the connection.  |
 | `last_status_change_at` | [`timestamp with time zone`] | Wall-clock timestamp of the connection status change.|
-| `status` | [`text`] | | The status of the connection: one of `pending-service-discovery`, `creating-endpoint`, `recreating-endpoint`, `updating-endpoint`, `available`, `deleted`, `deleting`, `expired`, `failed`, `pending`, `pending-acceptance`, `rejected`, or `unknown`. |
+| `status` | [`text`] | The status of the connection: one of `pending-service-discovery`, `creating-endpoint`, `recreating-endpoint`, `updating-endpoint`, `available`, `deleted`, `deleting`, `expired`, `failed`, `pending`, `pending-acceptance`, `rejected`, or `unknown`. |
 
 ## `mz_cluster_deployment_lineage`
 
@@ -287,7 +287,7 @@ The `mz_internal_cluster_replicas` table lists the replicas that are created and
 <!-- RELATION_SPEC mz_internal.mz_internal_cluster_replicas -->
 | Field      | Type     | Meaning                                                                                                     |
 |------------|----------|-------------------------------------------------------------------------------------------------------------|
-| id         | [`text`] | The ID of a cluster replica. Corresponds to [`mz_cluster_replicas.id`](../mz_catalog/#mz_cluster_replicas). |
+| `id`       | [`text`] | The ID of a cluster replica. Corresponds to [`mz_cluster_replicas.id`](../mz_catalog/#mz_cluster_replicas). |
 
 ## `mz_pending_cluster_replicas`
 
@@ -296,7 +296,7 @@ The `mz_pending_cluster_replicas` table lists the replicas that were created dur
 <!-- RELATION_SPEC mz_internal.mz_pending_cluster_replicas -->
 | Field      | Type     | Meaning                                                                                                     |
 |------------|----------|-------------------------------------------------------------------------------------------------------------|
-| id         | [`text`] | The ID of a cluster replica. Corresponds to [`mz_cluster_replicas.id`](../mz_catalog/#mz_cluster_replicas). |
+| `id`       | [`text`] | The ID of a cluster replica. Corresponds to [`mz_cluster_replicas.id`](../mz_catalog/#mz_cluster_replicas). |
 
 ## `mz_comments`
 

@@ -329,15 +329,15 @@ We use the range `[operator_id_start, operator_id_end)` to record this informati
 If an LIR node was implemented without any dataflow operators, `operator_id_start` will be equal to `operator_id_end`.
 
 <!-- RELATION_SPEC mz_introspection.mz_lir_mapping -->
-| Field             | Type      | Meaning
-| ---------         | --------  | -----------
-| global_id         | [`text`]  | The global ID.
-| lir_id            | [`uint8`] | The LIR node ID.
-| operator          | [`text`]  | The LIR operator, in the format `OperatorName INPUTS [OPTIONS]`.
-| parent_lir_id     | [`uint8`] | The parent of this LIR node. May be `NULL`.
-| nesting           | [`uint2`] | The nesting level of this LIR node.
-| operator_id_start | [`uint8`] | The first dataflow operator ID implementing this LIR operator (inclusive).
-| operator_id_end   | [`uint8`] | The first dataflow operator ID _after_ this LIR operator (exclusive).
+| Field               | Type      | Meaning
+| ---------           | --------  | -----------
+| `global_id`         | [`text`]  | The global ID.
+| `lir_id`            | [`uint8`] | The LIR node ID.
+| `operator`          | [`text`]  | The LIR operator, in the format `OperatorName INPUTS [OPTIONS]`.
+| `parent_lir_id`     | [`uint8`] | The parent of this LIR node. May be `NULL`.
+| `nesting`           | [`uint2`] | The nesting level of this LIR node.
+| `operator_id_start` | [`uint8`] | The first dataflow operator ID implementing this LIR operator (inclusive).
+| `operator_id_end`   | [`uint8`] | The first dataflow operator ID _after_ this LIR operator (exclusive).
 
 <!-- RELATION_SPEC_UNDOCUMENTED mz_introspection.mz_compute_lir_mapping_per_worker -->