diff --git a/.gitignore b/.gitignore
index dce09a74..a42c13b0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,6 +14,8 @@
.sw?
#OS X specific files.
.DS_store
+#VSCode specifics
+.vscode/
#==============================================================================#
# Build artifacts
@@ -45,6 +47,7 @@ cmake-build-release
cmake-build-relwithdebinfo
duckdb_packaging/duckdb_version.txt
test.db
+tmp/
#==============================================================================#
# Python
diff --git a/_duckdb-stubs/__init__.pyi b/_duckdb-stubs/__init__.pyi
index 124a5d5a..67830ad3 100644
--- a/_duckdb-stubs/__init__.pyi
+++ b/_duckdb-stubs/__init__.pyi
@@ -86,9 +86,11 @@ __all__: list[str] = [
"default_connection",
"description",
"df",
+ "disable_profiling",
"distinct",
"dtype",
"duplicate",
+ "enable_profiling",
"enum_type",
"execute",
"executemany",
@@ -109,6 +111,7 @@ __all__: list[str] = [
"from_df",
"from_parquet",
"from_query",
+ "get_profiling_information",
"get_table_names",
"install_extension",
"interrupt",
@@ -313,6 +316,9 @@ class DuckDBPyConnection:
repository_url: str | None = None,
version: str | None = None,
) -> None: ...
+ def get_profiling_information(self, format: str = "json") -> str: ...
+ def enable_profiling(self) -> None: ...
+ def disable_profiling(self) -> None: ...
def interrupt(self) -> None: ...
def list_filesystems(self) -> list[str]: ...
def list_type(self, type: sqltypes.DuckDBPyType) -> sqltypes.DuckDBPyType: ...
@@ -1250,6 +1256,9 @@ def limit(
*,
connection: DuckDBPyConnection | None = None,
) -> DuckDBPyRelation: ...
+def get_profiling_information(*, connection: DuckDBPyConnection | None = None, format: str = "json") -> str: ...
+def enable_profiling(*, connection: DuckDBPyConnection | None = None) -> None: ...
+def disable_profiling(*, connection: DuckDBPyConnection | None = None) -> None: ...
def list_filesystems(*, connection: DuckDBPyConnection | None = None) -> list[str]: ...
def list_type(
type: sqltypes.DuckDBPyType, *, connection: DuckDBPyConnection | None = None
diff --git a/duckdb/__init__.py b/duckdb/__init__.py
index a7370083..a9ca7773 100644
--- a/duckdb/__init__.py
+++ b/duckdb/__init__.py
@@ -84,9 +84,11 @@
default_connection,
description,
df,
+ disable_profiling,
distinct,
dtype,
duplicate,
+ enable_profiling,
enum_type,
execute,
executemany,
@@ -107,6 +109,7 @@
from_df,
from_parquet,
from_query,
+ get_profiling_information,
get_table_names,
install_extension,
interrupt,
@@ -310,9 +313,11 @@
"default_connection",
"description",
"df",
+ "disable_profiling",
"distinct",
"dtype",
"duplicate",
+ "enable_profiling",
"enum_type",
"execute",
"executemany",
@@ -333,6 +338,7 @@
"from_df",
"from_parquet",
"from_query",
+ "get_profiling_information",
"get_table_names",
"install_extension",
"interrupt",
diff --git a/duckdb/query_graph/__init__.py b/duckdb/query_graph/__init__.py
new file mode 100644
index 00000000..340dd8d3
--- /dev/null
+++ b/duckdb/query_graph/__init__.py
@@ -0,0 +1,3 @@
+from .__main__ import ProfilingInfo # noqa: D104
+
+__all__ = ["ProfilingInfo"]
diff --git a/duckdb/query_graph/__main__.py b/duckdb/query_graph/__main__.py
index d4851694..5ffb942d 100644
--- a/duckdb/query_graph/__main__.py
+++ b/duckdb/query_graph/__main__.py
@@ -4,81 +4,277 @@
import webbrowser
from functools import reduce
from pathlib import Path
+from typing import Optional
+
+from duckdb import DuckDBPyConnection
qgraph_css = """
-.styled-table {
- border-collapse: collapse;
- margin: 25px 0;
- font-size: 0.9em;
- font-family: sans-serif;
- min-width: 400px;
- box-shadow: 0 0 20px rgba(0, 0, 0, 0.15);
+:root {
+ --text-primary-color: #0d0d0d;
+ --text-secondary-color: #444;
+ --doc-codebox-border-color: #e6e6e6;
+ --doc-codebox-background-color: #f7f7f7;
+ --doc-scrollbar-bg: #e6e6e6;
+ --doc-scrollbar-slider: #ccc;
+ --duckdb-accent: #009982;
+ --duckdb-accent-light: #00b89a;
+ --card-bg: #fff;
+ --border-radius: 8px;
+ --shadow: 0 4px 14px rgba(0,0,0,0.05);
}
-.styled-table thead tr {
- background-color: #009879;
- color: #ffffff;
- text-align: left;
+
+html, body {
+ margin: 0;
+ padding: 0;
+ font-family: Inter, system-ui, -apple-system, "Segoe UI", Roboto, sans-serif;
+ color: var(--text-primary-color);
+ background: #fafafa;
+ line-height: 1.55;
}
-.styled-table th,
-.styled-table td {
- padding: 12px 15px;
+
+.container {
+ max-width: 1000px;
+ margin: 40px auto;
+ padding: 0 20px;
}
-.styled-table tbody tr {
- border-bottom: 1px solid #dddddd;
+
+header {
+ display: flex;
+ align-items: center;
+ gap: 10px;
+ margin-bottom: 5px;
}
-.styled-table tbody tr:nth-of-type(even) {
- background-color: #f3f3f3;
+header img {
+ width: 100px;
+ height: 100px;
}
-.styled-table tbody tr:last-of-type {
- border-bottom: 2px solid #009879;
+header h1 {
+ font-size: 1.5rem;
+ font-weight: 600;
+ margin: 0;
+ color: var(--text-primary-color);
}
-.node-body {
- font-size:15px;
+/* === Table Styling (DuckDB documentation style, flat header) === */
+table {
+ border-collapse: collapse;
+ width: 100%;
+ margin-bottom: 20px;
+ text-align: left;
+ font-variant-numeric: tabular-nums;
+ border: 1px solid var(--doc-codebox-border-color);
+ border-radius: var(--border-radius);
+ overflow: hidden;
+ box-shadow: var(--shadow);
+ background: var(--card-bg);
+}
+
+thead {
+ background-color: var(--duckdb-accent);
+ color: white;
+}
+
+th, td {
+ padding: 10px 12px;
+ font-size: 14px;
+ vertical-align: top;
+}
+
+th {
+ font-weight: 700;
+}
+
+tbody tr {
+ border-bottom: 1px solid var(--doc-codebox-border-color);
+}
+
+tbody tr:last-child td {
+ border-bottom: none;
+}
+
+tbody tr:hover {
+ background: var(--doc-codebox-border-color);
+}
+
+tbody tr.phase-details-row {
+ border-bottom: none;
}
+
+tbody tr.phase-details-row:hover {
+ background: transparent;
+}
+
+tbody tr.phase-details-row details summary {
+ font-size: 12px;
+ padding: 4px 0;
+}
+
+tbody tr.phase-details-row details[open] summary {
+ margin-bottom: 4px;
+}
+
+/* === Chart/Card Section === */
+.chart {
+ padding: 20px;
+ border: 1px solid var(--doc-codebox-border-color);
+ border-radius: var(--border-radius);
+ background: var(--card-bg);
+ box-shadow: var(--shadow);
+ overflow: visible;
+}
+
+/* === Tree Layout Styling === */
+.tf-tree {
+ overflow-x: visible;
+ overflow-y: visible;
+ padding-top: 20px;
+}
+
.tf-nc {
- position: relative;
- width: 180px;
- text-align: center;
- background-color: #fff100;
+ background: var(--card-bg);
+ border: 1px solid var(--doc-codebox-border-color);
+ border-radius: var(--border-radius);
+ padding: 6px;
+ display: inline-block;
+}
+
+.node-body {
+ font-size: 13px;
+ text-align: left;
+ padding: 10px;
+ white-space: nowrap;
}
-.custom-tooltip {
- position: relative;
+
+.node-body p {
+ margin: 2px 0;
+}
+
+.node-details {
+ white-space: nowrap;
+ overflow: visible;
display: inline-block;
}
-.tooltip-text {
- visibility: hidden;
- background-color: #333;
- color: #fff;
+/* === Metric Boxes === */
+.chart .metrics-grid {
+ display: grid;
+ grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
+ gap: 16px;
+ margin-bottom: 20px;
+}
+
+.chart .metric-box {
+ background: var(--card-bg);
+ border: 1px solid var(--doc-codebox-border-color);
+ border-radius: var(--border-radius);
+ box-shadow: var(--shadow);
+ padding: 12px 16px;
text-align: center;
- padding: 0px;
- border-radius: 1px;
+ transition: transform 0.2s ease, box-shadow 0.2s ease;
+}
+
+.chart .metric-box:hover {
+ transform: translateY(-2px);
+ box-shadow: 0 6px 18px rgba(0, 0, 0, 0.08);
+}
+
+.chart .metric-title {
+ font-size: 13px;
+ color: var(--text-secondary-color);
+ margin-bottom: 4px;
+ text-transform: uppercase;
+ letter-spacing: 0.5px;
+}
+
+.chart .metric-value {
+ font-size: 18px;
+ font-weight: 600;
+ color: var(--duckdb-accent);
+}
- /* Positioning */
- position: absolute;
- z-index: 1;
- bottom: 100%;
- left: 50%;
- transform: translateX(-50%);
- margin-bottom: 8px;
- /* Tooltip Arrow */
- width: 400px;
+/* === SQL Query Block === */
+.chart.sql-block {
+ background: var(--doc-codebox-background-color);
+ border: 1px solid var(--doc-codebox-border-color);
+ border-radius: var(--border-radius);
+ box-shadow: var(--shadow);
+ padding: 16px;
+ overflow-x: auto;
+ margin-top: 20px;
+}
+
+.chart.sql-block pre {
+ margin: 0;
+ font-family: "JetBrains Mono", "Fira Code", Consolas, monospace;
+ font-size: 13.5px;
+ line-height: 1.5;
+ color: var(--text-primary-color);
+ white-space: pre;
+}
+
+.chart.sql-block code {
+ color: var(--duckdb-accent);
+ font-weight: 500;
+}
+
+
+/* === Links, Typography, and Consistency === */
+a {
+ color: var(--duckdb-accent);
+ text-decoration: underline;
+ transition: color 0.3s;
+}
+
+a:hover {
+ color: black;
+}
+
+strong {
+ font-weight: 600;
}
-.custom-tooltip:hover .tooltip-text {
- visibility: visible;
+/* === Dark Mode Support === */
+@media (prefers-color-scheme: dark) {
+ :root {
+ --text-primary-color: #e6e6e6;
+ --text-secondary-color: #b3b3b3;
+ --doc-codebox-border-color: #2a2a2a;
+ --doc-codebox-background-color: #1e1e1e;
+ --card-bg: #111;
+ }
+ body {
+ background: #0b0b0b;
+ }
+ thead {
+ background-color: var(--duckdb-accent);
+ }
+ tbody tr:hover {
+ background: #222;
+ }
+
+ /* Fix tree node text visibility in dark mode */
+ .tf-nc .node-body,
+ .tf-nc .node-body p,
+ .tf-nc .node-details {
+ color: #1a1a1a !important;
+ }
+
+ /* Fix metric title visibility in dark mode */
+ .chart .metric-title {
+ color: #b3b3b3;
+ }
}
-"""
+""" # noqa: W293
class NodeTiming: # noqa: D101
- def __init__(self, phase: str, time: float) -> None: # noqa: D107
+ def __init__(self, phase: str, time: float, depth: int) -> None: # noqa: D107
self.phase = phase
self.time = time
+ self.depth = depth
# percentage is determined later.
self.percentage = 0
@@ -88,7 +284,7 @@ def calculate_percentage(self, total_time: float) -> None: # noqa: D102
def combine_timing(self, r: "NodeTiming") -> "NodeTiming": # noqa: D102
# TODO: can only add timings for same-phase nodes # noqa: TD002, TD003
total_time = self.time + r.time
- return NodeTiming(self.phase, total_time)
+ return NodeTiming(self.phase, total_time, self.depth)
class AllTimings: # noqa: D101
@@ -124,200 +320,319 @@ def open_utf8(fpath: str, flags: str) -> object: # noqa: D103
return Path(fpath).open(mode=flags, encoding="utf8")
-def get_child_timings(top_node: object, query_timings: object) -> str: # noqa: D103
- node_timing = NodeTiming(top_node["operator_type"], float(top_node["operator_timing"]))
- query_timings.add_node_timing(node_timing)
- for child in top_node["children"]:
- get_child_timings(child, query_timings)
-
-
-def get_pink_shade_hex(fraction: float) -> str: # noqa: D103
- fraction = max(0, min(1, fraction))
-
- # Define the RGB values for very light pink (almost white) and dark pink
- light_pink = (255, 250, 250) # Very light pink
- dark_pink = (255, 20, 147) # Dark pink
-
- # Calculate the RGB values for the given fraction
- r = int(light_pink[0] + (dark_pink[0] - light_pink[0]) * fraction)
- g = int(light_pink[1] + (dark_pink[1] - light_pink[1]) * fraction)
- b = int(light_pink[2] + (dark_pink[2] - light_pink[2]) * fraction)
-
- # Return as hexadecimal color code
- return f"#{r:02x}{g:02x}{b:02x}"
-
-
-def get_node_body(name: str, result: str, cpu_time: float, card: int, est: int, width: int, extra_info: str) -> str: # noqa: D103
- node_style = f"background-color: {get_pink_shade_hex(float(result) / cpu_time)};"
-
- body = f''
- body += ''
- new_name = "BRIDGE" if (name == "INVALID") else name.replace("_", " ")
- formatted_num = f"{float(result):.4f}"
- body += f"
{new_name}
time: {formatted_num} seconds
"
- body += f'
{extra_info} '
- if width > 0:
- body += f"
cardinality: {card}
"
- body += f"
estimate: {est}
"
- body += f"
width: {width} bytes
"
- # TODO: Expand on timing. Usually available from a detailed profiling # noqa: TD002, TD003
- body += "
"
- body += " "
- return body
-
-
-def generate_tree_recursive(json_graph: object, cpu_time: float) -> str: # noqa: D103
- node_prefix_html = "
"
- node_suffix_html = " "
-
- extra_info = ""
- estimate = 0
- for key in json_graph["extra_info"]:
- value = json_graph["extra_info"][key]
- if key == "Estimated Cardinality":
- estimate = int(value)
+class ProfilingInfo: # noqa: D101
+ def __init__(self, conn: Optional[DuckDBPyConnection] = None, from_file: Optional[str] = None) -> None: # noqa: D107
+ self.conn = conn
+ self.from_file = from_file
+
+ def to_json(self) -> str: # noqa: D102
+ if self.from_file is not None:
+ with open_utf8(self.from_file, "r") as f:
+ return f.read()
+
+ return self.conn.get_profiling_information(format="json")
+
+ def to_pydict(self) -> dict: # noqa: D102
+ return json.loads(self.to_json())
+
+ def to_html(self, output_file: str = "profile.html") -> str: # noqa: D102
+ profiling_info_text = self.to_json()
+ html_output = self._translate_json_to_html(input_text=profiling_info_text, output_file=output_file)
+ return html_output
+
+ def _get_child_timings(self, top_node: object, query_timings: object, depth: int = 0) -> str:
+ node_timing = NodeTiming(top_node["operator_type"], float(top_node["operator_timing"]), depth)
+ query_timings.add_node_timing(node_timing)
+ for child in top_node["children"]:
+ self._get_child_timings(child, query_timings, depth + 1)
+
+ @staticmethod
+ def _get_f7fff0_shade_hex(fraction: float) -> str:
+        """Return a shade between very light (#f7fff0) and a slightly darker green-yellow,
+ depending on the fraction (0..1).
+ """ # noqa: D205
+ fraction = max(0, min(1, fraction))
+
+ # Define RGB for light and dark end
+ light_color = (247, 255, 240) # #f7fff0
+ dark_color = (200, 255, 150) # slightly darker/more saturated green-yellow
+
+ # Interpolate RGB channels
+ r = int(light_color[0] + (dark_color[0] - light_color[0]) * fraction)
+ g = int(light_color[1] + (dark_color[1] - light_color[1]) * fraction)
+ b = int(light_color[2] + (dark_color[2] - light_color[2]) * fraction)
+
+ return f"#{r:02x}{g:02x}{b:02x}"
+
+ def _get_node_body(
+ self, name: str, result: str, cpu_time: float, card: int, est: int, result_size: int, extra_info: str
+ ) -> str:
+ """Generate the HTML body for a single node in the tree."""
+ node_style = f"background-color: {self._get_f7fff0_shade_hex(float(result) / cpu_time)};"
+ new_name = "BRIDGE" if (name == "INVALID") else name.replace("_", " ")
+ formatted_num = f"{float(result):.4f}"
+
+ body = f''
+ body += ''
+ body += f"
{new_name}
"
+ if result_size > 0:
+ body += f"
time: {formatted_num}s
"
+ body += f"
cardinality: {card}
"
+ body += f"
estimate: {est}
"
+ body += f"
result size: {result_size} bytes
"
+ body += "
"
+ body += "Extra info "
+ body += ''
+ body += f"
{extra_info}
"
+ # TODO: Expand on timing. Usually available from a detailed profiling # noqa: TD002, TD003
+ body += "
"
+ body += " "
+ body += "
"
+ body += " "
+ return body
+
+ def _generate_tree_recursive(self, json_graph: object, cpu_time: float) -> str:
+ node_prefix_html = ""
+ node_suffix_html = " "
+
+ extra_info = ""
+ estimate = 0
+ for key in json_graph["extra_info"]:
+ value = json_graph["extra_info"][key]
+ if key == "Estimated Cardinality":
+ estimate = int(value)
+ else:
+ extra_info += f"{key}: {value} "
+
+ # get rid of some typically long names
+ extra_info = re.sub(r"__internal_\s*", "__", extra_info)
+ extra_info = re.sub(r"compress_integral\s*", "compress", extra_info)
+
+ node_body = self._get_node_body(
+ json_graph["operator_type"],
+ json_graph["operator_timing"],
+ cpu_time,
+ json_graph["operator_cardinality"],
+ estimate,
+ json_graph["result_set_size"],
+ re.sub(r",\s*", ", ", extra_info),
+ )
+
+ children_html = ""
+ if len(json_graph["children"]) >= 1:
+ children_html += ""
+ for child in json_graph["children"]:
+ children_html += self._generate_tree_recursive(child, cpu_time)
+ children_html += " "
+ return node_prefix_html + node_body + children_html + node_suffix_html
+
+ # For generating the table in the top left with expandable phases
+ def _generate_timing_html(self, graph_json: object, query_timings: object) -> object:
+        """Generate a timing HTML table with expandable phases."""
+ json_graph = json.loads(graph_json)
+ self._gather_timing_information(json_graph, query_timings)
+ table_head = """
+
+
+
+ Phase
+ Time (s)
+ Percentage
+
+ """
+
+ table_body = ""
+ table_end = "
"
+
+ execution_time = query_timings.get_sum_of_all_timings()
+
+ all_phases = query_timings.get_phases()
+ query_timings.add_node_timing(NodeTiming("Execution Time (CPU)", execution_time, None))
+ all_phases = ["Execution Time (CPU)", *all_phases]
+
+ for phase in all_phases:
+ summarized_phase = query_timings.get_summary_phase_timings(phase)
+ summarized_phase.calculate_percentage(execution_time)
+ phase_column = f"{phase} " if phase == "Execution Time (CPU)" else phase
+
+ # Main phase row
+ table_body += f"""
+
+ {phase_column}
+ {round(summarized_phase.time, 8)}
+ {str(summarized_phase.percentage * 100)[:6]}%
+
+ """
+
+ # Add expandable details for individual nodes (except for Execution Time)
+ if phase != "Execution Time (CPU)":
+ phase_timings = query_timings.get_phase_timings(phase)
+ if len(phase_timings) > 1: # Only show details if there are multiple nodes
+ table_body += f"""
+
+
+
+
+ Show {len(phase_timings)} nodes
+
+
+
+ """
+ for node_timing in sorted(phase_timings, key=lambda x: x.time, reverse=True):
+ node_timing.calculate_percentage(execution_time)
+ depth_indent = " " * (node_timing.depth * 4)
+ table_body += f"""
+
+ {depth_indent}↳ Depth {node_timing.depth}
+ {round(node_timing.time, 8)}
+ {str(node_timing.percentage * 100)[:6]}%
+
+ """ # noqa: E501
+ table_body += """
+
+
+
+
+
+ """
+
+ table_body += table_end
+ return table_head + table_body
+
+ @staticmethod
+ def _generate_metric_grid_html(graph_json: str) -> str:
+ json_graph = json.loads(graph_json)
+ metrics = {
+ "Execution Time (s)": f"{float(json_graph.get('latency', 'N/A')):.4f}",
+ "Total GB Read": f"{float(json_graph.get('total_bytes_read', 'N/A')) / (1024**3):.4f}"
+ if json_graph.get("total_bytes_read", "N/A") != "N/A"
+ else "N/A",
+ "Total GB Written": f"{float(json_graph.get('total_bytes_written', 'N/A')) / (1024**3):.4f}"
+ if json_graph.get("total_bytes_written", "N/A") != "N/A"
+ else "N/A",
+ "Peak Memory (GB)": f"{float(json_graph.get('system_peak_buffer_memory', 'N/A')) / (1024**3):.4f}"
+ if json_graph.get("system_peak_buffer_memory", "N/A") != "N/A"
+ else "N/A",
+ "Rows Scanned": f"{json_graph.get('cumulative_rows_scanned', 'N/A'):,}"
+ if json_graph.get("cumulative_rows_scanned", "N/A") != "N/A"
+ else "N/A",
+ }
+ metric_grid_html = """"""
+ for key in metrics:
+ metric_grid_html += f"""
+
+
{key}
+
{metrics[key]}
+
+ """
+ metric_grid_html += "
"
+ return metric_grid_html
+
+ @staticmethod
+ def _generate_sql_query_html(graph_json: str) -> str:
+ json_graph = json.loads(graph_json)
+ sql_query = json_graph.get("query_name", "N/A")
+ sql_html = f"""
+ SQL Query
+
+
+ """
+ return sql_html
+
+ def _generate_tree_html(self, graph_json: object) -> str:
+ json_graph = json.loads(graph_json)
+ cpu_time = float(json_graph["cpu_time"])
+ tree_prefix = '"
+ # first level of json is general overview
+ # TODO: make sure json output first level always has only 1 level # noqa: TD002, TD003
+ tree_body = self._generate_tree_recursive(json_graph["children"][0], cpu_time)
+ return tree_prefix + tree_body + tree_suffix
+
+ def _generate_ipython(self, json_input: str) -> str:
+ from IPython.core.display import HTML
+
+ html_output = self._generate_html(json_input, False)
+
+ return HTML(
+ (
+ '\n ${CSS}\n ${LIBRARIES}\n
\n ${CHART_SCRIPT}\n '
+ )
+ .replace("${CSS}", html_output["css"])
+ .replace("${CHART_SCRIPT}", html_output["chart_script"])
+ .replace("${LIBRARIES}", html_output["libraries"])
+ )
+
+ @staticmethod
+ def _generate_style_html(graph_json: str, include_meta_info: bool) -> None: # noqa: FBT001
+ treeflex_css = ' \n'
+ libraries = ' \n' # noqa: E501
+ return {"treeflex_css": treeflex_css, "duckdb_css": qgraph_css, "libraries": libraries, "chart_script": ""}
+
+ def _gather_timing_information(self, json: str, query_timings: object) -> None:
+ # add up all of the times
+ # measure each time as a percentage of the total time.
+ # then you can return a list of [phase, time, percentage]
+ self._get_child_timings(json["children"][0], query_timings)
+
+ def _translate_json_to_html(
+ self, input_file: Optional[str] = None, input_text: Optional[str] = None, output_file: str = "profile.html"
+ ) -> None:
+ query_timings = AllTimings()
+ if input_text is not None:
+ text = input_text
+ elif input_file is not None:
+ with open_utf8(input_file, "r") as f:
+ text = f.read()
else:
- extra_info += f"{key}: {value} "
- cardinality = json_graph["operator_cardinality"]
- width = int(json_graph["result_set_size"] / max(1, cardinality))
-
- # get rid of some typically long names
- extra_info = re.sub(r"__internal_\s*", "__", extra_info)
- extra_info = re.sub(r"compress_integral\s*", "compress", extra_info)
-
- node_body = get_node_body(
- json_graph["operator_type"],
- json_graph["operator_timing"],
- cpu_time,
- cardinality,
- estimate,
- width,
- re.sub(r",\s*", ", ", extra_info),
- )
-
- children_html = ""
- if len(json_graph["children"]) >= 1:
- children_html += ""
- for child in json_graph["children"]:
- children_html += generate_tree_recursive(child, cpu_time)
- children_html += " "
- return node_prefix_html + node_body + children_html + node_suffix_html
-
-
-# For generating the table in the top left.
-def generate_timing_html(graph_json: object, query_timings: object) -> object: # noqa: D103
- json_graph = json.loads(graph_json)
- gather_timing_information(json_graph, query_timings)
- total_time = float(json_graph.get("operator_timing") or json_graph.get("latency"))
- table_head = """
-
-
-
- Phase
- Time
- Percentage
-
- """
-
- table_body = ""
- table_end = "
"
-
- execution_time = query_timings.get_sum_of_all_timings()
-
- all_phases = query_timings.get_phases()
- query_timings.add_node_timing(NodeTiming("TOTAL TIME", total_time))
- query_timings.add_node_timing(NodeTiming("Execution Time", execution_time))
- all_phases = ["TOTAL TIME", "Execution Time", *all_phases]
- for phase in all_phases:
- summarized_phase = query_timings.get_summary_phase_timings(phase)
- summarized_phase.calculate_percentage(total_time)
- phase_column = f"{phase} " if phase == "TOTAL TIME" or phase == "Execution Time" else phase
- table_body += f"""
-
- {phase_column}
- {summarized_phase.time}
- {str(summarized_phase.percentage * 100)[:6]}%
-
-"""
- table_body += table_end
- return table_head + table_body
-
-
-def generate_tree_html(graph_json: object) -> str: # noqa: D103
- json_graph = json.loads(graph_json)
- cpu_time = float(json_graph["cpu_time"])
- tree_prefix = '"
- # first level of json is general overview
- # TODO: make sure json output first level always has only 1 level # noqa: TD002, TD003
- tree_body = generate_tree_recursive(json_graph["children"][0], cpu_time)
- return tree_prefix + tree_body + tree_suffix
-
-
-def generate_ipython(json_input: str) -> str: # noqa: D103
- from IPython.core.display import HTML
-
- html_output = generate_html(json_input, False) # noqa: F821
-
- return HTML(
- ('\n ${CSS}\n ${LIBRARIES}\n
\n ${CHART_SCRIPT}\n ')
- .replace("${CSS}", html_output["css"])
- .replace("${CHART_SCRIPT}", html_output["chart_script"])
- .replace("${LIBRARIES}", html_output["libraries"])
- )
-
-
-def generate_style_html(graph_json: str, include_meta_info: bool) -> None: # noqa: D103, FBT001
- treeflex_css = ' \n'
- css = "\n"
- return {"treeflex_css": treeflex_css, "duckdb_css": css, "libraries": "", "chart_script": ""}
-
-
-def gather_timing_information(json: str, query_timings: object) -> None: # noqa: D103
- # add up all of the times
- # measure each time as a percentage of the total time.
- # then you can return a list of [phase, time, percentage]
- get_child_timings(json["children"][0], query_timings)
-
-
-def translate_json_to_html(input_file: str, output_file: str) -> None: # noqa: D103
- query_timings = AllTimings()
- with open_utf8(input_file, "r") as f:
- text = f.read()
-
- html_output = generate_style_html(text, True)
- timing_table = generate_timing_html(text, query_timings)
- tree_output = generate_tree_html(text)
-
- # finally create and write the html
- with open_utf8(output_file, "w+") as f:
- html = """
-
-
-
-
- Query Profile Graph for Query
- ${TREEFLEX_CSS}
-
-
-
-
-
- ${TIMING_TABLE}
-
- ${TREE}
-
-
-"""
- html = html.replace("${TREEFLEX_CSS}", html_output["treeflex_css"])
- html = html.replace("${DUCKDB_CSS}", html_output["duckdb_css"])
- html = html.replace("${TIMING_TABLE}", timing_table)
- html = html.replace("${TREE}", tree_output)
- f.write(html)
+ print("please provide either input file or input text")
+ exit(1)
+ html_output = self._generate_style_html(text, True)
+ highlight_metric_grid = self._generate_metric_grid_html(text)
+ timing_table = self._generate_timing_html(text, query_timings)
+ tree_output = self._generate_tree_html(text)
+ sql_query_html = self._generate_sql_query_html(text)
+ # finally create and write the html
+ with open_utf8(output_file, "w+") as f:
+ html = """
+
+
+
+
+ Query Profile Graph for Query
+ ${TREEFLEX_CSS}
+
+
+
+
+
+
+ Query Profile Graph
+
+
+ ${METRIC_GRID}
+
+
+ ${SQL_QUERY}
+ ${TIMING_TABLE}
+
+ ${TREE}
+
+
+ """ # noqa: E501
+ html = html.replace("${TREEFLEX_CSS}", html_output["treeflex_css"])
+ html = html.replace("${DUCKDB_CSS}", html_output["duckdb_css"])
+ html = html.replace("${METRIC_GRID}", highlight_metric_grid)
+ html = html.replace("${SQL_QUERY}", sql_query_html)
+ html = html.replace("${TIMING_TABLE}", timing_table)
+ html = html.replace("${TREE}", tree_output)
+ f.write(html)
def main() -> None: # noqa: D103
@@ -326,7 +641,7 @@ def main() -> None: # noqa: D103
description="""Given a json profile output, generate a html file showing the query graph and
timings of operators""",
)
- parser.add_argument("profile_input", help="profile input in json")
+ parser.add_argument("--profile_input", help="profile input in json")
parser.add_argument("--out", required=False, default=False)
parser.add_argument("--open", required=False, action="store_true", default=True)
args = parser.parse_args()
@@ -347,8 +662,8 @@ def main() -> None: # noqa: D103
exit(1)
open_output = args.open
-
- translate_json_to_html(input, output)
+ profiling_info = ProfilingInfo(from_file=input)
+ profiling_info.to_html(output_file=output)
if open_output:
webbrowser.open(f"file://{Path(output).resolve()}", new=2)
diff --git a/scripts/connection_methods.json b/scripts/connection_methods.json
index a87b992f..3b02a9b1 100644
--- a/scripts/connection_methods.json
+++ b/scripts/connection_methods.json
@@ -1093,5 +1093,30 @@
}
],
"return": "None"
+ },
+ {
+ "name": "get_profiling_information",
+ "function": "GetProfilingInformation",
+ "docs": "Get profiling information for a query",
+ "args": [
+ {
+ "name": "format",
+                "default": "json",
+ "type": "Optional[str]"
+ }
+ ],
+ "return": "str"
+ },
+ {
+ "name": "enable_profiling",
+ "function": "EnableProfiling",
+ "docs": "Enable profiling for a connection",
+ "return": "None"
+ },
+ {
+ "name": "disable_profiling",
+ "function": "DisableProfiling",
+ "docs": "Disable profiling for a connection",
+ "return": "None"
}
]
diff --git a/scripts/generate_connection_stubs.py b/scripts/generate_connection_stubs.py
index d542a047..76c19b36 100644
--- a/scripts/generate_connection_stubs.py
+++ b/scripts/generate_connection_stubs.py
@@ -5,7 +5,7 @@
os.chdir(Path(__file__).parent)
JSON_PATH = "connection_methods.json"
-DUCKDB_STUBS_FILE = Path("..") / "duckdb" / "__init__.pyi"
+DUCKDB_STUBS_FILE = Path("..") / "_duckdb-stubs" / "__init__.pyi"
START_MARKER = " # START OF CONNECTION METHODS"
END_MARKER = " # END OF CONNECTION METHODS"
diff --git a/src/duckdb_py/duckdb_python.cpp b/src/duckdb_py/duckdb_python.cpp
index 1dd3ba17..fedbec5f 100644
--- a/src/duckdb_py/duckdb_python.cpp
+++ b/src/duckdb_py/duckdb_python.cpp
@@ -124,6 +124,34 @@ static void InitializeConnectionMethods(py::module_ &m) {
},
"Check if a filesystem with the provided name is currently registered", py::arg("name"), py::kw_only(),
py::arg("connection") = py::none());
+ m.def(
+ "get_profiling_information",
+ [](const py::str &format, shared_ptr
conn = nullptr) {
+ if (!conn) {
+ conn = DuckDBPyConnection::DefaultConnection();
+ }
+ return conn->GetProfilingInformation(format);
+ },
+	    "Get profiling information for a query", py::kw_only(), py::arg("format") = "json",
+ py::arg("connection") = py::none());
+ m.def(
+ "enable_profiling",
+ [](shared_ptr conn = nullptr) {
+ if (!conn) {
+ conn = DuckDBPyConnection::DefaultConnection();
+ }
+ return conn->EnableProfiling();
+ },
+ "Enable profiling for the current connection", py::kw_only(), py::arg("connection") = py::none());
+ m.def(
+ "disable_profiling",
+ [](shared_ptr conn = nullptr) {
+ if (!conn) {
+ conn = DuckDBPyConnection::DefaultConnection();
+ }
+ return conn->DisableProfiling();
+ },
+ "Disable profiling for the current connection", py::kw_only(), py::arg("connection") = py::none());
m.def(
"create_function",
[](const string &name, const py::function &udf, const py::object &arguments = py::none(),
diff --git a/src/duckdb_py/include/duckdb_python/pyconnection/pyconnection.hpp b/src/duckdb_py/include/duckdb_python/pyconnection/pyconnection.hpp
index 48ee055e..8117eda9 100644
--- a/src/duckdb_py/include/duckdb_python/pyconnection/pyconnection.hpp
+++ b/src/duckdb_py/include/duckdb_python/pyconnection/pyconnection.hpp
@@ -337,6 +337,11 @@ struct DuckDBPyConnection : public enable_shared_from_this {
py::list ListFilesystems();
bool FileSystemIsRegistered(const string &name);
+ // Profiling info
+ py::str GetProfilingInformation(const py::str &format = "json");
+ void EnableProfiling();
+ void DisableProfiling();
+
//! Default connection to an in-memory database
static DefaultConnectionHolder default_connection;
//! Caches and provides an interface to get frequently used modules+subtypes
diff --git a/src/duckdb_py/pyconnection.cpp b/src/duckdb_py/pyconnection.cpp
index 11a7ea9d..7a454b29 100644
--- a/src/duckdb_py/pyconnection.cpp
+++ b/src/duckdb_py/pyconnection.cpp
@@ -3,6 +3,7 @@
#include "duckdb/catalog/default/default_types.hpp"
#include "duckdb/common/arrow/arrow.hpp"
#include "duckdb/common/enums/file_compression_type.hpp"
+#include "duckdb/common/enums/profiler_format.hpp"
#include "duckdb/common/printer.hpp"
#include "duckdb/common/types.hpp"
#include "duckdb/common/types/vector.hpp"
@@ -285,6 +286,10 @@ static void InitializeConnectionMethods(py::class_