diff --git a/.gitignore b/.gitignore index dce09a74..a42c13b0 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,8 @@ .sw? #OS X specific files. .DS_store +#VSCode specifics +.vscode/ #==============================================================================# # Build artifacts @@ -45,6 +47,7 @@ cmake-build-release cmake-build-relwithdebinfo duckdb_packaging/duckdb_version.txt test.db +tmp/ #==============================================================================# # Python diff --git a/_duckdb-stubs/__init__.pyi b/_duckdb-stubs/__init__.pyi index 124a5d5a..67830ad3 100644 --- a/_duckdb-stubs/__init__.pyi +++ b/_duckdb-stubs/__init__.pyi @@ -86,9 +86,11 @@ __all__: list[str] = [ "default_connection", "description", "df", + "disable_profiling", "distinct", "dtype", "duplicate", + "enable_profiling", "enum_type", "execute", "executemany", @@ -109,6 +111,7 @@ __all__: list[str] = [ "from_df", "from_parquet", "from_query", + "get_profiling_information", "get_table_names", "install_extension", "interrupt", @@ -313,6 +316,9 @@ class DuckDBPyConnection: repository_url: str | None = None, version: str | None = None, ) -> None: ... + def get_profiling_information(self, format: str = "json") -> str: ... + def enable_profiling(self) -> None: ... + def disable_profiling(self) -> None: ... def interrupt(self) -> None: ... def list_filesystems(self) -> list[str]: ... def list_type(self, type: sqltypes.DuckDBPyType) -> sqltypes.DuckDBPyType: ... @@ -1250,6 +1256,9 @@ def limit( *, connection: DuckDBPyConnection | None = None, ) -> DuckDBPyRelation: ... +def get_profiling_information(*, connection: DuckDBPyConnection | None = None, format: str = "json") -> str: ... +def enable_profiling(*, connection: DuckDBPyConnection | None = None) -> None: ... +def disable_profiling(*, connection: DuckDBPyConnection | None = None) -> None: ... def list_filesystems(*, connection: DuckDBPyConnection | None = None) -> list[str]: ... def list_type( type: sqltypes.DuckDBPyType, *, connection: DuckDBPyConnection | None = None diff --git a/duckdb/__init__.py b/duckdb/__init__.py index a7370083..a9ca7773 100644 --- a/duckdb/__init__.py +++ b/duckdb/__init__.py @@ -84,9 +84,11 @@ default_connection, description, df, + disable_profiling, distinct, dtype, duplicate, + enable_profiling, enum_type, execute, executemany, @@ -107,6 +109,7 @@ from_df, from_parquet, from_query, + get_profiling_information, get_table_names, install_extension, interrupt, @@ -310,9 +313,11 @@ "default_connection", "description", "df", + "disable_profiling", "distinct", "dtype", "duplicate", + "enable_profiling", "enum_type", "execute", "executemany", @@ -333,6 +338,7 @@ "from_df", "from_parquet", "from_query", + "get_profiling_information", "get_table_names", "install_extension", "interrupt", diff --git a/duckdb/query_graph/__init__.py b/duckdb/query_graph/__init__.py new file mode 100644 index 00000000..340dd8d3 --- /dev/null +++ b/duckdb/query_graph/__init__.py @@ -0,0 +1,3 @@ +from .__main__ import ProfilingInfo # noqa: D104 + +__all__ = ["ProfilingInfo"] diff --git a/duckdb/query_graph/__main__.py b/duckdb/query_graph/__main__.py index d4851694..5ffb942d 100644 --- a/duckdb/query_graph/__main__.py +++ b/duckdb/query_graph/__main__.py @@ -4,81 +4,277 @@ import webbrowser from functools import reduce from pathlib import Path +from typing import Optional + +from duckdb import DuckDBPyConnection qgraph_css = """ -.styled-table { - border-collapse: collapse; - margin: 25px 0; - font-size: 0.9em; - font-family: sans-serif; - min-width: 400px; - box-shadow: 0 0 20px rgba(0, 0, 0, 0.15); +:root { + --text-primary-color: #0d0d0d; + --text-secondary-color: #444; + --doc-codebox-border-color: #e6e6e6; + --doc-codebox-background-color: #f7f7f7; + --doc-scrollbar-bg: #e6e6e6; + --doc-scrollbar-slider: #ccc; + --duckdb-accent: #009982; + --duckdb-accent-light: #00b89a; + --card-bg: #fff; + --border-radius: 8px; + --shadow: 0 4px 14px rgba(0,0,0,0.05); } -.styled-table thead tr { - background-color: #009879; - color: #ffffff; - text-align: left; + +html, body { + margin: 0; + padding: 0; + font-family: Inter, system-ui, -apple-system, "Segoe UI", Roboto, sans-serif; + color: var(--text-primary-color); + background: #fafafa; + line-height: 1.55; } -.styled-table th, -.styled-table td { - padding: 12px 15px; + +.container { + max-width: 1000px; + margin: 40px auto; + padding: 0 20px; } -.styled-table tbody tr { - border-bottom: 1px solid #dddddd; + +header { + display: flex; + align-items: center; + gap: 10px; + margin-bottom: 5px; } -.styled-table tbody tr:nth-of-type(even) { - background-color: #f3f3f3; +header img { + width: 100px; + height: 100px; } -.styled-table tbody tr:last-of-type { - border-bottom: 2px solid #009879; +header h1 { + font-size: 1.5rem; + font-weight: 600; + margin: 0; + color: var(--text-primary-color); } -.node-body { - font-size:15px; +/* === Table Styling (DuckDB documentation style, flat header) === */ +table { + border-collapse: collapse; + width: 100%; + margin-bottom: 20px; + text-align: left; + font-variant-numeric: tabular-nums; + border: 1px solid var(--doc-codebox-border-color); + border-radius: var(--border-radius); + overflow: hidden; + box-shadow: var(--shadow); + background: var(--card-bg); +} + +thead { + background-color: var(--duckdb-accent); + color: white; +} + +th, td { + padding: 10px 12px; + font-size: 14px; + vertical-align: top; +} + +th { + font-weight: 700; +} + +tbody tr { + border-bottom: 1px solid var(--doc-codebox-border-color); +} + +tbody tr:last-child td { + border-bottom: none; +} + +tbody tr:hover { + background: var(--doc-codebox-border-color); +} + +tbody tr.phase-details-row { + border-bottom: none; } + +tbody tr.phase-details-row:hover { + background: transparent; +} + +tbody tr.phase-details-row details summary { + font-size: 12px; + padding: 4px 0; +} + +tbody tr.phase-details-row details[open] summary { + margin-bottom: 4px; +} + +/* === Chart/Card Section === */ +.chart { + padding: 20px; + border: 1px solid var(--doc-codebox-border-color); + border-radius: var(--border-radius); + background: var(--card-bg); + box-shadow: var(--shadow); + overflow: visible; +} + +/* === Tree Layout Styling === */ +.tf-tree { + overflow-x: visible; + overflow-y: visible; + padding-top: 20px; +} + .tf-nc { - position: relative; - width: 180px; - text-align: center; - background-color: #fff100; + background: var(--card-bg); + border: 1px solid var(--doc-codebox-border-color); + border-radius: var(--border-radius); + padding: 6px; + display: inline-block; +} + +.node-body { + font-size: 13px; + text-align: left; + padding: 10px; + white-space: nowrap; } -.custom-tooltip { - position: relative; + +.node-body p { + margin: 2px 0; +} + +.node-details { + white-space: nowrap; + overflow: visible; display: inline-block; } -.tooltip-text { - visibility: hidden; - background-color: #333; - color: #fff; +/* === Metric Boxes === */ +.chart .metrics-grid { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(150px, 1fr)); + gap: 16px; + margin-bottom: 20px; +} + +.chart .metric-box { + background: var(--card-bg); + border: 1px solid var(--doc-codebox-border-color); + border-radius: var(--border-radius); + box-shadow: var(--shadow); + padding: 12px 16px; text-align: center; - padding: 0px; - border-radius: 1px; + transition: transform 0.2s ease, box-shadow 0.2s ease; +} + +.chart .metric-box:hover { + transform: translateY(-2px); + box-shadow: 0 6px 18px rgba(0, 0, 0, 0.08); +} + +.chart .metric-title { + font-size: 13px; + color: var(--text-secondary-color); + margin-bottom: 4px; + text-transform: uppercase; + letter-spacing: 0.5px; +} + +.chart .metric-value { + font-size: 18px; + font-weight: 600; + color: var(--duckdb-accent); +} - /* Positioning */ - position: absolute; - z-index: 1; - bottom: 100%; - left: 50%; - transform: translateX(-50%); - margin-bottom: 8px; - /* Tooltip Arrow */ - width: 400px; +/* === SQL Query Block === */ +.chart.sql-block { + background: var(--doc-codebox-background-color); + border: 1px solid var(--doc-codebox-border-color); + border-radius: var(--border-radius); + box-shadow: var(--shadow); + padding: 16px; + overflow-x: auto; + margin-top: 20px; +} + +.chart.sql-block pre { + margin: 0; + font-family: "JetBrains Mono", "Fira Code", Consolas, monospace; + font-size: 13.5px; + line-height: 1.5; + color: var(--text-primary-color); + white-space: pre; +} + +.chart.sql-block code { + color: var(--duckdb-accent); + font-weight: 500; +} + + +/* === Links, Typography, and Consistency === */ +a { + color: var(--duckdb-accent); + text-decoration: underline; + transition: color 0.3s; +} + +a:hover { + color: black; +} + +strong { + font-weight: 600; } -.custom-tooltip:hover .tooltip-text { - visibility: visible; +/* === Dark Mode Support === */ +@media (prefers-color-scheme: dark) { + :root { + --text-primary-color: #e6e6e6; + --text-secondary-color: #b3b3b3; + --doc-codebox-border-color: #2a2a2a; + --doc-codebox-background-color: #1e1e1e; + --card-bg: #111; + } + body { + background: #0b0b0b; + } + thead { + background-color: var(--duckdb-accent); + } + tbody tr:hover { + background: #222; + } + + /* Fix tree node text visibility in dark mode */ + .tf-nc .node-body, + .tf-nc .node-body p, + .tf-nc .node-details { + color: #1a1a1a !important; + } + + /* Fix metric title visibility in dark mode */ + .chart .metric-title { + color: #b3b3b3; + } } -""" +""" # noqa: W293 class NodeTiming: # noqa: D101 - def __init__(self, phase: str, time: float) -> None: # noqa: D107 + def __init__(self, phase: str, time: float, depth: int) -> None: # noqa: D107 self.phase = phase self.time = time + self.depth = depth # percentage is determined later. self.percentage = 0 @@ -88,7 +284,7 @@ def calculate_percentage(self, total_time: float) -> None: # noqa: D102 def combine_timing(self, r: "NodeTiming") -> "NodeTiming": # noqa: D102 # TODO: can only add timings for same-phase nodes # noqa: TD002, TD003 total_time = self.time + r.time - return NodeTiming(self.phase, total_time) + return NodeTiming(self.phase, total_time, self.depth) class AllTimings: # noqa: D101 @@ -124,200 +320,319 @@ def open_utf8(fpath: str, flags: str) -> object: # noqa: D103 return Path(fpath).open(mode=flags, encoding="utf8") -def get_child_timings(top_node: object, query_timings: object) -> str: # noqa: D103 - node_timing = NodeTiming(top_node["operator_type"], float(top_node["operator_timing"])) - query_timings.add_node_timing(node_timing) - for child in top_node["children"]: - get_child_timings(child, query_timings) - - -def get_pink_shade_hex(fraction: float) -> str: # noqa: D103 - fraction = max(0, min(1, fraction)) - - # Define the RGB values for very light pink (almost white) and dark pink - light_pink = (255, 250, 250) # Very light pink - dark_pink = (255, 20, 147) # Dark pink - - # Calculate the RGB values for the given fraction - r = int(light_pink[0] + (dark_pink[0] - light_pink[0]) * fraction) - g = int(light_pink[1] + (dark_pink[1] - light_pink[1]) * fraction) - b = int(light_pink[2] + (dark_pink[2] - light_pink[2]) * fraction) - - # Return as hexadecimal color code - return f"#{r:02x}{g:02x}{b:02x}" - - -def get_node_body(name: str, result: str, cpu_time: float, card: int, est: int, width: int, extra_info: str) -> str: # noqa: D103 - node_style = f"background-color: {get_pink_shade_hex(float(result) / cpu_time)};" - - body = f'' - body += '
' - new_name = "BRIDGE" if (name == "INVALID") else name.replace("_", " ") - formatted_num = f"{float(result):.4f}" - body += f"

{new_name}

time: {formatted_num} seconds

" - body += f' {extra_info} ' - if width > 0: - body += f"

cardinality: {card}

" - body += f"

estimate: {est}

" - body += f"

width: {width} bytes

" - # TODO: Expand on timing. Usually available from a detailed profiling # noqa: TD002, TD003 - body += "
" - body += "
" - return body - - -def generate_tree_recursive(json_graph: object, cpu_time: float) -> str: # noqa: D103 - node_prefix_html = "
  • " - node_suffix_html = "
  • " - - extra_info = "" - estimate = 0 - for key in json_graph["extra_info"]: - value = json_graph["extra_info"][key] - if key == "Estimated Cardinality": - estimate = int(value) +class ProfilingInfo: # noqa: D101 + def __init__(self, conn: Optional[DuckDBPyConnection] = None, from_file: Optional[str] = None) -> None: # noqa: D107 + self.conn = conn + self.from_file = from_file + + def to_json(self) -> str: # noqa: D102 + if self.from_file is not None: + with open_utf8(self.from_file, "r") as f: + return f.read() + + return self.conn.get_profiling_information(format="json") + + def to_pydict(self) -> dict: # noqa: D102 + return json.loads(self.to_json()) + + def to_html(self, output_file: str = "profile.html") -> str: # noqa: D102 + profiling_info_text = self.to_json() + html_output = self._translate_json_to_html(input_text=profiling_info_text, output_file=output_file) + return html_output + + def _get_child_timings(self, top_node: object, query_timings: object, depth: int = 0) -> str: + node_timing = NodeTiming(top_node["operator_type"], float(top_node["operator_timing"]), depth) + query_timings.add_node_timing(node_timing) + for child in top_node["children"]: + self._get_child_timings(child, query_timings, depth + 1) + + @staticmethod + def _get_f7fff0_shade_hex(fraction: float) -> str: + """Returns a shade between very light (#f7fff0) and a slightly darker green-yellow, + depending on the fraction (0..1). + """ # noqa: D205 + fraction = max(0, min(1, fraction)) + + # Define RGB for light and dark end + light_color = (247, 255, 240) # #f7fff0 + dark_color = (200, 255, 150) # slightly darker/more saturated green-yellow + + # Interpolate RGB channels + r = int(light_color[0] + (dark_color[0] - light_color[0]) * fraction) + g = int(light_color[1] + (dark_color[1] - light_color[1]) * fraction) + b = int(light_color[2] + (dark_color[2] - light_color[2]) * fraction) + + return f"#{r:02x}{g:02x}{b:02x}" + + def _get_node_body( + self, name: str, result: str, cpu_time: float, card: int, est: int, result_size: int, extra_info: str + ) -> str: + """Generate the HTML body for a single node in the tree.""" + node_style = f"background-color: {self._get_f7fff0_shade_hex(float(result) / cpu_time)};" + new_name = "BRIDGE" if (name == "INVALID") else name.replace("_", " ") + formatted_num = f"{float(result):.4f}" + + body = f'' + body += '
    ' + body += f"

    {new_name}

    " + if result_size > 0: + body += f"

    time: {formatted_num}s

    " + body += f"

    cardinality: {card}

    " + body += f"

    estimate: {est}

    " + body += f"

    result size: {result_size} bytes

    " + body += "
    " + body += "Extra info" + body += '
    ' + body += f"

    {extra_info}

    " + # TODO: Expand on timing. Usually available from a detailed profiling # noqa: TD002, TD003 + body += "
    " + body += "
    " + body += "
    " + body += "
    " + return body + + def _generate_tree_recursive(self, json_graph: object, cpu_time: float) -> str: + node_prefix_html = "
  • " + node_suffix_html = "
  • " + + extra_info = "" + estimate = 0 + for key in json_graph["extra_info"]: + value = json_graph["extra_info"][key] + if key == "Estimated Cardinality": + estimate = int(value) + else: + extra_info += f"{key}: {value}
    " + + # get rid of some typically long names + extra_info = re.sub(r"__internal_\s*", "__", extra_info) + extra_info = re.sub(r"compress_integral\s*", "compress", extra_info) + + node_body = self._get_node_body( + json_graph["operator_type"], + json_graph["operator_timing"], + cpu_time, + json_graph["operator_cardinality"], + estimate, + json_graph["result_set_size"], + re.sub(r",\s*", ", ", extra_info), + ) + + children_html = "" + if len(json_graph["children"]) >= 1: + children_html += "" + return node_prefix_html + node_body + children_html + node_suffix_html + + # For generating the table in the top left with expandable phases + def _generate_timing_html(self, graph_json: object, query_timings: object) -> object: + """Generates timing HTML table with expandable phases.""" + json_graph = json.loads(graph_json) + self._gather_timing_information(json_graph, query_timings) + table_head = """ + + + + + + + + """ + + table_body = "" + table_end = "
    PhaseTime (s)Percentage
    " + + execution_time = query_timings.get_sum_of_all_timings() + + all_phases = query_timings.get_phases() + query_timings.add_node_timing(NodeTiming("Execution Time (CPU)", execution_time, None)) + all_phases = ["Execution Time (CPU)", *all_phases] + + for phase in all_phases: + summarized_phase = query_timings.get_summary_phase_timings(phase) + summarized_phase.calculate_percentage(execution_time) + phase_column = f"{phase}" if phase == "Execution Time (CPU)" else phase + + # Main phase row + table_body += f""" + + {phase_column} + {round(summarized_phase.time, 8)} + {str(summarized_phase.percentage * 100)[:6]}% + + """ + + # Add expandable details for individual nodes (except for Execution Time) + if phase != "Execution Time (CPU)": + phase_timings = query_timings.get_phase_timings(phase) + if len(phase_timings) > 1: # Only show details if there are multiple nodes + table_body += f""" + + +
    + + Show {len(phase_timings)} nodes + + + + """ + for node_timing in sorted(phase_timings, key=lambda x: x.time, reverse=True): + node_timing.calculate_percentage(execution_time) + depth_indent = " " * (node_timing.depth * 4) + table_body += f""" + + + + + + """ # noqa: E501 + table_body += """ + +
    {depth_indent}↳ Depth {node_timing.depth}{round(node_timing.time, 8)}{str(node_timing.percentage * 100)[:6]}%
    +
    + + + """ + + table_body += table_end + return table_head + table_body + + @staticmethod + def _generate_metric_grid_html(graph_json: str) -> str: + json_graph = json.loads(graph_json) + metrics = { + "Execution Time (s)": f"{float(json_graph.get('latency', 'N/A')):.4f}", + "Total GB Read": f"{float(json_graph.get('total_bytes_read', 'N/A')) / (1024**3):.4f}" + if json_graph.get("total_bytes_read", "N/A") != "N/A" + else "N/A", + "Total GB Written": f"{float(json_graph.get('total_bytes_written', 'N/A')) / (1024**3):.4f}" + if json_graph.get("total_bytes_written", "N/A") != "N/A" + else "N/A", + "Peak Memory (GB)": f"{float(json_graph.get('system_peak_buffer_memory', 'N/A')) / (1024**3):.4f}" + if json_graph.get("system_peak_buffer_memory", "N/A") != "N/A" + else "N/A", + "Rows Scanned": f"{json_graph.get('cumulative_rows_scanned', 'N/A'):,}" + if json_graph.get("cumulative_rows_scanned", "N/A") != "N/A" + else "N/A", + } + metric_grid_html = """
    """ + for key in metrics: + metric_grid_html += f""" +
    +
    {key}
    +
    {metrics[key]}
    +
    + """ + metric_grid_html += "
    " + return metric_grid_html + + @staticmethod + def _generate_sql_query_html(graph_json: str) -> str: + json_graph = json.loads(graph_json) + sql_query = json_graph.get("query_name", "N/A") + sql_html = f""" +
    SQL Query +
    +
    
    +    {sql_query}
    +            
    +
    +

    + """ + return sql_html + + def _generate_tree_html(self, graph_json: object) -> str: + json_graph = json.loads(graph_json) + cpu_time = float(json_graph["cpu_time"]) + tree_prefix = '
    \n
    " + # first level of json is general overview + # TODO: make sure json output first level always has only 1 level # noqa: TD002, TD003 + tree_body = self._generate_tree_recursive(json_graph["children"][0], cpu_time) + return tree_prefix + tree_body + tree_suffix + + def _generate_ipython(self, json_input: str) -> str: + from IPython.core.display import HTML + + html_output = self._generate_html(json_input, False) + + return HTML( + ( + '\n ${CSS}\n ${LIBRARIES}\n
    \n ${CHART_SCRIPT}\n ' + ) + .replace("${CSS}", html_output["css"]) + .replace("${CHART_SCRIPT}", html_output["chart_script"]) + .replace("${LIBRARIES}", html_output["libraries"]) + ) + + @staticmethod + def _generate_style_html(graph_json: str, include_meta_info: bool) -> None: # noqa: FBT001 + treeflex_css = '\n' + libraries = '\n' # noqa: E501 + return {"treeflex_css": treeflex_css, "duckdb_css": qgraph_css, "libraries": libraries, "chart_script": ""} + + def _gather_timing_information(self, json: str, query_timings: object) -> None: + # add up all of the times + # measure each time as a percentage of the total time. + # then you can return a list of [phase, time, percentage] + self._get_child_timings(json["children"][0], query_timings) + + def _translate_json_to_html( + self, input_file: Optional[str] = None, input_text: Optional[str] = None, output_file: str = "profile.html" + ) -> None: + query_timings = AllTimings() + if input_text is not None: + text = input_text + elif input_file is not None: + with open_utf8(input_file, "r") as f: + text = f.read() else: - extra_info += f"{key}: {value}
    " - cardinality = json_graph["operator_cardinality"] - width = int(json_graph["result_set_size"] / max(1, cardinality)) - - # get rid of some typically long names - extra_info = re.sub(r"__internal_\s*", "__", extra_info) - extra_info = re.sub(r"compress_integral\s*", "compress", extra_info) - - node_body = get_node_body( - json_graph["operator_type"], - json_graph["operator_timing"], - cpu_time, - cardinality, - estimate, - width, - re.sub(r",\s*", ", ", extra_info), - ) - - children_html = "" - if len(json_graph["children"]) >= 1: - children_html += "" - return node_prefix_html + node_body + children_html + node_suffix_html - - -# For generating the table in the top left. -def generate_timing_html(graph_json: object, query_timings: object) -> object: # noqa: D103 - json_graph = json.loads(graph_json) - gather_timing_information(json_graph, query_timings) - total_time = float(json_graph.get("operator_timing") or json_graph.get("latency")) - table_head = """ - - - - - - - - """ - - table_body = "" - table_end = "
    PhaseTimePercentage
    " - - execution_time = query_timings.get_sum_of_all_timings() - - all_phases = query_timings.get_phases() - query_timings.add_node_timing(NodeTiming("TOTAL TIME", total_time)) - query_timings.add_node_timing(NodeTiming("Execution Time", execution_time)) - all_phases = ["TOTAL TIME", "Execution Time", *all_phases] - for phase in all_phases: - summarized_phase = query_timings.get_summary_phase_timings(phase) - summarized_phase.calculate_percentage(total_time) - phase_column = f"{phase}" if phase == "TOTAL TIME" or phase == "Execution Time" else phase - table_body += f""" - - {phase_column} - {summarized_phase.time} - {str(summarized_phase.percentage * 100)[:6]}% - -""" - table_body += table_end - return table_head + table_body - - -def generate_tree_html(graph_json: object) -> str: # noqa: D103 - json_graph = json.loads(graph_json) - cpu_time = float(json_graph["cpu_time"]) - tree_prefix = '
    \n
    " - # first level of json is general overview - # TODO: make sure json output first level always has only 1 level # noqa: TD002, TD003 - tree_body = generate_tree_recursive(json_graph["children"][0], cpu_time) - return tree_prefix + tree_body + tree_suffix - - -def generate_ipython(json_input: str) -> str: # noqa: D103 - from IPython.core.display import HTML - - html_output = generate_html(json_input, False) # noqa: F821 - - return HTML( - ('\n ${CSS}\n ${LIBRARIES}\n
    \n ${CHART_SCRIPT}\n ') - .replace("${CSS}", html_output["css"]) - .replace("${CHART_SCRIPT}", html_output["chart_script"]) - .replace("${LIBRARIES}", html_output["libraries"]) - ) - - -def generate_style_html(graph_json: str, include_meta_info: bool) -> None: # noqa: D103, FBT001 - treeflex_css = '\n' - css = "\n" - return {"treeflex_css": treeflex_css, "duckdb_css": css, "libraries": "", "chart_script": ""} - - -def gather_timing_information(json: str, query_timings: object) -> None: # noqa: D103 - # add up all of the times - # measure each time as a percentage of the total time. - # then you can return a list of [phase, time, percentage] - get_child_timings(json["children"][0], query_timings) - - -def translate_json_to_html(input_file: str, output_file: str) -> None: # noqa: D103 - query_timings = AllTimings() - with open_utf8(input_file, "r") as f: - text = f.read() - - html_output = generate_style_html(text, True) - timing_table = generate_timing_html(text, query_timings) - tree_output = generate_tree_html(text) - - # finally create and write the html - with open_utf8(output_file, "w+") as f: - html = """ - - - - - Query Profile Graph for Query - ${TREEFLEX_CSS} - - - -
    -
    - ${TIMING_TABLE} -
    - ${TREE} - - -""" - html = html.replace("${TREEFLEX_CSS}", html_output["treeflex_css"]) - html = html.replace("${DUCKDB_CSS}", html_output["duckdb_css"]) - html = html.replace("${TIMING_TABLE}", timing_table) - html = html.replace("${TREE}", tree_output) - f.write(html) + print("please provide either input file or input text") + exit(1) + html_output = self._generate_style_html(text, True) + highlight_metric_grid = self._generate_metric_grid_html(text) + timing_table = self._generate_timing_html(text, query_timings) + tree_output = self._generate_tree_html(text) + sql_query_html = self._generate_sql_query_html(text) + # finally create and write the html + with open_utf8(output_file, "w+") as f: + html = """ + + + + + Query Profile Graph for Query + ${TREEFLEX_CSS} + + + +
    +
    + DuckDB Logo +

    Query Profile Graph

    +
    +
    + ${METRIC_GRID} +
    +
    + ${SQL_QUERY} + ${TIMING_TABLE} +
    + ${TREE} + + + """ # noqa: E501 + html = html.replace("${TREEFLEX_CSS}", html_output["treeflex_css"]) + html = html.replace("${DUCKDB_CSS}", html_output["duckdb_css"]) + html = html.replace("${METRIC_GRID}", highlight_metric_grid) + html = html.replace("${SQL_QUERY}", sql_query_html) + html = html.replace("${TIMING_TABLE}", timing_table) + html = html.replace("${TREE}", tree_output) + f.write(html) def main() -> None: # noqa: D103 @@ -326,7 +641,7 @@ def main() -> None: # noqa: D103 description="""Given a json profile output, generate a html file showing the query graph and timings of operators""", ) - parser.add_argument("profile_input", help="profile input in json") + parser.add_argument("--profile_input", help="profile input in json") parser.add_argument("--out", required=False, default=False) parser.add_argument("--open", required=False, action="store_true", default=True) args = parser.parse_args() @@ -347,8 +662,8 @@ def main() -> None: # noqa: D103 exit(1) open_output = args.open - - translate_json_to_html(input, output) + profiling_info = ProfilingInfo(from_file=input) + profiling_info.to_html(output_file=output) if open_output: webbrowser.open(f"file://{Path(output).resolve()}", new=2) diff --git a/scripts/connection_methods.json b/scripts/connection_methods.json index a87b992f..3b02a9b1 100644 --- a/scripts/connection_methods.json +++ b/scripts/connection_methods.json @@ -1093,5 +1093,30 @@ } ], "return": "None" + }, + { + "name": "get_profiling_information", + "function": "GetProfilingInformation", + "docs": "Get profiling information for a query", + "args": [ + { + "name": "format", + "default": "JSON", + "type": "Optional[str]" + } + ], + "return": "str" + }, + { + "name": "enable_profiling", + "function": "EnableProfiling", + "docs": "Enable profiling for a connection", + "return": "None" + }, + { + "name": "disable_profiling", + "function": "DisableProfiling", + "docs": "Disable profiling for a connection", + "return": "None" } ] diff --git a/scripts/generate_connection_stubs.py b/scripts/generate_connection_stubs.py index d542a047..76c19b36 100644 --- a/scripts/generate_connection_stubs.py +++ b/scripts/generate_connection_stubs.py @@ -5,7 +5,7 @@ os.chdir(Path(__file__).parent) JSON_PATH = "connection_methods.json" -DUCKDB_STUBS_FILE = Path("..") / "duckdb" / "__init__.pyi" +DUCKDB_STUBS_FILE = Path("..") / "_duckdb-stubs" / "__init__.pyi" START_MARKER = " # START OF CONNECTION METHODS" END_MARKER = " # END OF CONNECTION METHODS" diff --git a/src/duckdb_py/duckdb_python.cpp b/src/duckdb_py/duckdb_python.cpp index 1dd3ba17..fedbec5f 100644 --- a/src/duckdb_py/duckdb_python.cpp +++ b/src/duckdb_py/duckdb_python.cpp @@ -124,6 +124,34 @@ static void InitializeConnectionMethods(py::module_ &m) { }, "Check if a filesystem with the provided name is currently registered", py::arg("name"), py::kw_only(), py::arg("connection") = py::none()); + m.def( + "get_profiling_information", + [](const py::str &format, shared_ptr conn = nullptr) { + if (!conn) { + conn = DuckDBPyConnection::DefaultConnection(); + } + return conn->GetProfilingInformation(format); + }, + "Get profiling information from a query", py::kw_only(), py::arg("format") = "json", + py::arg("connection") = py::none()); + m.def( + "enable_profiling", + [](shared_ptr conn = nullptr) { + if (!conn) { + conn = DuckDBPyConnection::DefaultConnection(); + } + return conn->EnableProfiling(); + }, + "Enable profiling for the current connection", py::kw_only(), py::arg("connection") = py::none()); + m.def( + "disable_profiling", + [](shared_ptr conn = nullptr) { + if (!conn) { + conn = DuckDBPyConnection::DefaultConnection(); + } + return conn->DisableProfiling(); + }, + "Disable profiling for the current connection", py::kw_only(), py::arg("connection") = py::none()); m.def( "create_function", [](const string &name, const py::function &udf, const py::object &arguments = py::none(), diff --git a/src/duckdb_py/include/duckdb_python/pyconnection/pyconnection.hpp b/src/duckdb_py/include/duckdb_python/pyconnection/pyconnection.hpp index 48ee055e..8117eda9 100644 --- a/src/duckdb_py/include/duckdb_python/pyconnection/pyconnection.hpp +++ b/src/duckdb_py/include/duckdb_python/pyconnection/pyconnection.hpp @@ -337,6 +337,11 @@ struct DuckDBPyConnection : public enable_shared_from_this { py::list ListFilesystems(); bool FileSystemIsRegistered(const string &name); + // Profiling info + py::str GetProfilingInformation(const py::str &format = "json"); + void EnableProfiling(); + void DisableProfiling(); + //! Default connection to an in-memory database static DefaultConnectionHolder default_connection; //! Caches and provides an interface to get frequently used modules+subtypes diff --git a/src/duckdb_py/pyconnection.cpp b/src/duckdb_py/pyconnection.cpp index 11a7ea9d..7a454b29 100644 --- a/src/duckdb_py/pyconnection.cpp +++ b/src/duckdb_py/pyconnection.cpp @@ -3,6 +3,7 @@ #include "duckdb/catalog/default/default_types.hpp" #include "duckdb/common/arrow/arrow.hpp" #include "duckdb/common/enums/file_compression_type.hpp" +#include "duckdb/common/enums/profiler_format.hpp" #include "duckdb/common/printer.hpp" #include "duckdb/common/types.hpp" #include "duckdb/common/types/vector.hpp" @@ -285,6 +286,10 @@ static void InitializeConnectionMethods(py::class_