Skip to content

query tags integration #663

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions examples/query_tags_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""Demonstrate how to use Query Tags with the Databricks SQL connector.

Query Tags are key-value pairs that can be attached to SQL executions and will appear
in the system.query.history table for analytical purposes.

Format: "key1:value1,key2:value2,key3:value3"

Connection details are read from the DATABRICKS_SERVER_HOSTNAME,
DATABRICKS_HTTP_PATH and DATABRICKS_TOKEN environment variables.
"""

import os

import databricks.sql as sql

print("=== Query Tags Example ===\n")

with sql.connect(
    server_hostname=os.getenv("DATABRICKS_SERVER_HOSTNAME"),
    http_path=os.getenv("DATABRICKS_HTTP_PATH"),
    access_token=os.getenv("DATABRICKS_TOKEN"),
    session_configuration={
        # Tags are supplied as one comma-separated "key:value" string.
        'QUERY_TAGS': 'team:engineering,test:query-tags',
        'ansi_mode': False
    }
) as connection:

    with connection.cursor() as cursor:
        # Any statement run in this session carries the tags configured above.
        cursor.execute("SELECT 1")
        result = cursor.fetchone()
        print(f" Result: {result[0]}")

print("\n=== Query Tags Example Complete ===")
1 change: 1 addition & 0 deletions src/databricks/sql/backend/sea/utils/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
"STATEMENT_TIMEOUT": "0",
"TIMEZONE": "UTC",
"USE_CACHED_RESULT": "true",
"QUERY_TAGS": "",
}


Expand Down
15 changes: 13 additions & 2 deletions tests/e2e/test_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -848,10 +848,21 @@ def test_socket_timeout_user_defined(self):
query = "select * from range(1000000000)"
cursor.execute(query)

def test_ssp_passthrough(self):
@pytest.mark.parametrize(
"extra_params",
[
{
"use_sea": False,
},
{
"use_sea": True,
},
],
)
def test_ssp_passthrough(self, extra_params):
for enable_ansi in (True, False):
with self.cursor(
{"session_configuration": {"ansi_mode": enable_ansi}}
{"session_configuration": {"ansi_mode": enable_ansi, "QUERY_TAGS": "team:marketing,dashboard:abc123,driver:python"}, **extra_params}

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we add a test with an invalid query param format and check whether we get the required error?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The backend design docs mention that invalid query tags are ignored without raising an error, so that the actual query execution won't fail. I tested this manually in a local setup by passing invalid/non-parsable query tags, and the backend doesn't throw an error.

) as cursor:
cursor.execute("SET ansi_mode")
assert list(cursor.fetchone()) == ["ansi_mode", str(enable_ansi)]
Expand Down
6 changes: 6 additions & 0 deletions tests/unit/test_sea_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@ def test_session_management(self, sea_client, mock_http_client, thrift_session_i
session_config = {
"ANSI_MODE": "FALSE", # Supported parameter
"STATEMENT_TIMEOUT": "3600", # Supported parameter
"QUERY_TAGS": "team:marketing,dashboard:abc123", # Supported parameter
"unsupported_param": "value", # Unsupported parameter
}
catalog = "test_catalog"
Expand All @@ -196,6 +197,7 @@ def test_session_management(self, sea_client, mock_http_client, thrift_session_i
"session_confs": {
"ansi_mode": "FALSE",
"statement_timeout": "3600",
"query_tags": "team:marketing,dashboard:abc123",
},
"catalog": catalog,
"schema": schema,
Expand Down Expand Up @@ -641,6 +643,7 @@ def test_filter_session_configuration(self):
"TIMEZONE": "UTC",
"enable_photon": False,
"MAX_FILE_PARTITION_BYTES": 128.5,
"QUERY_TAGS": "team:engineering,project:data-pipeline",
"unsupported_param": "value",
"ANOTHER_UNSUPPORTED": 42,
}
Expand All @@ -663,6 +666,7 @@ def test_filter_session_configuration(self):
"timezone": "UTC", # string -> "UTC", key lowercased
"enable_photon": "False", # boolean False -> "False", key lowercased
"max_file_partition_bytes": "128.5", # float -> "128.5", key lowercased
"query_tags": "team:engineering,project:data-pipeline",
}

assert result == expected_result
Expand All @@ -683,12 +687,14 @@ def test_filter_session_configuration(self):
"ansi_mode": "false", # lowercase key
"STATEMENT_TIMEOUT": 7200, # uppercase key
"TiMeZoNe": "America/New_York", # mixed case key
"QueRy_TaGs": "team:marketing,test:case-insensitive",
}
result = _filter_session_configuration(case_insensitive_config)
expected_case_result = {
"ansi_mode": "false",
"statement_timeout": "7200",
"timezone": "America/New_York",
"query_tags": "team:marketing,test:case-insensitive",

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similarly, there are two scenarios we can add here:

  1. Not passing the query tags config param
  2. Passing non parsable query tags -> "team:marketing,test"

Copy link
Author

@sreekanth-db sreekanth-db Aug 11, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. The _filter_session_configuration method loops through the provided session config fields and filters out invalid params, so a separate unit test for not passing query tags would be overkill. For e2e tests, we already have a few tests in which we don't pass query tags as part of the session config.

  2. We don't do any parsing of query tags in the driver; we send the user-provided value as-is to the backend. As mentioned in the other comment, the backend ignores non-parsable query tags without raising an error.

}
assert result == expected_case_result

Expand Down
2 changes: 1 addition & 1 deletion tests/unit/test_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ def test_socket_timeout_passthrough(self, mock_client_class):

@patch("%s.session.ThriftDatabricksClient" % PACKAGE_NAME)
def test_configuration_passthrough(self, mock_client_class):
mock_session_config = Mock()
mock_session_config = {"ANSI_MODE": "FALSE", "QUERY_TAGS": "team:engineering,project:data-pipeline"}
databricks.sql.connect(
session_configuration=mock_session_config, **self.DUMMY_CONNECTION_ARGS
)
Expand Down
Loading