Skip to content

Commit 35a8cf0

Browse files
committed
make expected output optional
fix case where updating a record from having an expected_output to having no expected_output (null) was treated as no update
1 parent 4637fdd commit 35a8cf0

9 files changed

+321
-18
lines changed

ddtrace/llmobs/_writer.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -297,6 +297,7 @@ class LLMObsExperimentsClient(BaseLLMObsWriter):
297297
EVP_SUBDOMAIN_HEADER_VALUE = EXP_SUBDOMAIN_NAME
298298
AGENTLESS_BASE_URL = AGENTLESS_EXP_BASE_URL
299299
ENDPOINT = ""
300+
TIMEOUT = 5.0
300301

301302
def request(self, method: str, path: str, body: JSONType = None) -> Response:
302303
headers = {
@@ -308,7 +309,7 @@ def request(self, method: str, path: str, body: JSONType = None) -> Response:
308309
headers[EVP_SUBDOMAIN_HEADER_NAME] = self.EVP_SUBDOMAIN_HEADER_VALUE
309310

310311
encoded_body = json.dumps(body).encode("utf-8") if body else b""
311-
conn = get_connection(self._intake)
312+
conn = get_connection(url=self._intake, timeout=self.TIMEOUT)
312313
try:
313314
url = self._intake + self._endpoint + path
314315
logger.debug("requesting %s", url)
@@ -363,15 +364,19 @@ def dataset_batch_update(
363364
irs: JSONType = [
364365
{
365366
"input": cast(Dict[str, JSONType], r["input_data"]),
366-
"expected_output": r["expected_output"],
367+
"expected_output": r.get("expected_output", ""),
367368
"metadata": r.get("metadata", {}),
368369
}
369370
for r in insert_records
370371
]
371372
urs: JSONType = [
372373
{
373374
"input": cast(Dict[str, JSONType], r["input_data"]),
374-
"expected_output": r["expected_output"],
375+
# Default to "" rather than None: the API treats None (null in JSON)
376+
# as if the field were absent from the JSON and leaves it un-updated,
377+
# even though going from an expected_output value to no value is a
378+
# valid use case
379+
"expected_output": r.get("expected_output", ""),
375380
"metadata": r.get("metadata", {}),
376381
"id": r["record_id"],
377382
}
@@ -428,7 +433,7 @@ def dataset_get_with_records(self, name: str) -> Dataset:
428433
{
429434
"record_id": record["id"],
430435
"input_data": attrs["input"],
431-
"expected_output": attrs["expected_output"],
436+
"expected_output": attrs.get("expected_output", ""),
432437
"metadata": attrs.get("metadata", {}),
433438
}
434439
)

setup.cfg

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -6,19 +6,5 @@ skip = *.json,*.h,*.cpp,*.c,.riot,.tox,.mypy_cache,.git,*ddtrace/vendor,tests/co
66
exclude-file = .codespellignorelines
77
ignore-words-list = asend,dne,fo,medias,ment,nin,ot,setttings,statics,ba,spawnve,doas
88

9-
# DEV: We use `conftest.py` as a local pytest plugin to configure hooks for collection
10-
[tool:pytest]
11-
# --cov-report is intentionally empty else pytest-cov will default to generating a report
12-
addopts =
13-
--cov=ddtrace/
14-
--cov=tests/
15-
--cov-append
16-
--cov-report=
17-
--durations=10
18-
--junitxml=test-results/junit.xml
19-
# DEV: The default is `test_*\.py` which will miss `test.py` files
20-
python_files = test*\.py
21-
asyncio_mode = auto
22-
239
[flake8]
2410
max-line-length = 120
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
interactions:
2+
- request:
3+
body: '{"data": {"type": "datasets", "id": "0f9a74c4-b36e-4fd0-ae2a-fb3cb929d8e5",
4+
"attributes": {"insert_records": [], "update_records": [{"input": {"prompt":
5+
"What is the capital of Germany?"}, "expected_output": "", "metadata": {}, "id":
6+
"8a7b812f-c7c1-40dc-a9b2-673f41e756b0"}], "delete_records": []}}}'
7+
headers:
8+
Accept:
9+
- '*/*'
10+
? !!python/object/apply:multidict._multidict.istr
11+
- Accept-Encoding
12+
: - identity
13+
Connection:
14+
- keep-alive
15+
Content-Length:
16+
- '299'
17+
? !!python/object/apply:multidict._multidict.istr
18+
- Content-Type
19+
: - application/json
20+
User-Agent:
21+
- python-requests/2.32.3
22+
method: POST
23+
uri: https://api.datadoghq.com/api/unstable/llm-obs/v1/datasets/0f9a74c4-b36e-4fd0-ae2a-fb3cb929d8e5/batch_update
24+
response:
25+
body:
26+
string: '{"data":[{"id":"fc052f56-4d05-43e0-8199-e5fa7397f602","type":"datasets","attributes":{"author":{"id":"de473b30-eb9f-11e9-a77a-c7405862b8bd"},"created_at":"2025-08-01T00:59:03.303054668Z","dataset_id":"0f9a74c4-b36e-4fd0-ae2a-fb3cb929d8e5","expected_output":"","input":{"prompt":"What
27+
is the capital of Germany?"},"metadata":{},"updated_at":"2025-08-01T00:59:03.303054742Z","version":2}}]}'
28+
headers:
29+
content-length:
30+
- '388'
31+
content-security-policy:
32+
- frame-ancestors 'self'; report-uri https://logs.browser-intake-datadoghq.com/api/v2/logs?dd-api-key=pube4f163c23bbf91c16b8f57f56af9fc58&dd-evp-origin=content-security-policy&ddsource=csp-report&ddtags=site%3Adatadoghq.com
33+
content-type:
34+
- application/vnd.api+json
35+
date:
36+
- Fri, 01 Aug 2025 00:59:03 GMT
37+
strict-transport-security:
38+
- max-age=31536000; includeSubDomains; preload
39+
vary:
40+
- Accept-Encoding
41+
x-content-type-options:
42+
- nosniff
43+
x-frame-options:
44+
- SAMEORIGIN
45+
status:
46+
code: 200
47+
message: OK
48+
version: 1
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
interactions:
2+
- request:
3+
body: '{"data": {"type": "datasets", "id": "0f9a74c4-b36e-4fd0-ae2a-fb3cb929d8e5",
4+
"attributes": {"insert_records": [{"input": {"prompt": "What is the capital
5+
of France?"}, "expected_output": {"answer": "Paris"}, "metadata": {}}], "update_records":
6+
[], "delete_records": []}}}'
7+
headers:
8+
Accept:
9+
- '*/*'
10+
? !!python/object/apply:multidict._multidict.istr
11+
- Accept-Encoding
12+
: - identity
13+
Connection:
14+
- keep-alive
15+
Content-Length:
16+
- '269'
17+
? !!python/object/apply:multidict._multidict.istr
18+
- Content-Type
19+
: - application/json
20+
User-Agent:
21+
- python-requests/2.32.3
22+
method: POST
23+
uri: https://api.datadoghq.com/api/unstable/llm-obs/v1/datasets/0f9a74c4-b36e-4fd0-ae2a-fb3cb929d8e5/batch_update
24+
response:
25+
body:
26+
string: '{"data":[{"id":"8a7b812f-c7c1-40dc-a9b2-673f41e756b0","type":"datasets","attributes":{"author":{"id":"de473b30-eb9f-11e9-a77a-c7405862b8bd"},"created_at":"2025-08-01T00:59:00.450779819Z","dataset_id":"0f9a74c4-b36e-4fd0-ae2a-fb3cb929d8e5","expected_output":{"answer":"Paris"},"input":{"prompt":"What
27+
is the capital of France?"},"metadata":{},"updated_at":"2025-08-01T00:59:00.450779819Z","version":1}}]}'
28+
headers:
29+
content-length:
30+
- '403'
31+
content-security-policy:
32+
- frame-ancestors 'self'; report-uri https://logs.browser-intake-datadoghq.com/api/v2/logs?dd-api-key=pube4f163c23bbf91c16b8f57f56af9fc58&dd-evp-origin=content-security-policy&ddsource=csp-report&ddtags=site%3Adatadoghq.com
33+
content-type:
34+
- application/vnd.api+json
35+
date:
36+
- Fri, 01 Aug 2025 00:59:00 GMT
37+
strict-transport-security:
38+
- max-age=31536000; includeSubDomains; preload
39+
vary:
40+
- Accept-Encoding
41+
x-content-type-options:
42+
- nosniff
43+
x-frame-options:
44+
- SAMEORIGIN
45+
status:
46+
code: 200
47+
message: OK
48+
version: 1
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
interactions:
2+
- request:
3+
body: null
4+
headers:
5+
Accept:
6+
- '*/*'
7+
? !!python/object/apply:multidict._multidict.istr
8+
- Accept-Encoding
9+
: - identity
10+
Connection:
11+
- keep-alive
12+
? !!python/object/apply:multidict._multidict.istr
13+
- Content-Length
14+
: - '0'
15+
? !!python/object/apply:multidict._multidict.istr
16+
- Content-Type
17+
: - application/json
18+
User-Agent:
19+
- python-requests/2.32.3
20+
method: GET
21+
uri: https://api.datadoghq.com/api/unstable/llm-obs/v1/datasets/0f9a74c4-b36e-4fd0-ae2a-fb3cb929d8e5/records
22+
response:
23+
body:
24+
string: '{"data":[{"id":"fc052f56-4d05-43e0-8199-e5fa7397f602","type":"datasets","attributes":{"author":{"id":"de473b30-eb9f-11e9-a77a-c7405862b8bd"},"created_at":"2025-08-01T00:59:03.303054Z","dataset_id":"0f9a74c4-b36e-4fd0-ae2a-fb3cb929d8e5","expected_output":"","input":{"prompt":"What
25+
is the capital of Germany?"},"metadata":{},"updated_at":"2025-08-01T00:59:03.303054Z"}}],"meta":{"after":""}}'
26+
headers:
27+
content-length:
28+
- '390'
29+
content-security-policy:
30+
- frame-ancestors 'self'; report-uri https://logs.browser-intake-datadoghq.com/api/v2/logs?dd-api-key=pube4f163c23bbf91c16b8f57f56af9fc58&dd-evp-origin=content-security-policy&ddsource=csp-report&ddtags=site%3Adatadoghq.com
31+
content-type:
32+
- application/vnd.api+json
33+
date:
34+
- Fri, 01 Aug 2025 00:59:07 GMT
35+
strict-transport-security:
36+
- max-age=31536000; includeSubDomains; preload
37+
vary:
38+
- Accept-Encoding
39+
x-content-type-options:
40+
- nosniff
41+
x-frame-options:
42+
- SAMEORIGIN
43+
status:
44+
code: 200
45+
message: OK
46+
version: 1
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
interactions:
2+
- request:
3+
body: '{"data": {"type": "datasets", "attributes": {"type": "soft", "dataset_ids":
4+
["0f9a74c4-b36e-4fd0-ae2a-fb3cb929d8e5"]}}}'
5+
headers:
6+
Accept:
7+
- '*/*'
8+
? !!python/object/apply:multidict._multidict.istr
9+
- Accept-Encoding
10+
: - identity
11+
Connection:
12+
- keep-alive
13+
Content-Length:
14+
- '119'
15+
? !!python/object/apply:multidict._multidict.istr
16+
- Content-Type
17+
: - application/json
18+
User-Agent:
19+
- python-requests/2.32.3
20+
method: POST
21+
uri: https://api.datadoghq.com/api/unstable/llm-obs/v1/datasets/delete
22+
response:
23+
body:
24+
string: '{"data":[{"id":"0f9a74c4-b36e-4fd0-ae2a-fb3cb929d8e5","type":"datasets","attributes":{"author":{"id":"de473b30-eb9f-11e9-a77a-c7405862b8bd"},"created_at":"2025-08-01T00:59:00.33008Z","current_version":2,"deleted_at":"2025-08-01T00:59:07.274191Z","description":"A
25+
test dataset","name":"test-dataset-test_dataset_modify_single_record_on_optional_field[test_dataset_records0]","updated_at":"2025-08-01T00:59:03.310155Z"}}]}'
26+
headers:
27+
content-length:
28+
- '420'
29+
content-security-policy:
30+
- frame-ancestors 'self'; report-uri https://logs.browser-intake-datadoghq.com/api/v2/logs?dd-api-key=pube4f163c23bbf91c16b8f57f56af9fc58&dd-evp-origin=content-security-policy&ddsource=csp-report&ddtags=site%3Adatadoghq.com
31+
content-type:
32+
- application/vnd.api+json
33+
date:
34+
- Fri, 01 Aug 2025 00:59:07 GMT
35+
strict-transport-security:
36+
- max-age=31536000; includeSubDomains; preload
37+
vary:
38+
- Accept-Encoding
39+
x-content-type-options:
40+
- nosniff
41+
x-frame-options:
42+
- SAMEORIGIN
43+
status:
44+
code: 200
45+
message: OK
46+
version: 1
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
interactions:
2+
- request:
3+
body: null
4+
headers:
5+
Accept:
6+
- '*/*'
7+
? !!python/object/apply:multidict._multidict.istr
8+
- Accept-Encoding
9+
: - identity
10+
Connection:
11+
- keep-alive
12+
? !!python/object/apply:multidict._multidict.istr
13+
- Content-Length
14+
: - '0'
15+
? !!python/object/apply:multidict._multidict.istr
16+
- Content-Type
17+
: - application/json
18+
User-Agent:
19+
- python-requests/2.32.3
20+
method: GET
21+
uri: https://api.datadoghq.com/api/unstable/llm-obs/v1/datasets?filter%5Bname%5D=test-dataset-test_dataset_modify_single_record_on_optional_field%5Btest_dataset_records0%5D
22+
response:
23+
body:
24+
string: '{"data":[{"id":"0f9a74c4-b36e-4fd0-ae2a-fb3cb929d8e5","type":"datasets","attributes":{"author":{"id":"de473b30-eb9f-11e9-a77a-c7405862b8bd"},"created_at":"2025-08-01T00:59:00.33008Z","current_version":2,"description":"A
25+
test dataset","name":"test-dataset-test_dataset_modify_single_record_on_optional_field[test_dataset_records0]","updated_at":"2025-08-01T00:59:03.310155Z"}}],"meta":{"after":""}}'
26+
headers:
27+
content-length:
28+
- '397'
29+
content-security-policy:
30+
- frame-ancestors 'self'; report-uri https://logs.browser-intake-datadoghq.com/api/v2/logs?dd-api-key=pube4f163c23bbf91c16b8f57f56af9fc58&dd-evp-origin=content-security-policy&ddsource=csp-report&ddtags=site%3Adatadoghq.com
31+
content-type:
32+
- application/vnd.api+json
33+
date:
34+
- Fri, 01 Aug 2025 00:59:05 GMT
35+
strict-transport-security:
36+
- max-age=31536000; includeSubDomains; preload
37+
vary:
38+
- Accept-Encoding
39+
x-content-type-options:
40+
- nosniff
41+
x-frame-options:
42+
- SAMEORIGIN
43+
status:
44+
code: 200
45+
message: OK
46+
version: 1
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
interactions:
2+
- request:
3+
body: '{"data": {"type": "datasets", "attributes": {"name": "test-dataset-test_dataset_modify_single_record_on_optional_field[test_dataset_records0]",
4+
"description": "A test dataset"}}}'
5+
headers:
6+
Accept:
7+
- '*/*'
8+
? !!python/object/apply:multidict._multidict.istr
9+
- Accept-Encoding
10+
: - identity
11+
Connection:
12+
- keep-alive
13+
Content-Length:
14+
- '178'
15+
? !!python/object/apply:multidict._multidict.istr
16+
- Content-Type
17+
: - application/json
18+
User-Agent:
19+
- python-requests/2.32.3
20+
method: POST
21+
uri: https://api.datadoghq.com/api/unstable/llm-obs/v1/datasets
22+
response:
23+
body:
24+
string: '{"data":{"id":"0f9a74c4-b36e-4fd0-ae2a-fb3cb929d8e5","type":"datasets","attributes":{"author":{"id":"de473b30-eb9f-11e9-a77a-c7405862b8bd"},"created_at":"2025-08-01T00:59:00.33008021Z","current_version":0,"description":"A
25+
test dataset","name":"test-dataset-test_dataset_modify_single_record_on_optional_field[test_dataset_records0]","updated_at":"2025-08-01T00:59:00.33008021Z"}}}'
26+
headers:
27+
content-length:
28+
- '380'
29+
content-security-policy:
30+
- frame-ancestors 'self'; report-uri https://logs.browser-intake-datadoghq.com/api/v2/logs?dd-api-key=pube4f163c23bbf91c16b8f57f56af9fc58&dd-evp-origin=content-security-policy&ddsource=csp-report&ddtags=site%3Adatadoghq.com
31+
content-type:
32+
- application/vnd.api+json
33+
date:
34+
- Fri, 01 Aug 2025 00:59:00 GMT
35+
strict-transport-security:
36+
- max-age=31536000; includeSubDomains; preload
37+
vary:
38+
- Accept-Encoding
39+
x-content-type-options:
40+
- nosniff
41+
x-frame-options:
42+
- SAMEORIGIN
43+
status:
44+
code: 200
45+
message: OK
46+
version: 1

tests/llmobs/test_experiments.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,38 @@ def test_dataset_modify_single_record(llmobs, test_dataset, test_dataset_records
325325
assert ds._version == 2
326326

327327

328+
@pytest.mark.parametrize(
329+
"test_dataset_records",
330+
[[DatasetRecord(input_data={"prompt": "What is the capital of France?"}, expected_output={"answer": "Paris"})]],
331+
)
332+
def test_dataset_modify_single_record_on_optional_field(llmobs, test_dataset, test_dataset_records):
333+
assert test_dataset._version == 1
334+
335+
test_dataset.update(
336+
0,
337+
DatasetRecord(input_data={"prompt": "What is the capital of Germany?"}),
338+
)
339+
test_dataset.push()
340+
assert len(test_dataset) == 1
341+
assert test_dataset._version == 2
342+
343+
assert test_dataset[0]["input_data"] == {"prompt": "What is the capital of Germany?"}
344+
assert "expected_output" not in test_dataset[0]
345+
assert test_dataset.name == test_dataset.name
346+
assert test_dataset.description == test_dataset.description
347+
348+
# assert that the version is consistent with a new pull
349+
350+
wait_for_backend()
351+
ds = llmobs.pull_dataset(name=test_dataset.name)
352+
assert ds[0]["input_data"] == {"prompt": "What is the capital of Germany?"}
353+
assert ds[0]["expected_output"] == ""
354+
assert len(ds) == 1
355+
assert ds.name == test_dataset.name
356+
assert ds.description == test_dataset.description
357+
assert ds._version == 2
358+
359+
328360
@pytest.mark.parametrize(
329361
"test_dataset_records",
330362
[[DatasetRecord(input_data={"prompt": "What is the capital of France?"}, expected_output={"answer": "Paris"})]],

0 commit comments

Comments
 (0)