diff --git a/langfuse/api/resources/commons/types/base_score.py b/langfuse/api/resources/commons/types/base_score.py index 89394956..07c4966d 100644 --- a/langfuse/api/resources/commons/types/base_score.py +++ b/langfuse/api/resources/commons/types/base_score.py @@ -23,6 +23,7 @@ class BaseScore(pydantic_v1.BaseModel): alias="authorUserId", default=None ) comment: typing.Optional[str] = None + metadata: typing.Optional[typing.Any] = None config_id: typing.Optional[str] = pydantic_v1.Field(alias="configId", default=None) """ Reference a score config on a score. When set, config and score name must be equal and value must comply to optionally defined numerical range diff --git a/langfuse/api/resources/commons/types/score.py b/langfuse/api/resources/commons/types/score.py index 8eed33b7..061853f4 100644 --- a/langfuse/api/resources/commons/types/score.py +++ b/langfuse/api/resources/commons/types/score.py @@ -26,6 +26,7 @@ class Score_Numeric(pydantic_v1.BaseModel): alias="authorUserId", default=None ) comment: typing.Optional[str] = None + metadata: typing.Optional[typing.Any] = None config_id: typing.Optional[str] = pydantic_v1.Field(alias="configId", default=None) queue_id: typing.Optional[str] = pydantic_v1.Field(alias="queueId", default=None) environment: typing.Optional[str] = None @@ -84,6 +85,7 @@ class Score_Categorical(pydantic_v1.BaseModel): alias="authorUserId", default=None ) comment: typing.Optional[str] = None + metadata: typing.Optional[typing.Any] = None config_id: typing.Optional[str] = pydantic_v1.Field(alias="configId", default=None) queue_id: typing.Optional[str] = pydantic_v1.Field(alias="queueId", default=None) environment: typing.Optional[str] = None @@ -142,6 +144,7 @@ class Score_Boolean(pydantic_v1.BaseModel): alias="authorUserId", default=None ) comment: typing.Optional[str] = None + metadata: typing.Optional[typing.Any] = None config_id: typing.Optional[str] = pydantic_v1.Field(alias="configId", default=None) queue_id: typing.Optional[str] = pydantic_v1.Field(alias="queueId", default=None) environment: typing.Optional[str] = None diff --git a/langfuse/api/resources/ingestion/types/score_body.py b/langfuse/api/resources/ingestion/types/score_body.py index dbe2fbbd..df5a59b5 100644 --- a/langfuse/api/resources/ingestion/types/score_body.py +++ b/langfuse/api/resources/ingestion/types/score_body.py @@ -35,6 +35,7 @@ class ScoreBody(pydantic_v1.BaseModel): alias="observationId", default=None ) comment: typing.Optional[str] = None + metadata: typing.Optional[typing.Any] = None data_type: typing.Optional[ScoreDataType] = pydantic_v1.Field( alias="dataType", default=None ) diff --git a/langfuse/api/resources/score/types/create_score_request.py b/langfuse/api/resources/score/types/create_score_request.py index c11030f4..74dccc59 100644 --- a/langfuse/api/resources/score/types/create_score_request.py +++ b/langfuse/api/resources/score/types/create_score_request.py @@ -34,6 +34,7 @@ class CreateScoreRequest(pydantic_v1.BaseModel): alias="observationId", default=None ) comment: typing.Optional[str] = None + metadata: typing.Optional[typing.Any] = None environment: typing.Optional[str] = pydantic_v1.Field(default=None) """ The environment of the score. Can be any lowercase alphanumeric string with hyphens and underscores that does not start with 'langfuse'. diff --git a/langfuse/api/resources/score/types/get_scores_response_data.py b/langfuse/api/resources/score/types/get_scores_response_data.py index 5642e0f8..11b20a7b 100644 --- a/langfuse/api/resources/score/types/get_scores_response_data.py +++ b/langfuse/api/resources/score/types/get_scores_response_data.py @@ -28,6 +28,7 @@ class GetScoresResponseData_Numeric(pydantic_v1.BaseModel): alias="authorUserId", default=None ) comment: typing.Optional[str] = None + metadata: typing.Optional[typing.Any] = None config_id: typing.Optional[str] = pydantic_v1.Field(alias="configId", default=None) queue_id: typing.Optional[str] = pydantic_v1.Field(alias="queueId", default=None) environment: typing.Optional[str] = None @@ -87,6 +88,7 @@ class GetScoresResponseData_Categorical(pydantic_v1.BaseModel): alias="authorUserId", default=None ) comment: typing.Optional[str] = None + metadata: typing.Optional[typing.Any] = None config_id: typing.Optional[str] = pydantic_v1.Field(alias="configId", default=None) queue_id: typing.Optional[str] = pydantic_v1.Field(alias="queueId", default=None) environment: typing.Optional[str] = None @@ -146,6 +148,7 @@ class GetScoresResponseData_Boolean(pydantic_v1.BaseModel): alias="authorUserId", default=None ) comment: typing.Optional[str] = None + metadata: typing.Optional[typing.Any] = None config_id: typing.Optional[str] = pydantic_v1.Field(alias="configId", default=None) queue_id: typing.Optional[str] = pydantic_v1.Field(alias="queueId", default=None) environment: typing.Optional[str] = None diff --git a/langfuse/client.py b/langfuse/client.py index dc7ed940..72e26990 100644 --- a/langfuse/client.py +++ b/langfuse/client.py @@ -1573,6 +1573,7 @@ def score( trace_id: typing.Optional[str] = None, id: typing.Optional[str] = None, comment: typing.Optional[str] = None, + metadata: typing.Optional[typing.Any] = None, observation_id: typing.Optional[str] = None, config_id: typing.Optional[str] = None, **kwargs, @@ -1588,6 +1589,7 @@ def score( trace_id: typing.Optional[str] = None, id: typing.Optional[str] = None, comment: typing.Optional[str] = None, + metadata: typing.Optional[typing.Any] = None, observation_id: typing.Optional[str] = None, config_id: typing.Optional[str] = None, **kwargs, @@ -1602,6 +1604,7 @@ def score( trace_id: typing.Optional[str] = None, id: typing.Optional[str] = None, comment: typing.Optional[str] = None, + metadata: typing.Optional[typing.Any] = None, observation_id: typing.Optional[str] = None, config_id: typing.Optional[str] = None, **kwargs, @@ -1616,6 +1619,7 @@ def score( trace_id (str): The id of the trace to which the score should be attached. id (Optional[str]): The id of the score. If not provided, a new UUID is generated. comment (Optional[str]): Additional context/explanation of the score. + metadata (Optional[Any]): Additional metadata of the score. Can be any JSON object. Metadata is merged when being updated via the API. observation_id (Optional[str]): The id of the observation to which the score should be attached. config_id (Optional[str]): The id of the score config. When set, the score value is validated against the config. Defaults to None. **kwargs: Additional keyword arguments to include in the score. @@ -1655,6 +1659,7 @@ def score( "value": value, "data_type": data_type, "comment": comment, + "metadata": metadata, "config_id": config_id, "environment": self.environment, **kwargs, @@ -2415,6 +2420,7 @@ def score( value: float, data_type: typing.Optional[Literal["NUMERIC", "BOOLEAN"]] = None, comment: typing.Optional[str] = None, + metadata: typing.Optional[typing.Any] = None, config_id: typing.Optional[str] = None, **kwargs, ) -> "StatefulClient": ... @@ -2428,6 +2434,7 @@ def score( value: str, data_type: typing.Optional[Literal["CATEGORICAL"]] = "CATEGORICAL", comment: typing.Optional[str] = None, + metadata: typing.Optional[typing.Any] = None, config_id: typing.Optional[str] = None, **kwargs, ) -> "StatefulClient": ... @@ -2440,6 +2447,7 @@ def score( value: typing.Union[float, str], data_type: typing.Optional[ScoreDataType] = None, comment: typing.Optional[str] = None, + metadata: typing.Optional[typing.Any] = None, config_id: typing.Optional[str] = None, **kwargs, ) -> "StatefulClient": @@ -2451,6 +2459,7 @@ def score( data_type (Optional[ScoreDataType]): The data type of the score. When not set, the data type is inferred from the score config's data type, when present. When no config is set, the data type is inferred from the value's type, i.e. float values are categorized as numeric scores and string values as categorical scores. comment (Optional[str]): Additional context/explanation of the score. + metadata (Optional[Any]): Additional metadata of the score. Can be any JSON object. Metadata is merged when being updated via the API. id (Optional[str]): The id of the score. If not provided, a new UUID is generated. config_id (Optional[str]): The id of the score config. When set, the score value is validated against the config. Defaults to None. **kwargs: Additional keyword arguments to include in the score. @@ -2484,6 +2493,7 @@ def score( "value": value, "data_type": data_type, "comment": comment, + "metadata": metadata, "config_id": config_id, "environment": self.environment, **kwargs, diff --git a/langfuse/extract_model.py b/langfuse/extract_model.py index 19252284..5880e962 100644 --- a/langfuse/extract_model.py +++ b/langfuse/extract_model.py @@ -106,7 +106,10 @@ def _extract_model_name( def _extract_model_from_repr_by_pattern( - id: str, serialized: Optional[Dict[str, Any]], pattern: str, default: Optional[str] = None + id: str, + serialized: Optional[Dict[str, Any]], + pattern: str, + default: Optional[str] = None, ): if serialized is None: return None diff --git a/tests/test_core_sdk.py b/tests/test_core_sdk.py index 38aea9a7..82d6b19f 100644 --- a/tests/test_core_sdk.py +++ b/tests/test_core_sdk.py @@ -502,7 +502,8 @@ def test_score_trace(): trace_id=langfuse.get_trace_id(), name="valuation", value=0.5, - comment="This is a comment", + comment="tests/test_core_sdk.py::test_score_trace", + metadata={"key": "value"}, ) langfuse.flush() @@ -519,9 +520,10 @@ def test_score_trace(): assert score["name"] == "valuation" assert score["value"] == 0.5 - assert score["comment"] == "This is a comment" + assert score["comment"] == "tests/test_core_sdk.py::test_score_trace" assert score["observationId"] is None assert score["dataType"] == "NUMERIC" + assert score["metadata"] == {"key": "value"} def test_score_trace_nested_trace(): @@ -534,7 +536,8 @@ def test_score_trace_nested_trace(): trace.score( name="valuation", value=0.5, - comment="This is a comment", + comment="tests/test_core_sdk.py::test_score_trace_nested_trace", + metadata={"key": "value"}, ) langfuse.flush() @@ -551,9 +554,10 @@ def test_score_trace_nested_trace(): assert score.name == "valuation" assert score.value == 0.5 - assert score.comment == "This is a comment" + assert score.comment == "tests/test_core_sdk.py::test_score_trace_nested_trace" assert score.observation_id is None assert score.data_type == "NUMERIC" + assert score.metadata == {"key": "value"} def test_score_trace_nested_observation(): @@ -567,7 +571,8 @@ def test_score_trace_nested_observation(): span.score( name="valuation", value=0.5, - comment="This is a comment", + comment="tests/test_core_sdk.py::test_score_trace_nested_observation", + metadata={"key": "value"}, ) langfuse.flush() @@ -584,9 +589,12 @@ def test_score_trace_nested_observation(): assert score.name == "valuation" assert score.value == 0.5 - assert score.comment == "This is a comment" + assert ( + score.comment == "tests/test_core_sdk.py::test_score_trace_nested_observation" + ) assert score.observation_id == span.id assert score.data_type == "NUMERIC" + assert score.metadata == {"key": "value"} def test_score_span(): @@ -610,7 +618,8 @@ def test_score_span(): observation_id=spanId, name="valuation", value=1, - comment="This is a comment", + comment="tests/test_core_sdk.py::test_score_span", + metadata={"key": "value"}, ) langfuse.flush() @@ -626,9 +635,10 @@ def test_score_span(): assert score["name"] == "valuation" assert score["value"] == 1 - assert score["comment"] == "This is a comment" + assert score["comment"] == "tests/test_core_sdk.py::test_score_span" assert score["observationId"] == spanId assert score["dataType"] == "NUMERIC" + assert score["metadata"] == {"key": "value"} def test_create_trace_and_span(): @@ -1584,3 +1594,36 @@ def test_environment_from_env_var(monkeypatch): fetched_trace = api_wrapper.get_trace(trace.id) assert fetched_trace["environment"] == "testing" + + +@pytest.mark.parametrize( + "input_metadata, expected_metadata", + [ + ("Test Metadata", {"metadata": "Test Metadata"}), + (1, {"metadata": 1}), + (1.0, {"metadata": 1.0}), + ({"key": "value"}, {"key": "value"}), + (["value1", "value2"], {"metadata": ["value1", "value2"]}), + ], +) +def test_metadata(input_metadata, expected_metadata): + langfuse = Langfuse(debug=True) + api_wrapper = LangfuseAPI() + + trace = langfuse.trace(name="test_metadata", metadata=input_metadata) + observation = trace.generation(name="test_gen", metadata=input_metadata) + trace.score(name="test_score", value=1, metadata=input_metadata) + langfuse.flush() + sleep(1) + + fetched_trace = api_wrapper.get_trace(trace.id) + fetched_observation = api_wrapper.get_observation(observation.id) + fetched_score = fetched_trace["scores"][0] + + trace_metadata = fetched_trace["metadata"] + observation_metadata = fetched_observation["metadata"] + score_metadata = fetched_score["metadata"] + + assert trace_metadata == expected_metadata + assert observation_metadata == expected_metadata + assert score_metadata == expected_metadata