Skip to content

feat: add metadata field to scores #1153

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions langfuse/api/resources/commons/types/base_score.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ class BaseScore(pydantic_v1.BaseModel):
alias="authorUserId", default=None
)
comment: typing.Optional[str] = None
metadata: typing.Optional[typing.Any] = None
config_id: typing.Optional[str] = pydantic_v1.Field(alias="configId", default=None)
"""
Reference a score config on a score. When set, config and score name must be equal and value must comply to optionally defined numerical range
Expand Down
3 changes: 3 additions & 0 deletions langfuse/api/resources/commons/types/score.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ class Score_Numeric(pydantic_v1.BaseModel):
alias="authorUserId", default=None
)
comment: typing.Optional[str] = None
metadata: typing.Optional[typing.Any] = None
config_id: typing.Optional[str] = pydantic_v1.Field(alias="configId", default=None)
queue_id: typing.Optional[str] = pydantic_v1.Field(alias="queueId", default=None)
environment: typing.Optional[str] = None
Expand Down Expand Up @@ -84,6 +85,7 @@ class Score_Categorical(pydantic_v1.BaseModel):
alias="authorUserId", default=None
)
comment: typing.Optional[str] = None
metadata: typing.Optional[typing.Any] = None
config_id: typing.Optional[str] = pydantic_v1.Field(alias="configId", default=None)
queue_id: typing.Optional[str] = pydantic_v1.Field(alias="queueId", default=None)
environment: typing.Optional[str] = None
Expand Down Expand Up @@ -142,6 +144,7 @@ class Score_Boolean(pydantic_v1.BaseModel):
alias="authorUserId", default=None
)
comment: typing.Optional[str] = None
metadata: typing.Optional[typing.Any] = None
config_id: typing.Optional[str] = pydantic_v1.Field(alias="configId", default=None)
queue_id: typing.Optional[str] = pydantic_v1.Field(alias="queueId", default=None)
environment: typing.Optional[str] = None
Expand Down
1 change: 1 addition & 0 deletions langfuse/api/resources/ingestion/types/score_body.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ class ScoreBody(pydantic_v1.BaseModel):
alias="observationId", default=None
)
comment: typing.Optional[str] = None
metadata: typing.Optional[typing.Any] = None
data_type: typing.Optional[ScoreDataType] = pydantic_v1.Field(
alias="dataType", default=None
)
Expand Down
1 change: 1 addition & 0 deletions langfuse/api/resources/score/types/create_score_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ class CreateScoreRequest(pydantic_v1.BaseModel):
alias="observationId", default=None
)
comment: typing.Optional[str] = None
metadata: typing.Optional[typing.Any] = None
environment: typing.Optional[str] = pydantic_v1.Field(default=None)
"""
The environment of the score. Can be any lowercase alphanumeric string with hyphens and underscores that does not start with 'langfuse'.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ class GetScoresResponseData_Numeric(pydantic_v1.BaseModel):
alias="authorUserId", default=None
)
comment: typing.Optional[str] = None
metadata: typing.Optional[typing.Any] = None
config_id: typing.Optional[str] = pydantic_v1.Field(alias="configId", default=None)
queue_id: typing.Optional[str] = pydantic_v1.Field(alias="queueId", default=None)
environment: typing.Optional[str] = None
Expand Down Expand Up @@ -87,6 +88,7 @@ class GetScoresResponseData_Categorical(pydantic_v1.BaseModel):
alias="authorUserId", default=None
)
comment: typing.Optional[str] = None
metadata: typing.Optional[typing.Any] = None
config_id: typing.Optional[str] = pydantic_v1.Field(alias="configId", default=None)
queue_id: typing.Optional[str] = pydantic_v1.Field(alias="queueId", default=None)
environment: typing.Optional[str] = None
Expand Down Expand Up @@ -146,6 +148,7 @@ class GetScoresResponseData_Boolean(pydantic_v1.BaseModel):
alias="authorUserId", default=None
)
comment: typing.Optional[str] = None
metadata: typing.Optional[typing.Any] = None
config_id: typing.Optional[str] = pydantic_v1.Field(alias="configId", default=None)
queue_id: typing.Optional[str] = pydantic_v1.Field(alias="queueId", default=None)
environment: typing.Optional[str] = None
Expand Down
10 changes: 10 additions & 0 deletions langfuse/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -1573,6 +1573,7 @@ def score(
trace_id: typing.Optional[str] = None,
id: typing.Optional[str] = None,
comment: typing.Optional[str] = None,
metadata: typing.Optional[typing.Any] = None,
observation_id: typing.Optional[str] = None,
config_id: typing.Optional[str] = None,
**kwargs,
Expand All @@ -1588,6 +1589,7 @@ def score(
trace_id: typing.Optional[str] = None,
id: typing.Optional[str] = None,
comment: typing.Optional[str] = None,
metadata: typing.Optional[typing.Any] = None,
observation_id: typing.Optional[str] = None,
config_id: typing.Optional[str] = None,
**kwargs,
Expand All @@ -1602,6 +1604,7 @@ def score(
trace_id: typing.Optional[str] = None,
id: typing.Optional[str] = None,
comment: typing.Optional[str] = None,
metadata: typing.Optional[typing.Any] = None,
observation_id: typing.Optional[str] = None,
config_id: typing.Optional[str] = None,
**kwargs,
Expand All @@ -1616,6 +1619,7 @@ def score(
trace_id (str): The id of the trace to which the score should be attached.
id (Optional[str]): The id of the score. If not provided, a new UUID is generated.
comment (Optional[str]): Additional context/explanation of the score.
metadata (Optional[Any]): Additional metadata of the score. Can be any JSON object. Metadata is merged when being updated via the API.
observation_id (Optional[str]): The id of the observation to which the score should be attached.
config_id (Optional[str]): The id of the score config. When set, the score value is validated against the config. Defaults to None.
**kwargs: Additional keyword arguments to include in the score.
Expand Down Expand Up @@ -1655,6 +1659,7 @@ def score(
"value": value,
"data_type": data_type,
"comment": comment,
"metadata": metadata,
"config_id": config_id,
"environment": self.environment,
**kwargs,
Expand Down Expand Up @@ -2415,6 +2420,7 @@ def score(
value: float,
data_type: typing.Optional[Literal["NUMERIC", "BOOLEAN"]] = None,
comment: typing.Optional[str] = None,
metadata: typing.Optional[typing.Any] = None,
config_id: typing.Optional[str] = None,
**kwargs,
) -> "StatefulClient": ...
Expand All @@ -2428,6 +2434,7 @@ def score(
value: str,
data_type: typing.Optional[Literal["CATEGORICAL"]] = "CATEGORICAL",
comment: typing.Optional[str] = None,
metadata: typing.Optional[typing.Any] = None,
config_id: typing.Optional[str] = None,
**kwargs,
) -> "StatefulClient": ...
Expand All @@ -2440,6 +2447,7 @@ def score(
value: typing.Union[float, str],
data_type: typing.Optional[ScoreDataType] = None,
comment: typing.Optional[str] = None,
metadata: typing.Optional[typing.Any] = None,
config_id: typing.Optional[str] = None,
**kwargs,
) -> "StatefulClient":
Expand All @@ -2451,6 +2459,7 @@ def score(
data_type (Optional[ScoreDataType]): The data type of the score. When not set, the data type is inferred from the score config's data type, when present.
When no config is set, the data type is inferred from the value's type, i.e. float values are categorized as numeric scores and string values as categorical scores.
comment (Optional[str]): Additional context/explanation of the score.
metadata (Optional[Any]): Additional metadata of the score. Can be any JSON object. Metadata is merged when being updated via the API.
id (Optional[str]): The id of the score. If not provided, a new UUID is generated.
config_id (Optional[str]): The id of the score config. When set, the score value is validated against the config. Defaults to None.
**kwargs: Additional keyword arguments to include in the score.
Expand Down Expand Up @@ -2484,6 +2493,7 @@ def score(
"value": value,
"data_type": data_type,
"comment": comment,
"metadata": metadata,
"config_id": config_id,
"environment": self.environment,
**kwargs,
Expand Down
5 changes: 4 additions & 1 deletion langfuse/extract_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,10 @@ def _extract_model_name(


def _extract_model_from_repr_by_pattern(
id: str, serialized: Optional[Dict[str, Any]], pattern: str, default: Optional[str] = None
id: str,
serialized: Optional[Dict[str, Any]],
pattern: str,
default: Optional[str] = None,
):
if serialized is None:
return None
Expand Down
59 changes: 51 additions & 8 deletions tests/test_core_sdk.py
Original file line number Diff line number Diff line change
Expand Up @@ -502,7 +502,8 @@ def test_score_trace():
trace_id=langfuse.get_trace_id(),
name="valuation",
value=0.5,
comment="This is a comment",
comment="tests/test_core_sdk.py::test_score_trace",
metadata={"key": "value"},
)

langfuse.flush()
Expand All @@ -519,9 +520,10 @@ def test_score_trace():

assert score["name"] == "valuation"
assert score["value"] == 0.5
assert score["comment"] == "This is a comment"
assert score["comment"] == "tests/test_core_sdk.py::test_score_trace"
assert score["observationId"] is None
assert score["dataType"] == "NUMERIC"
assert score["metadata"] == {"key": "value"}


def test_score_trace_nested_trace():
Expand All @@ -534,7 +536,8 @@ def test_score_trace_nested_trace():
trace.score(
name="valuation",
value=0.5,
comment="This is a comment",
comment="tests/test_core_sdk.py::test_score_trace_nested_trace",
metadata={"key": "value"},
)

langfuse.flush()
Expand All @@ -551,9 +554,10 @@ def test_score_trace_nested_trace():

assert score.name == "valuation"
assert score.value == 0.5
assert score.comment == "This is a comment"
assert score.comment == "tests/test_core_sdk.py::test_score_trace_nested_trace"
assert score.observation_id is None
assert score.data_type == "NUMERIC"
assert score.metadata == {"key": "value"}


def test_score_trace_nested_observation():
Expand All @@ -567,7 +571,8 @@ def test_score_trace_nested_observation():
span.score(
name="valuation",
value=0.5,
comment="This is a comment",
comment="tests/test_core_sdk.py::test_score_trace_nested_observation",
metadata={"key": "value"},
)

langfuse.flush()
Expand All @@ -584,9 +589,12 @@ def test_score_trace_nested_observation():

assert score.name == "valuation"
assert score.value == 0.5
assert score.comment == "This is a comment"
assert (
score.comment == "tests/test_core_sdk.py::test_score_trace_nested_observation"
)
assert score.observation_id == span.id
assert score.data_type == "NUMERIC"
assert score.metadata == {"key": "value"}


def test_score_span():
Expand All @@ -610,7 +618,8 @@ def test_score_span():
observation_id=spanId,
name="valuation",
value=1,
comment="This is a comment",
comment="tests/test_core_sdk.py::test_score_span",
metadata={"key": "value"},
)

langfuse.flush()
Expand All @@ -626,9 +635,10 @@ def test_score_span():

assert score["name"] == "valuation"
assert score["value"] == 1
assert score["comment"] == "This is a comment"
assert score["comment"] == "tests/test_core_sdk.py::test_score_span"
assert score["observationId"] == spanId
assert score["dataType"] == "NUMERIC"
assert score["metadata"] == {"key": "value"}


def test_create_trace_and_span():
Expand Down Expand Up @@ -1584,3 +1594,36 @@ def test_environment_from_env_var(monkeypatch):

fetched_trace = api_wrapper.get_trace(trace.id)
assert fetched_trace["environment"] == "testing"


@pytest.mark.parametrize(
    "input_metadata, expected_metadata",
    [
        ("Test Metadata", {"metadata": "Test Metadata"}),
        (1, {"metadata": 1}),
        (1.0, {"metadata": 1.0}),
        ({"key": "value"}, {"key": "value"}),
        (["value1", "value2"], {"metadata": ["value1", "value2"]}),
    ],
)
def test_metadata(input_metadata, expected_metadata):
    """Metadata set on a trace, an observation, and a score should round-trip
    through the API.

    The parametrized cases assert that dict metadata is stored as-is, while
    non-dict values (str, int, float, list) come back wrapped under a
    ``metadata`` key — presumably normalized server-side (TODO confirm).
    """
    client = Langfuse(debug=True)
    api = LangfuseAPI()

    # Attach identical metadata at all three levels.
    trace = client.trace(name="test_metadata", metadata=input_metadata)
    generation = trace.generation(name="test_gen", metadata=input_metadata)
    trace.score(name="test_score", value=1, metadata=input_metadata)

    client.flush()
    sleep(1)  # give async ingestion time to land before fetching

    fetched_trace = api.get_trace(trace.id)
    fetched_generation = api.get_observation(generation.id)
    fetched_score = fetched_trace["scores"][0]

    assert fetched_trace["metadata"] == expected_metadata
    assert fetched_generation["metadata"] == expected_metadata
    assert fetched_score["metadata"] == expected_metadata
Loading