Skip to content

Commit 3ff5ec3

Browse files
Add measures to metrics (temporary) & fix metric deduplication (#436)
Towards #387 ### Description This PR contains a few small changes and improvements, all grouped up in the process of getting to a point where metricflow can onboard onto the new dsi version immediately. At a high level, these changes take three forms. I've tried to detail them in comments throughout the PR, but here they are as well: * general cleanup or improvements that came up along the way * fixing the equality checking when searching for matching metrics (to avoid creating duplicates). * this is a correctness issue here in this repo. It can arise when we have a metric created from a measure with create_metric=true and then also create one for inputs. (This was found in some of the "parse-the-entire-project" tests in metricflow.) * adding `measure`s and sometimes `input_measures` to some of the new metrics made in transformations. This is only necessary because the upgrades in MF are not yet complete and some operations still throw errors when encountering metrics without input measures (especially in tests :/ ), and these new lines can be removed in a few weeks when everything is ready. I've also tested this by pointing the metricflow repo at the top commit here to make sure `make test` is running alright since the latest dsi version would not work in MF before. ### Checklist - [x] I have read [the contributing guide](https://github.com/dbt-labs/dbt-semantic-interfaces/blob/main/CONTRIBUTING.md) and understand what's expected of me - [x] I have signed the [CLA](https://docs.getdbt.com/docs/contributor-license-agreements) - [x] This PR includes tests, or tests are not required/relevant for this PR - [x] I have run `changie new` to [create a changelog entry](https://github.com/dbt-labs/dbt-semantic-interfaces/blob/main/CONTRIBUTING.md#adding-a-changelog-entry)
1 parent cca2603 commit 3ff5ec3

File tree

6 files changed

+278
-25
lines changed

6 files changed

+278
-25
lines changed
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
kind: Fixes
2+
body: Fix transformation metric de-duplication and add measures to new metrics to improve metricflow compatibility.
3+
time: 2025-10-02T11:26:37.069417-07:00
4+
custom:
5+
Author: theyostalservice
6+
Issue: "387"

dbt_semantic_interfaces/implementations/metric.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -347,10 +347,14 @@ def build_metric_aggregation_params(
347347
348348
It lives here instead of measures to avoid circular import issues.
349349
"""
350+
agg_params = measure.agg_params.copy(deep=True) if measure.agg_params is not None else None
351+
non_additive_dimension = (
352+
measure.non_additive_dimension.copy(deep=True) if measure.non_additive_dimension is not None else None
353+
)
350354
return PydanticMetricAggregationParams(
351355
semantic_model=semantic_model_name,
352356
agg=measure.agg,
353-
agg_params=measure.agg_params,
357+
agg_params=agg_params,
354358
agg_time_dimension=measure.agg_time_dimension,
355-
non_additive_dimension=measure.non_additive_dimension,
359+
non_additive_dimension=non_additive_dimension,
356360
)

dbt_semantic_interfaces/transformations/add_input_metric_measures.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,7 @@ def _get_measures_for_metric(
3131
) -> Set[PydanticMetricInputMeasure]:
3232
"""Returns a unique set of input measures for a given metric."""
3333
measures: Set = set()
34-
matched_metric = next(
35-
iter((metric for metric in semantic_manifest.metrics if metric.name == metric_name)), None
36-
)
34+
matched_metric = next((metric for metric in semantic_manifest.metrics if metric.name == metric_name), None)
3735
if matched_metric:
3836
if matched_metric.type is MetricType.SIMPLE or matched_metric.type is MetricType.CUMULATIVE:
3937
if matched_metric.type_params.measure is not None:

dbt_semantic_interfaces/transformations/measure_to_metric_transformation_pieces/measure_features_to_metric_name.py

Lines changed: 37 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from dbt_semantic_interfaces.implementations.elements.measure import PydanticMeasure
44
from dbt_semantic_interfaces.implementations.metric import (
55
PydanticMetric,
6+
PydanticMetricInputMeasure,
67
PydanticMetricTypeParams,
78
)
89
from dbt_semantic_interfaces.implementations.semantic_manifest import (
@@ -52,7 +53,7 @@ def _store_metric_name(
5253
key = (measure_name, fill_nulls_with, join_to_timespine)
5354
self._metric_name_dict[key] = metric_name
5455

55-
def _find_metric_clone_in_manifest(
56+
def _find_simple_metric_functional_clone_in_manifest(
5657
self,
5758
metric: PydanticMetric,
5859
manifest: PydanticSemanticManifest,
@@ -61,20 +62,36 @@ def _find_metric_clone_in_manifest(
6162
6263
returns the metric if it exists, otherwise None
6364
64-
Note: this can be further optimized by pre-caching metrics based on features,
65-
but let's not prematurely optimize.
65+
Note: this is appropriate for SIMPLE metrics that would **replace a measure** in
66+
the new YAML. This code would require updates and expansion to handle anything beyond that.
6667
"""
67-
search_metric = metric.copy(deep=True)
68+
69+
def _metrics_equivalent(search_metric: PydanticMetric, manifest_metric: PydanticMetric) -> bool:
70+
"""Check if the given metric and manifest_metric are equivalent based on selected fields."""
71+
fields_match = (
72+
search_metric.type == manifest_metric.type
73+
and search_metric.type_params.window == manifest_metric.type_params.window
74+
and search_metric.type_params.grain_to_date == manifest_metric.type_params.grain_to_date
75+
and search_metric.type_params.metric_aggregation_params
76+
== manifest_metric.type_params.metric_aggregation_params
77+
and search_metric.type_params.join_to_timespine == manifest_metric.type_params.join_to_timespine
78+
and search_metric.type_params.fill_nulls_with == manifest_metric.type_params.fill_nulls_with
79+
and search_metric.type_params.expr == manifest_metric.type_params.expr
80+
and search_metric.filter == manifest_metric.filter
81+
and search_metric.time_granularity == manifest_metric.time_granularity
82+
)
83+
if not fields_match:
84+
return False
85+
if (
86+
manifest_metric.type_params.measure is not None
87+
and search_metric.type_params.measure != manifest_metric.type_params.measure
88+
):
89+
return False
90+
return True
91+
6892
for existing_metric in manifest.metrics:
69-
# this allows us to a straight equality comparison, which is safer in the future
70-
# than implementing a custom comparison function.
71-
search_metric.name = existing_metric.name
72-
search_metric.metadata = existing_metric.metadata
73-
search_metric.type_params.is_private = existing_metric.type_params.is_private
74-
if search_metric == existing_metric:
93+
if _metrics_equivalent(search_metric=metric, manifest_metric=existing_metric):
7594
return existing_metric
76-
print("provided metric", search_metric)
77-
print("existing metric", existing_metric)
7895
return None
7996

8097
@staticmethod
@@ -83,7 +100,7 @@ def build_metric_from_measure_configuration(
83100
semantic_model_name: str,
84101
fill_nulls_with: Optional[int],
85102
join_to_timespine: Optional[bool],
86-
is_private: bool = True,
103+
is_private: bool,
87104
) -> PydanticMetric:
88105
"""Build a metric from the measure configuration.
89106
@@ -176,8 +193,14 @@ def get_or_create_metric_for_measure(
176193
semantic_model_name=model_name,
177194
fill_nulls_with=fill_nulls_with,
178195
join_to_timespine=join_to_timespine,
196+
is_private=True,
179197
)
180-
metric = self._find_metric_clone_in_manifest(
198+
# TODO SL-4257: this is supporting legacy cases in MF until work there is complete,
199+
# and should be removeable long before the rest of the backward-compatibility work.
200+
built_metric.type_params.measure = PydanticMetricInputMeasure(name=measure.name)
201+
built_metric.type_params.input_measures = [PydanticMetricInputMeasure(name=measure.name)]
202+
203+
metric = self._find_simple_metric_functional_clone_in_manifest(
181204
metric=built_metric,
182205
manifest=manifest,
183206
)

tests/parsing/test_metric_parsing_with_custom_grain.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ def test_cumulative_metric_with_custom_grain_to_date() -> None: # noqa: D
4848
type_params:
4949
measure:
5050
name: bookings
51+
fill_nulls_with: 15
5152
cumulative_type_params:
5253
grain_to_date: martian_week
5354
"""
@@ -121,9 +122,9 @@ def test_cumulative_metric_with_custom_window() -> None: # noqa: D
121122
)
122123
assert not model.issues.has_blocking_issues
123124
semantic_manifest = model.semantic_manifest
124-
# 2 explicit ones and one that is created for the measure input for the
125-
# cumulative metric's params
126-
assert len(semantic_manifest.metrics) == 3
125+
# 2 explicit metrics. The cumulative metric's input metric should be deduplicated
126+
# so it will match.
127+
assert len(semantic_manifest.metrics) == 2
127128

128129
metric = next((m for m in semantic_manifest.metrics if m.name == "test_cumulative_metric_with_custom_window"), None)
129130
assert metric is not None, "Can't find metric"
@@ -185,9 +186,9 @@ def test_conversion_metric_with_custom_grain_window() -> None: # noqa: D
185186
)
186187
assert not model.issues.has_blocking_issues
187188
semantic_manifest = model.semantic_manifest
188-
# 2 explicit ones and one that is created for the measure input for the
189-
# cumulative metric's params
190-
assert len(semantic_manifest.metrics) == 3
189+
# 2 explicitly created metrics. The conversion measure -> metric transformation
190+
# should not need to create a new metric since the existing one already matches.
191+
assert len(semantic_manifest.metrics) == 2
191192

192193
metric = next(
193194
(m for m in semantic_manifest.metrics if m.name == "test_conversion_metric_with_custom_grain_window"), None
Lines changed: 221 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,221 @@
1+
from dbt_semantic_interfaces.implementations.elements.dimension import (
2+
PydanticDimension,
3+
PydanticDimensionTypeParams,
4+
)
5+
from dbt_semantic_interfaces.implementations.elements.entity import PydanticEntity
6+
from dbt_semantic_interfaces.implementations.elements.measure import PydanticMeasure
7+
from dbt_semantic_interfaces.implementations.metric import (
8+
PydanticCumulativeTypeParams,
9+
PydanticMetric,
10+
PydanticMetricInput,
11+
PydanticMetricInputMeasure,
12+
PydanticMetricTimeWindow,
13+
PydanticMetricTypeParams,
14+
)
15+
from dbt_semantic_interfaces.implementations.node_relation import PydanticNodeRelation
16+
from dbt_semantic_interfaces.implementations.project_configuration import (
17+
PydanticProjectConfiguration,
18+
)
19+
from dbt_semantic_interfaces.implementations.semantic_manifest import (
20+
PydanticSemanticManifest,
21+
)
22+
from dbt_semantic_interfaces.implementations.semantic_model import (
23+
PydanticSemanticModel,
24+
PydanticSemanticModelDefaults,
25+
)
26+
from dbt_semantic_interfaces.transformations.semantic_manifest_transformer import (
27+
PydanticSemanticManifestTransformer,
28+
)
29+
from dbt_semantic_interfaces.type_enums import (
30+
AggregationType,
31+
DimensionType,
32+
EntityType,
33+
MetricType,
34+
TimeGranularity,
35+
)
36+
from dbt_semantic_interfaces.validations.semantic_manifest_validator import (
37+
SemanticManifestValidator,
38+
)
39+
40+
41+
def _project_config() -> PydanticProjectConfiguration:
42+
return PydanticProjectConfiguration()
43+
44+
45+
def test_e2e_measure_create_metric_then_cumulative_uses_metric_input() -> None:
46+
"""End-to-end: measure create_metric=True, cumulative references created metric by name."""
47+
sm_name = "sm"
48+
time_dim_name = "ds"
49+
sm = PydanticSemanticModel(
50+
name=sm_name,
51+
defaults=PydanticSemanticModelDefaults(agg_time_dimension=time_dim_name),
52+
node_relation=PydanticNodeRelation(alias=sm_name, schema_name="schema"),
53+
entities=[
54+
PydanticEntity(
55+
name="user",
56+
type=EntityType.PRIMARY,
57+
expr="user_id",
58+
),
59+
],
60+
dimensions=[
61+
PydanticDimension(
62+
name="ds",
63+
type=DimensionType.TIME,
64+
type_params=PydanticDimensionTypeParams(time_granularity=TimeGranularity.DAY),
65+
),
66+
PydanticDimension(
67+
name="created_at",
68+
type=DimensionType.TIME,
69+
type_params=PydanticDimensionTypeParams(time_granularity=TimeGranularity.DAY),
70+
),
71+
PydanticDimension(
72+
name="ds_partitioned",
73+
type=DimensionType.TIME,
74+
is_partition=True,
75+
type_params=PydanticDimensionTypeParams(time_granularity=TimeGranularity.DAY),
76+
),
77+
PydanticDimension(
78+
name="home_state",
79+
type=DimensionType.CATEGORICAL,
80+
),
81+
PydanticDimension(
82+
name="last_profile_edit_ts",
83+
type=DimensionType.TIME,
84+
type_params=PydanticDimensionTypeParams(time_granularity=TimeGranularity.MILLISECOND),
85+
),
86+
PydanticDimension(
87+
name="bio_added_ts",
88+
type=DimensionType.TIME,
89+
type_params=PydanticDimensionTypeParams(time_granularity=TimeGranularity.SECOND),
90+
),
91+
PydanticDimension(
92+
name="last_login_ts",
93+
type=DimensionType.TIME,
94+
type_params=PydanticDimensionTypeParams(time_granularity=TimeGranularity.MINUTE),
95+
),
96+
PydanticDimension(
97+
name="archived_at",
98+
type=DimensionType.TIME,
99+
type_params=PydanticDimensionTypeParams(time_granularity=TimeGranularity.HOUR),
100+
),
101+
],
102+
measures=[
103+
PydanticMeasure(
104+
name="archived_users",
105+
agg=AggregationType.SUM,
106+
expr="1",
107+
create_metric=True,
108+
)
109+
],
110+
)
111+
112+
metrics = [
113+
PydanticMetric(
114+
name="subdaily_cumulative_window_metric",
115+
type=MetricType.CUMULATIVE,
116+
description="m1_cumulative_1 description",
117+
type_params=PydanticMetricTypeParams(
118+
measure=PydanticMetricInputMeasure(name="archived_users"),
119+
cumulative_type_params=PydanticCumulativeTypeParams(
120+
window=PydanticMetricTimeWindow(count=3, granularity="hour"),
121+
),
122+
),
123+
),
124+
PydanticMetric(
125+
name="subdaily_cumulative_grain_to_date_metric",
126+
type=MetricType.CUMULATIVE,
127+
description="m1_cumulative_2 description",
128+
type_params=PydanticMetricTypeParams(
129+
measure=PydanticMetricInputMeasure(name="archived_users"),
130+
cumulative_type_params=PydanticCumulativeTypeParams(
131+
grain_to_date="hour",
132+
),
133+
),
134+
),
135+
PydanticMetric(
136+
name="subdaily_offset_window_metric",
137+
type=MetricType.DERIVED,
138+
description="archived_users_offset_window description",
139+
type_params=PydanticMetricTypeParams(
140+
expr="archived_users",
141+
metrics=[
142+
PydanticMetricInput(
143+
name="archived_users",
144+
offset_window=PydanticMetricTimeWindow(count=1, granularity="hour"),
145+
)
146+
],
147+
),
148+
),
149+
PydanticMetric(
150+
name="subdaily_offset_grain_to_date_metric",
151+
type=MetricType.DERIVED,
152+
description="offset grain to date metric with a sub-daily agg time dim",
153+
type_params=PydanticMetricTypeParams(
154+
expr="archived_users",
155+
metrics=[
156+
PydanticMetricInput(
157+
name="archived_users",
158+
offset_to_grain="hour",
159+
)
160+
],
161+
),
162+
),
163+
PydanticMetric(
164+
name="subdaily_join_to_time_spine_metric",
165+
type=MetricType.SIMPLE,
166+
description="simple metric with sub-daily agg time dim that joins to time spine",
167+
type_params=PydanticMetricTypeParams(
168+
measure=PydanticMetricInputMeasure(
169+
name="archived_users",
170+
join_to_timespine=True,
171+
),
172+
),
173+
),
174+
PydanticMetric(
175+
name="simple_subdaily_metric_default_day",
176+
type=MetricType.SIMPLE,
177+
description="simple metric with sub-daily agg time dim that doesn't specify default granularity",
178+
type_params=PydanticMetricTypeParams(
179+
measure=PydanticMetricInputMeasure(
180+
name="archived_users",
181+
),
182+
),
183+
),
184+
PydanticMetric(
185+
name="simple_subdaily_metric_default_hour",
186+
type=MetricType.SIMPLE,
187+
description="simple metric with sub-daily agg time dim that has an explicit default granularity",
188+
type_params=PydanticMetricTypeParams(
189+
measure=PydanticMetricInputMeasure(
190+
name="archived_users",
191+
),
192+
),
193+
time_granularity="hour",
194+
),
195+
PydanticMetric(
196+
name="archived_users_join_to_time_spine",
197+
type=MetricType.SIMPLE,
198+
description="subdaily metric joining to time spine",
199+
type_params=PydanticMetricTypeParams(
200+
measure=PydanticMetricInputMeasure(
201+
name="archived_users",
202+
join_to_timespine=True,
203+
),
204+
),
205+
),
206+
]
207+
208+
manifest = PydanticSemanticManifest(
209+
semantic_models=[sm],
210+
metrics=metrics,
211+
project_configuration=_project_config(),
212+
)
213+
214+
transformed = PydanticSemanticManifestTransformer.transform(model=manifest)
215+
216+
model_validator = SemanticManifestValidator[PydanticSemanticManifest]()
217+
model_validator.checked_validations(transformed)
218+
219+
# Expect exactly 1 new metric - the proxy simple metric created for the measure
220+
assert len(transformed.metrics) == len(metrics) + 1
221+
assert any(m for m in transformed.metrics if m.type == MetricType.SIMPLE and m.name == "archived_users")

0 commit comments

Comments
 (0)