Skip to content

Commit 63843c5

Browse files
fix(date_parser): fixed bug for advanced time range filter (apache#31867)
1 parent 8960db4 commit 63843c5

File tree

2 files changed

+226
-11
lines changed

2 files changed

+226
-11
lines changed

superset/utils/date_parser.py

Lines changed: 186 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,174 @@ def parse_past_timedelta(
143143
)
144144

145145

146+
def get_relative_base(unit: str, relative_start: str | None = None) -> str:
    """
    Pick the relative base keyword (`now` or `today`) for a time unit.

    The result is the anchor for every expression built from
    `time_range_lookup`. A caller-supplied ``relative_start`` always wins;
    otherwise the anchor follows the granularity of ``unit``.

    Args:
        unit (str): The time unit, matched case-insensitively ("second",
            "minute", "hour", "day", "week", "month", "quarter" or "year").
        relative_start (str | None): Optional user-provided base keyword or
            expression. ``None`` and the empty string both mean "not provided".

    Returns:
        str: ``relative_start`` when provided; otherwise "now" for second,
        minute and hour units and "today" for day and larger units.

    Raises:
        ValueError: If no base was provided and the unit is not recognised.
    """
    # An empty string is not a usable base (it would end up as DATETIME('')),
    # so it is treated the same as None and the unit decides the anchor.
    if relative_start:
        return relative_start

    normalized_unit = unit.lower()
    if normalized_unit in {"second", "minute", "hour"}:
        return "now"
    if normalized_unit in {"day", "week", "month", "quarter", "year"}:
        return "today"
    # Report the unit exactly as the caller passed it.
    raise ValueError(f"Unknown unit: {unit}")
170+
171+
172+
def handle_start_of(base_expression: str, unit: str) -> str:
    """
    Build the expression for the start of a unit (e.g. start of month).

    Used for queries matching the first regex in `time_range_lookup`, via
    `handle_modifier_and_unit`.

    Args:
        base_expression (str): The datetime expression to truncate
            (e.g. "DATETIME('today')"), as built by `handle_scope_and_unit`.
        unit (str): The granularity to truncate to, matched
            case-insensitively: "year", "quarter", "month", "week" or "day".

    Returns:
        str: ``DATETRUNC(<base_expression>, <unit>)`` with the unit lowercased.

    Raises:
        ValueError: If the unit is not one of the valid options.

    Relation to `time_range_lookup`:
    - Handles the "start of" or "beginning of" modifiers in the first regex pattern.
    - Example: "start of this month" → `DATETRUNC(DATETIME('today'), month)`.
    """
    # Normalise here as well so a direct call with "Month" behaves the same as
    # one routed through handle_modifier_and_unit, which lowercases the unit.
    normalized_unit = unit.lower()
    if normalized_unit not in {"year", "quarter", "month", "week", "day"}:
        raise ValueError(f"Invalid unit for 'start of': {unit}")
    return f"DATETRUNC({base_expression}, {normalized_unit})"
200+
201+
202+
def handle_end_of(base_expression: str, unit: str) -> str:
    """
    Build the expression for the end of a unit (e.g. end of month).

    Used for queries matching the first regex in `time_range_lookup`, via
    `handle_modifier_and_unit`.

    Args:
        base_expression (str): The datetime expression to operate on
            (e.g. "DATETIME('today')"), as built by `handle_scope_and_unit`.
        unit (str): The granularity to take the end of, matched
            case-insensitively: "year", "quarter", "month", "week" or "day".

    Returns:
        str: ``LASTDAY(<base_expression>, <unit>)`` with the unit lowercased.

    Raises:
        ValueError: If the unit is not one of the valid options.

    Relation to `time_range_lookup`:
    - Handles the "end of" modifier in the first regex pattern.
    - Example: "end of this month" → `LASTDAY(DATETIME('today'), month)`.
    - Example: "end of last month" →
      `LASTDAY(DATEADD(DATETIME('today'), -1, month), month)`.
    """
    # Normalise here as well so a direct call with "Month" behaves the same as
    # one routed through handle_modifier_and_unit, which lowercases the unit.
    normalized_unit = unit.lower()
    if normalized_unit not in {"year", "quarter", "month", "week", "day"}:
        raise ValueError(f"Invalid unit for 'end of': {unit}")
    return f"LASTDAY({base_expression}, {normalized_unit})"
229+
230+
231+
def handle_modifier_and_unit(
    modifier: str, scope: str, delta: str, unit: str, relative_base: str
) -> str:
    """
    Build a "start of …" / "end of …" datetime expression.

    The scope, delta, unit and relative base are first turned into a shifted
    base expression by `handle_scope_and_unit`; that expression is then wrapped
    by `handle_start_of` or `handle_end_of` depending on the modifier. This
    serves queries matching the first regex pattern in `time_range_lookup`.

    Args:
        modifier (str): "start of", "beginning of" or "end of" (any casing).
        scope (str): The time scope ("this", "last", "next" or "prior").
        delta (str): The numeric delta as text (e.g. "1", "2"); may be empty.
        unit (str): The granularity (e.g. "day", "month", "year").
        relative_base (str): The base keyword (e.g. "now" or "today"),
            determined by `get_relative_base`.

    Returns:
        str: The resulting datetime expression.

    Raises:
        ValueError: If the modifier is unknown, or if a helper rejects the
            scope or the unit.

    Example:
        >>> handle_modifier_and_unit("start of", "this", "", "month", "today")
        "DATETRUNC(DATETIME('today'), month)"

        >>> handle_modifier_and_unit("end of", "last", "1", "year", "today")
        "LASTDAY(DATEADD(DATETIME('today'), -1, year), year)"
    """
    # Resolve the scope first, so an invalid scope is reported before the
    # modifier is even looked at.
    shifted_base = handle_scope_and_unit(scope, delta, unit, relative_base)

    modifier_key = modifier.lower()
    if modifier_key == "end of":
        wrap = handle_end_of
    elif modifier_key in ("start of", "beginning of"):
        wrap = handle_start_of
    else:
        raise ValueError(f"Unknown modifier: {modifier}")

    return wrap(shifted_base, unit.lower())
276+
277+
278+
def handle_scope_and_unit(
    scope: str, delta: str | None, unit: str, relative_base: str
) -> str:
    """
    Build a datetime expression from a scope, delta, unit and relative base.

    Used for queries matching the second regex pattern in `time_range_lookup`,
    and by `handle_modifier_and_unit` to build the base it wraps.

    Args:
        scope (str): The time scope ("this", "last", "next" or "prior"),
            matched case-insensitively.
        delta (str | None): The numeric delta as text (e.g. "1", "2"). ``None``
            or an empty string means 1. Ignored when the scope is "this".
        unit (str): The granularity (e.g. "second", "minute", "hour", "day"),
            inserted into the expression exactly as given.
        relative_base (str): The base keyword (e.g. "now" or "today"),
            determined by `get_relative_base`.

    Returns:
        str: ``DATETIME('<relative_base>')`` for "this", or a ``DATEADD``
        around it with a negative ("last"/"prior") or positive ("next") delta.

    Raises:
        ValueError: If the scope is invalid, or if ``delta`` is non-empty and
            not an integer.

    Relation to `time_range_lookup`:
    - Processes queries like "last 2 weeks" or "this month".
    - Example: "last 2 weeks" → `DATEADD(DATETIME('today'), -2, week)`.
    """
    # The count group in the lookup regexes is optional, so an omitted count
    # ("last week") arrives here as None and stands for a single unit.
    amount = int(delta) if delta else 1
    anchor = f"DATETIME('{relative_base}')"
    scope_key = scope.lower()

    if scope_key == "this":
        return anchor
    if scope_key in ("last", "prior"):
        return f"DATEADD({anchor}, -{amount}, {unit})"
    if scope_key == "next":
        return f"DATEADD({anchor}, {amount}, {unit})"
    raise ValueError(f"Invalid scope: {scope}")
312+
313+
146314
def get_since_until( # pylint: disable=too-many-arguments,too-many-locals,too-many-branches,too-many-statements # noqa: C901
147315
time_range: str | None = None,
148316
since: str | None = None,
@@ -241,21 +409,28 @@ def get_since_until( # pylint: disable=too-many-arguments,too-many-locals,too-m
241409
if time_range and separator in time_range:
242410
time_range_lookup = [
243411
(
244-
r"^last\s+(day|week|month|quarter|year)$",
245-
lambda unit: f"DATEADD(DATETIME('{_relative_start}'), -1, {unit})",
246-
),
247-
(
248-
r"^last\s+([0-9]+)\s+(second|minute|hour|day|week|month|year)s?$",
249-
lambda delta,
250-
unit: f"DATEADD(DATETIME('{_relative_start}'), -{int(delta)}, {unit})", # pylint: disable=line-too-long,useless-suppression
412+
r"^(start of|beginning of|end of)\s+"
413+
r"(this|last|next|prior)\s+"
414+
r"([0-9]+)?\s*"
415+
r"(day|week|month|quarter|year)s?$", # Matches phrases like "start of next month" # pylint: disable=line-too-long,useless-suppression # noqa: E501
416+
lambda modifier, scope, delta, unit: handle_modifier_and_unit(
417+
modifier,
418+
scope,
419+
delta,
420+
unit,
421+
get_relative_base(unit, relative_start),
422+
),
251423
),
252424
(
253-
r"^next\s+([0-9]+)\s+(second|minute|hour|day|week|month|year)s?$",
254-
lambda delta,
255-
unit: f"DATEADD(DATETIME('{_relative_end}'), {int(delta)}, {unit})", # pylint: disable=line-too-long,useless-suppression
425+
r"^(this|last|next|prior)\s+"
426+
r"([0-9]+)?\s*"
427+
r"(second|minute|day|week|month|quarter|year)s?$", # Matches "next 5 days" or "last 2 weeks" # pylint: disable=line-too-long,useless-suppression # noqa: E501
428+
lambda scope, delta, unit: handle_scope_and_unit(
429+
scope, delta, unit, get_relative_base(unit, relative_start)
430+
),
256431
),
257432
(
258-
r"^(DATETIME.*|DATEADD.*|DATETRUNC.*|LASTDAY.*|HOLIDAY.*)$",
433+
r"^(DATETIME.*|DATEADD.*|DATETRUNC.*|LASTDAY.*|HOLIDAY.*)$", # Matches date-related keywords # pylint: disable=line-too-long,useless-suppression # noqa: E501
259434
lambda text: text,
260435
),
261436
]

tests/unit_tests/utils/date_parser_tests.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,46 @@ def test_get_since_until() -> None:
9292
expected = datetime(2016, 11, 6), datetime(2016, 11, 8)
9393
assert result == expected
9494

95+
result = get_since_until(" : now")
96+
expected = None, datetime(2016, 11, 7, 9, 30, 10)
97+
assert result == expected
98+
99+
result = get_since_until(" : last 2 minutes")
100+
expected = None, datetime(2016, 11, 7, 9, 28, 10)
101+
assert result == expected
102+
103+
result = get_since_until(" : prior 2 minutes")
104+
expected = None, datetime(2016, 11, 7, 9, 28, 10)
105+
assert result == expected
106+
107+
result = get_since_until(" : next 2 minutes")
108+
expected = None, datetime(2016, 11, 7, 9, 32, 10)
109+
assert result == expected
110+
111+
result = get_since_until("start of this month : ")
112+
expected = datetime(2016, 11, 1), None
113+
assert result == expected
114+
115+
result = get_since_until("start of next month : ")
116+
expected = datetime(2016, 12, 1), None
117+
assert result == expected
118+
119+
result = get_since_until("end of this month : ")
120+
expected = datetime(2016, 11, 30), None
121+
assert result == expected
122+
123+
result = get_since_until("end of next month : ")
124+
expected = datetime(2016, 12, 31), None
125+
assert result == expected
126+
127+
result = get_since_until("beginning of next year : ")
128+
expected = datetime(2017, 1, 1), None
129+
assert result == expected
130+
131+
result = get_since_until("beginning of last year : ")
132+
expected = datetime(2015, 1, 1), None
133+
assert result == expected
134+
95135
result = get_since_until("2018-01-01T00:00:00 : 2018-12-31T23:59:59")
96136
expected = datetime(2018, 1, 1), datetime(2018, 12, 31, 23, 59, 59)
97137
assert result == expected

0 commit comments

Comments
 (0)