class Argument:
    """One matched argument: its capture group, its parameter type and an optional name."""

    def __init__(
        self, group: Group, parameter_type: ParameterType, name: Optional[str]
    ):
        # name is the optional label paired with the parameter type (None when unnamed).
        self.name = name
        self.parameter_type = parameter_type
        self.group = group

    @staticmethod
    def build(
        tree_regexp: TreeRegexp,
        text: str,
        parameter_types_and_names: List[Tuple[ParameterType, Optional[str]]],
    ) -> Optional[List[Argument]]:
        """Match *text* and pair every capture group with its (type, name) entry.

        Args:
            tree_regexp: Object whose ``match(text)`` yields the root group, or a
                falsy value when the text does not match.
            text: The text to match.
            parameter_types_and_names: One ``(parameter_type, name)`` tuple per
                expected capture group, in order.

        Returns:
            A list with one ``Argument`` per child of the root group, or ``None``
            when the text does not match.

        Raises:
            CucumberExpressionError: If an entry is not a 2-tuple, or if the number
                of entries differs from the number of capture groups.
        """
        # Reject malformed entries before any matching is attempted.
        for item in parameter_types_and_names:
            is_pair = isinstance(item, tuple) and len(item) == 2
            if not is_pair:
                raise CucumberExpressionError(
                    f"Expected a tuple of (ParameterType, Optional[str]), but got {type(item)}: {item}"
                )

        root_group = tree_regexp.match(text)
        if not root_group:
            return None

        captured_groups = root_group.children
        expected_count = len(parameter_types_and_names)
        if len(captured_groups) != expected_count:
            raise CucumberExpressionError(
                f"Group has {len(captured_groups)} capture groups, but there were {expected_count} parameter types/names"
            )

        arguments = []
        for type_and_name, captured_group in zip(
            parameter_types_and_names, captured_groups
        ):
            parameter_type, parameter_name = type_and_name
            arguments.append(Argument(captured_group, parameter_type, parameter_name))
        return arguments

    @property
    def value(self):
        """The group's values run through the parameter type's ``transform``.

        ``None`` is handed to ``transform`` when the group itself is falsy.
        """
        raw_values = self.group.values if self.group else None
        return self.parameter_type.transform(raw_values)
def rewrite_optional(self, node: "Node") -> str:
    """Rewrite an optional node as an optional, non-capturing regex group.

    Args:
        node: The optional node; its children are read from ``node.nodes``.

    Returns:
        ``(?:<regex>)?`` where ``<regex>`` is the concatenation of
        ``rewrite_to_regex`` applied to each child node.

    Raises:
        ParameterIsNotAllowedInOptional: If ``get_possible_node_with_parameters``
            returns a truthy node.
        OptionalIsNotAllowedInOptional: If ``get_possible_node_with_optionals``
            returns a truthy node.
        OptionalMayNotBeEmpty: If ``are_nodes_empty`` reports the node as empty.
    """
    # Each lookup is done once and its result reused for both the check and
    # the error, instead of traversing the node a second time when raising.
    node_with_parameters = self.get_possible_node_with_parameters(node)
    if node_with_parameters:
        raise ParameterIsNotAllowedInOptional(node_with_parameters, self.expression)

    node_with_optionals = self.get_possible_node_with_optionals(node)
    if node_with_optionals:
        raise OptionalIsNotAllowedInOptional(node_with_optionals, self.expression)

    if self.are_nodes_empty(node):
        raise OptionalMayNotBeEmpty(node, self.expression)

    regex = "".join(self.rewrite_to_regex(child) for child in node.nodes)
    return rf"(?:{regex})?"
class ExpressionFactory:
    """Build a ``CucumberExpression`` or a ``RegularExpression`` from a string.

    The choice is made by ``is_cucumber_expression``; both kinds of expression
    are created with this factory's parameter type registry.
    """

    def __init__(
        self, parameter_type_registry: "ParameterTypeRegistry | None" = None
    ):
        """Store the registry to hand to every expression this factory creates.

        Args:
            parameter_type_registry: Registry to use. When omitted, a fresh
                ``ParameterTypeRegistry`` is created for this factory.
        """
        # A default argument value is evaluated once, when the function is
        # defined, so ``= ParameterTypeRegistry()`` would make every factory
        # created without arguments share (and mutate) the same registry.
        if parameter_type_registry is None:
            parameter_type_registry = ParameterTypeRegistry()
        self.parameter_type_registry = parameter_type_registry

    @staticmethod
    def _has_curly_brackets(string: str) -> bool:
        """Return True when *string* contains both ``{`` and ``}``."""
        return "{" in string and "}" in string

    @staticmethod
    def _extract_text_in_curly_brackets(string: str) -> list:
        """Return every substring captured by ``CURLY_BRACKET_PATTERN`` in *string*."""
        return CURLY_BRACKET_PATTERN.findall(string)

    def is_cucumber_expression(self, expression_string: str) -> bool:
        """Decide whether *expression_string* should be treated as a Cucumber Expression.

        Returns:
            False when the string lacks a ``{`` or a ``}``, or when any
            curly-bracket content matches ``INVALID_CURLY_PATTERN`` (i.e. the
            braces look like a regex quantifier such as ``{2}`` or ``{2,4}``);
            True otherwise.
        """
        if not self._has_curly_brackets(expression_string):
            return False
        bracket_texts = self._extract_text_in_curly_brackets(expression_string)
        # A single quantifier-like brace is enough to treat the whole string
        # as a regular expression.
        return not any(INVALID_CURLY_PATTERN.match(text) for text in bracket_texts)

    def create_expression(self, expression_string: str):
        """Create the expression object matching the syntax of *expression_string*.

        Returns:
            A ``CucumberExpression`` when ``is_cucumber_expression`` is true for
            the string, otherwise a ``RegularExpression``.
        """
        if self.is_cucumber_expression(expression_string):
            return CucumberExpression(expression_string, self.parameter_type_registry)
        return RegularExpression(expression_string, self.parameter_type_registry)
size = len(tokens) ast: List[Node] = [] diff --git a/python/cucumber_expressions/group.py b/python/cucumber_expressions/group.py index 06c1a88d2..22ace3752 100644 --- a/python/cucumber_expressions/group.py +++ b/python/cucumber_expressions/group.py @@ -1,14 +1,22 @@ from __future__ import annotations -from typing import List +from typing import List, Optional class Group: - def __init__(self, value: str, start: int, end: int, children: List[Group]): + def __init__( + self, + value: str, + start: int, + end: int, + children: List[Group], + name: Optional[str] = None, + ): self._children = children self._value = value self._start = start self._end = end + self.name = name @property def value(self): diff --git a/python/cucumber_expressions/group_builder.py b/python/cucumber_expressions/group_builder.py index 793151105..543efaeb3 100644 --- a/python/cucumber_expressions/group_builder.py +++ b/python/cucumber_expressions/group_builder.py @@ -16,12 +16,16 @@ def __init__(self): def add(self, group_builder: GroupBuilder): self._group_builders.append(group_builder) - def build(self, match, group_indices) -> Group: + def build(self, match, group_indices, group_name_map: dict) -> Group: group_index = next(group_indices) - children: List[Group] = [ - gb.build(match, group_indices) for gb in self._group_builders + group_name = group_name_map.get(group_index, None) + + children = [ + gb.build(match, group_indices, group_name_map) + for gb in self._group_builders ] return Group( + name=group_name, value=match.group(group_index), start=match.regs[group_index][0], end=match.regs[group_index][1], diff --git a/python/cucumber_expressions/parameter_type.py b/python/cucumber_expressions/parameter_type.py index 751835ec1..7b5588ce1 100644 --- a/python/cucumber_expressions/parameter_type.py +++ b/python/cucumber_expressions/parameter_type.py @@ -1,7 +1,7 @@ from __future__ import annotations import re -from typing import Callable, Optional, Pattern +from typing import Callable, 
def to_array(
    self, regexps: Union[List[str], str, List[Pattern], Pattern]
) -> List[str]:
    """Normalise a regexp, or a list of regexps, into a list of source strings.

    A non-list argument is treated as a single-element list. String entries
    are kept as they are; every other entry is converted with
    ``self._get_regexp_source``.
    """
    candidates: List = regexps if isinstance(regexps, list) else [regexps]
    sources: List[str] = []
    for candidate in candidates:
        if isinstance(candidate, str):
            sources.append(candidate)
        else:
            sources.append(self._get_regexp_source(candidate))
    return sources
""" + Lookup and match the text using parameter types, then transform the results. + Supports both named and unnamed capture groups. + """ + parameter_types = self.parameter_types_by_regexp.get(parameter_type_regexp) if not parameter_types: return None if len(parameter_types) > 1 and not parameter_types[0].prefer_for_regexp_match: + from cucumber_expressions.expression_generator import ( + CucumberExpressionGenerator, + ) + generated_expressions = CucumberExpressionGenerator( self ).generate_expressions(text) diff --git a/python/cucumber_expressions/regular_expression.py b/python/cucumber_expressions/regular_expression.py index dd8e7a3d8..847e7f3c1 100644 --- a/python/cucumber_expressions/regular_expression.py +++ b/python/cucumber_expressions/regular_expression.py @@ -1,11 +1,14 @@ import re -from typing import Optional, List +from collections.abc import Generator +from typing import Optional, Union, List from cucumber_expressions.argument import Argument from cucumber_expressions.parameter_type import ParameterType from cucumber_expressions.parameter_type_registry import ParameterTypeRegistry from cucumber_expressions.tree_regexp import TreeRegexp +NAMED_CAPTURE_GROUP_REGEX = re.compile(r"\?P<([^>]+)>") + class RegularExpression: """Creates a new instance. Use this when the transform types are not known in advance, @@ -13,7 +16,9 @@ class RegularExpression: dynamically typed languages.""" def __init__( - self, expression_regexp, parameter_type_registry: ParameterTypeRegistry + self, + expression_regexp: Union[re.Pattern, str], + parameter_type_registry: ParameterTypeRegistry, ): """Creates a new instance. Use this when the transform types are not known in advance, and should be determined by the regular expression's capture groups. 
def _process_capture_group(self, group_source: str):
    """Split a capture group's source into its name and its name-free pattern.

    Args:
        group_source: Source text of one capture group.

    Returns:
        ``(name, pattern)``. When ``NAMED_CAPTURE_GROUP_REGEX`` matches at the
        start of *group_source*, ``name`` is its first captured group and
        ``pattern`` is the result of ``self._remove_named_groups``. Otherwise
        ``name`` is ``None`` and ``pattern`` is *group_source* unchanged.
    """
    named_prefix = NAMED_CAPTURE_GROUP_REGEX.match(group_source)
    if named_prefix is None:
        # Unnamed group: nothing to strip.
        return None, group_source
    return named_prefix.group(1), self._remove_named_groups(group_source)
def match(self, string: str) -> Optional["Group"]:
    """Match *string* from its start and build the resulting group tree.

    Returns:
        ``None`` when ``self.regexp`` does not match; otherwise whatever
        ``self.group_builder.build`` returns for the match, an iterator over
        the group indices ``0..N`` and a ``{group index: group name}`` mapping.
    """
    found = self.regexp.match(string)
    if found is None:
        return None

    # groupindex maps name -> index; the builder looks names up by index.
    names_by_index = {}
    for group_name, group_number in self.regexp.groupindex.items():
        names_by_index[group_number] = group_name

    # Index 0 is the whole match, 1..N are the capture groups.
    index_iterator = iter(range(len(found.groups()) + 1))
    return self.group_builder.build(found, index_iterator, names_by_index)
@staticmethod
def is_named_group(source: str, index: int) -> bool:
    """Return True when the ``(`` at *index* opens a named group ``(?P<name>...)``.

    Args:
        source: The regular expression source.
        index: Position of the opening parenthesis in *source*.
    """
    # *index* points at "(", so the "?P<" marker starts one character later.
    # Comparing source[index + 1 : index + 3] with "P<" could never succeed,
    # because that slice begins with the "?".
    return source.startswith("?P<", index + 1)

@staticmethod
def extract_named_group_name(source: str, index: int) -> str:
    """Return the name of the named group opened by the ``(`` at *index*.

    For ``(?P<name>...)`` this returns ``"name"``. The caller is expected to
    have checked ``is_named_group`` first.
    """
    # Skip the whole "(?P<" prefix so the "<" is not included in the name.
    name_start = index + len("(?P<")
    name_end = source.find(">", name_start)
    return source[name_start:name_end]

@staticmethod
def is_non_capturing(source: str, index: int) -> bool:
    """Return True when the ``(`` at *index* opens a group that does not capture.

    A plain ``(X)`` and a named ``(?P<name>X)`` are capturing; every other
    ``(?...`` form, e.g. ``(?:X)``, ``(?=X)``, ``(?!X)`` or ``(?P=name)``, is
    reported as non-capturing.
    """
    if not source.startswith("?", index + 1):
        # (X)
        return False
    # (?P<name>X) still captures; any other (?...) construct does not.
    return not source.startswith("?P<", index + 1)
for value in values] + assert actual_result == expectation["expected_args"] def test_documents_match_arguments(self): values = match("I have {int} cuke(s)", "I have 7 cukes") - assert values[0] == 7 + assert values[0] == (7, None) + + def test_documents_match_arguments_with_names(self): + values = match("I have {cuke_count:int} cuke(s)", "I have 7 cukes") + assert values[0] == (7, "cuke_count") + + def test_documents_match_arguments_with_names_and_spaces(self): + values = match( + "I have { cuke_count : int } cuke(s) and {gherkin_count: int} gherkin(s)", + "I have 7 cukes and 4 gherkins", + ) + assert values[0] == (7, "cuke_count") + assert values[1] == (4, "gherkin_count") def test_matches_float(self): assert match("{float}", "") is None @@ -63,40 +73,44 @@ def test_matches_float(self): assert match("{float}", ",1") is None assert match("{float}", "1.") is None - assert match("{float}", "1") == [1] - assert match("{float}", "-1") == [-1] - assert match("{float}", "1.1") == [1.1] + assert match("{float}", "1") == [(1, None)] + assert match("{float}", "-1") == [(-1, None)] + assert match("{float}", "1.1") == [(1.1, None)] assert match("{float}", "1,000") is None assert match("{float}", "1,000,0") is None assert match("{float}", "1,000.1") is None assert match("{float}", "1,000,10") is None assert match("{float}", "1,0.1") is None assert match("{float}", "1,000,000.1") is None - assert match("{float}", "-1.1") == [-1.1] + assert match("{float}", "-1.1") == [(-1.1, None)] - assert match("{float}", ".1") == [0.1] - assert match("{float}", "-.1") == [-0.1] - assert match("{float}", "-.1000001") == [-0.1000001] - assert match("{float}", "1E1") == [10.0] - assert match("{float}", ".1E1") == [1] + assert match("{float}", ".1") == [(0.1, None)] + assert match("{float}", "-.1") == [(-0.1, None)] + assert match("{float}", "-.1000001") == [(-0.1000001, None)] + assert match("{float}", "1E1") == [(10.0, None)] + assert match("{float}", ".1E1") == [(1, None)] assert 
match("{float}", "E1") is None - assert match("{float}", "-.1E-1") == [-0.01] - assert match("{float}", "-.1E-2") == [-0.001] - assert match("{float}", "-.1E+1") == [-1] - assert match("{float}", "-.1E+2") == [-10] - assert match("{float}", "-.1E1") == [-1] - assert match("{float}", "-.1E2") == [-10] + assert match("{float}", "-.1E-1") == [(-0.01, None)] + assert match("{float}", "-.1E-2") == [(-0.001, None)] + assert match("{float}", "-.1E+1") == [(-1, None)] + assert match("{float}", "-.1E+2") == [(-10, None)] + assert match("{float}", "-.1E1") == [(-1, None)] + assert match("{float}", "-.1E2") == [(-10, None)] def test_float_with_zero(self): - assert match("{float}", "0") == [0.0] + assert match("{float}", "0") == [(0.0, None)] def test_matches_anonymous(self): - assert match("{}", "0.22") == ["0.22"] + assert match("{}", "0.22") == [("0.22", None)] def test_exposes_source(self): expr = "I have {int} cuke(s)" assert CucumberExpression(expr, ParameterTypeRegistry()).source == expr + def test_with_name_exposes_source(self): + expr = "I have {cuke_count:int} cuke(s)" + assert CucumberExpression(expr, ParameterTypeRegistry()).source == expr + def test_unmatched_optional_groups_have_undefined_values(self): parameter_type_registry = ParameterTypeRegistry() parameter_type_registry.define_parameter_type( @@ -110,11 +124,11 @@ def test_unmatched_optional_groups_have_undefined_values(self): ) ) - assert match("{textAndOrNumber}", "TLA", parameter_type_registry)[0] == [ - "TLA", + assert match("{textAndOrNumber}", "TLA", parameter_type_registry)[0] == ( + ["TLA", None], None, - ] - assert match("{textAndOrNumber}", "123", parameter_type_registry)[0] == [ + ) + assert match("{textAndOrNumber}", "123", parameter_type_registry)[0] == ( + [None, "123"], None, - "123", - ] + ) diff --git a/python/tests/test_expression_factory.py b/python/tests/test_expression_factory.py new file mode 100644 index 000000000..74a5f79a9 --- /dev/null +++ b/python/tests/test_expression_factory.py 
def test_expression_factory_regex():
    """A string without Cucumber parameters becomes a RegularExpression.

    The named groups must be spelled ``(?P<name>...)``; their names are what the
    assertions on ``matches[i].name`` check.
    """
    input_str = r"I have (?P<cuke_count>\d+) cukes? in my (?P<word>\w+) now"
    expression = ExpressionFactory().create_expression(input_str)
    assert isinstance(expression, RegularExpression)
    matches = expression.match("I have 4 cukes in my belly now")
    assert matches[0].value == 4
    assert matches[0].name == "cuke_count"
    assert matches[1].value == "belly"
    assert matches[1].name == "word"


def test_expression_factory_cucumber_expression():
    """A string with ``{...}`` parameters becomes a CucumberExpression."""
    input_str = "I have {name:int} cukes in my {string} now"
    expression = ExpressionFactory().create_expression(input_str)
    assert isinstance(expression, CucumberExpression)
    matches = expression.match('I have 4 cukes in my "belly" now')
    assert matches[0].value == 4
    assert matches[0].name == "name"
    assert matches[1].value == "belly"
    assert matches[1].name is None


def test_expression_factory_invalid():
    """Quantifier braces such as ``{2,4}`` make the string a RegularExpression."""
    input_str = r"^(?:(\d{2,4})-)?(\d{1,3})\s*([A-Za-z]{3})\s*(?:\{(\d+,\d+|\d+)\})?(\d{1,2})(?:\{[A-Za-z0-9]+\})?$"
    expression = ExpressionFactory().create_expression(input_str)
    assert isinstance(expression, RegularExpression)
def test_matches_named_capturing_group_returns_name(self):
    """Named groups expose their name on the matching child group.

    The pattern mixes unnamed and named groups; the names asserted below
    ("name" and "other") are the ones declared with ``(?P<...>)``.
    """
    tree_regexp = TreeRegexp(r"(a)(?P<name>b)(c)(?P<other>d)")
    group = tree_regexp.match("abcd")
    assert "abcd" == group.value
    assert len(group.children) == 4
    assert group.children[0].value == "a"
    assert group.children[1].value == "b"
    assert group.children[1].name == "name"
    assert group.children[2].value == "c"
    assert group.children[3].value == "d"
    assert group.children[3].name == "other"