Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add named groups for python #316

Draft
wants to merge 7 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 25 additions & 15 deletions python/cucumber_expressions/argument.py
Original file line number Diff line number Diff line change
@@ -1,42 +1,52 @@
from __future__ import annotations

from typing import Optional, List
from typing import Optional, List, Tuple

from cucumber_expressions.group import Group
from cucumber_expressions.parameter_type import ParameterType
from cucumber_expressions.tree_regexp import TreeRegexp
from cucumber_expressions.tree_regexp import TreeRegexp, Group
from cucumber_expressions.errors import CucumberExpressionError


class Argument:
def __init__(self, group, parameter_type):
self._group: Group = group
self.parameter_type: ParameterType = parameter_type
def __init__(
self, group: Group, parameter_type: ParameterType, name: Optional[str]
):
self.group = group
self.parameter_type = parameter_type
self.name = name

@staticmethod
def build(
tree_regexp: TreeRegexp, text: str, parameter_types: List
tree_regexp: TreeRegexp,
text: str,
parameter_types_and_names: List[Tuple[ParameterType, Optional[str]]],
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should this maybe be parameter_types_with_names which then would make sense because the name could often be nil (Which feels "right")

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point!

) -> Optional[List[Argument]]:
# Check if all elements in parameter_types_and_names are tuples
for item in parameter_types_and_names:
if not isinstance(item, tuple) or len(item) != 2:
raise CucumberExpressionError(
f"Expected a tuple of (ParameterType, Optional[str]), but got {type(item)}: {item}"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This error is very technical. What should a user do if/when they encounter this error?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also do most users know what a tuple is?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point - I'll have a think - this was mainly done for my benefit when debugging!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tuple is a common type used in Python - the type itself should make sense, but I'll review for clarity

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Syntax: {param_name:param_type}
How do you represent the Optional[str] when param_type is not present? With None?
For simplicity sake, could it assume string type if param_type is omitted?
I think this error is a bit redundant because the parser should handle the errors/exceptions. Meaning that if the code is working properly you will never throw this error.

)

match_group = tree_regexp.match(text)
if not match_group:
return None

arg_groups = match_group.children

if len(arg_groups) != len(parameter_types):
if len(arg_groups) != len(parameter_types_and_names):
param_count = len(parameter_types_and_names)
raise CucumberExpressionError(
f"Group has {len(arg_groups)} capture groups, but there were {len(parameter_types)} parameter types"
f"Group has {len(arg_groups)} capture groups, but there were {param_count} parameter types/names"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

think the ending of this error shouldn't be amended - the issue is still that there were an incorrect number of parameter types (The names being present / not is irrelevant for the length issue)

)

return [
Argument(arg_group, parameter_type)
for parameter_type, arg_group in zip(parameter_types, arg_groups)
Argument(arg_group, parameter_type, parameter_name)
for (parameter_type, parameter_name), arg_group in zip(
parameter_types_and_names, arg_groups
)
]

@property
def value(self):
return self.parameter_type.transform(self.group.values if self.group else None)

@property
def group(self):
return self._group
10 changes: 5 additions & 5 deletions python/cucumber_expressions/ast.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations

from enum import Enum
from typing import Optional, List
from typing import Optional, List, Any


class NodeType(Enum):
Expand Down Expand Up @@ -78,7 +78,7 @@ def end(self) -> int:
def text(self) -> str:
return self.token or "".join([node_value.text for node_value in self.nodes])

def to_json(self):
def to_json(self) -> dict[str, Any]:
json_obj = {"type": self.ast_type.value}
if self.nodes is not None:
json_obj["nodes"] = [node_value.to_json() for node_value in self.nodes]
Expand Down Expand Up @@ -140,7 +140,7 @@ def type_of(char: str) -> TokenType:
return TokenType.TEXT

@staticmethod
def symbol_of(token: TokenType):
def symbol_of(token: TokenType) -> str:
possible_token_character_key = token.name + "_CHARACTER"
if any(
e.name
Expand All @@ -151,7 +151,7 @@ def symbol_of(token: TokenType):
return ""

@staticmethod
def purpose_of(token: TokenType):
def purpose_of(token: TokenType) -> str:
if token in [TokenType.BEGIN_OPTIONAL, TokenType.END_OPTIONAL]:
return "optional text"
if token in [TokenType.BEGIN_PARAMETER, TokenType.END_PARAMETER]:
Expand All @@ -160,7 +160,7 @@ def purpose_of(token: TokenType):
return "alternation"
return ""

def to_json(self):
def to_json(self) -> dict[str, Any]:
return {
"type": self.ast_type.value,
"text": self.text,
Expand Down
45 changes: 29 additions & 16 deletions python/cucumber_expressions/expression.py
Original file line number Diff line number Diff line change
@@ -1,33 +1,34 @@
from typing import Optional, List
from typing import Optional, List, Tuple

from cucumber_expressions.argument import Argument
from cucumber_expressions.ast import Node, NodeType
from cucumber_expressions.expression_parser import CucumberExpressionParser
from cucumber_expressions.parameter_type import ParameterType
from cucumber_expressions.parameter_type_registry import ParameterTypeRegistry
from cucumber_expressions.tree_regexp import TreeRegexp
from cucumber_expressions.errors import (
UndefinedParameterTypeError,
ParameterIsNotAllowedInOptional,
OptionalIsNotAllowedInOptional,
OptionalMayNotBeEmpty,
AlternativeMayNotBeEmpty,
AlternativeMayNotExclusivelyContainOptionals,
UndefinedParameterTypeError,
)

ESCAPE_PATTERN = rb"([\\^\[({$.|?*+})\]])"


class CucumberExpression:
def __init__(self, expression, parameter_type_registry):
def __init__(self, expression: str, parameter_type_registry: ParameterTypeRegistry):
self.expression = expression
self.parameter_type_registry = parameter_type_registry
self.parameter_types: List[ParameterType] = []
self.parameter_types_and_names: List[Tuple[ParameterType, Optional[str]]] = []
self.tree_regexp = TreeRegexp(
self.rewrite_to_regex(CucumberExpressionParser().parse(self.expression))
)

def match(self, text: str) -> Optional[List[Argument]]:
return Argument.build(self.tree_regexp, text, self.parameter_types)
return Argument.build(self.tree_regexp, text, self.parameter_types_and_names)

@property
def source(self):
Expand Down Expand Up @@ -57,23 +58,21 @@ def rewrite_to_regex(self, node: Node):
def escape_regex(expression) -> str:
return expression.translate({i: "\\" + chr(i) for i in ESCAPE_PATTERN})

def rewrite_optional(self, node: Node):
_possible_node_with_params = self.get_possible_node_with_parameters(node)
if _possible_node_with_params:
def rewrite_optional(self, node: Node) -> str:
if self.get_possible_node_with_parameters(node):
raise ParameterIsNotAllowedInOptional(
_possible_node_with_params, self.expression
self.get_possible_node_with_parameters(node), self.expression
)
_possible_node_with_optionals = self.get_possible_node_with_optionals(node)
if _possible_node_with_optionals:
if self.get_possible_node_with_optionals(node):
raise OptionalIsNotAllowedInOptional(
_possible_node_with_optionals, self.expression
self.get_possible_node_with_optionals(node), self.expression
)
if self.are_nodes_empty(node):
raise OptionalMayNotBeEmpty(node, self.expression)
regex = "".join([self.rewrite_to_regex(_node) for _node in node.nodes])
return rf"(?:{regex})?"

def rewrite_alternation(self, node: Node):
def rewrite_alternation(self, node: Node) -> str:
for alternative in node.nodes:
if not alternative.nodes:
raise AlternativeMayNotBeEmpty(alternative, self.expression)
Expand All @@ -87,20 +86,34 @@ def rewrite_alternation(self, node: Node):
def rewrite_alternative(self, node: Node):
return "".join([self.rewrite_to_regex(_node) for _node in node.nodes])

def rewrite_parameter(self, node: Node):
def rewrite_parameter(self, node: Node) -> str:
name = node.text
parameter_type = self.parameter_type_registry.lookup_by_type_name(name)
group_name, parameter_type = self.parse_parameter_name(name)

if not parameter_type:
raise UndefinedParameterTypeError(node, self.expression, name)

self.parameter_types.append(parameter_type)
self.parameter_types_and_names.append((parameter_type, group_name))

regexps = parameter_type.regexps
if len(regexps) == 1:
return rf"({regexps[0]})"
return rf"((?:{')|(?:'.join(regexps)}))"

def parse_parameter_name(
self, name: str
) -> Tuple[Optional[str], Optional[ParameterType]]:
"""Helper function to parse the parameter name and return group_name and parameter_type."""
if ":" in name:
group_name, parameter_type_name = [part.strip() for part in name.split(":")]
parameter_type = self.parameter_type_registry.lookup_by_type_name(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This allows for the empty group name, which is distinct from the None group name. Probably now what we want.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might also be worth while to push this into the parser.

parameter_type_name
)
else:
group_name = None
parameter_type = self.parameter_type_registry.lookup_by_type_name(name)
return group_name, parameter_type

def rewrite_expression(self, node: Node):
regex = "".join([self.rewrite_to_regex(_node) for _node in node.nodes])
return rf"^{regex}$"
Expand Down
39 changes: 39 additions & 0 deletions python/cucumber_expressions/expression_factory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import re

from cucumber_expressions.expression import CucumberExpression
from cucumber_expressions.parameter_type_registry import ParameterTypeRegistry
from cucumber_expressions.regular_expression import RegularExpression

CURLY_BRACKET_PATTERN = re.compile(r"{(.*?)}")
INVALID_CURLY_PATTERN = re.compile(r"^\d+(?:,\d+)?$")


class ExpressionFactory:
def __init__(
self, parameter_type_registry: ParameterTypeRegistry = ParameterTypeRegistry()
):
self.parameter_type_registry = parameter_type_registry

@staticmethod
def _has_curly_brackets(string: str) -> bool:
return "{" in string and "}" in string

@staticmethod
def _extract_text_in_curly_brackets(string: str) -> list:
return CURLY_BRACKET_PATTERN.findall(string)

def is_cucumber_expression(self, expression_string: str):
Copy link
Contributor

@mpkorstanje mpkorstanje Jan 23, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this check is simple enough. The primary constraint is explaining to people what is and is not a cucumber expression. For Java I eventually settled on requiring that all regular expressions start with ^ or end with $ and that everything else is a Cucumber expression. This is both simple and unambiguous.

This helps avoid a situation where a user makes a mistake in a Cucumber expression, causing Cucumber to think it is a regular expressions and then fail because the regular expression also isn't valid and results in a very cryptic error message.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it worth standardising this check then across all flavours? I have no idea what we do in ruby as I've not dug into this stuff since the initial release some 4/5 years ago

Copy link
Contributor Author

@jsa34 jsa34 Jan 28, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem is that I cannot think of an easy way to distinguish a regex from a normal string in Python - the ^$ syntax isn't used, and are generally just strings.

Hence, I thought to try and identify the other way around - seeing if it's a Cucumber Expression. I realise now looking at it that just looking for curly bracket pairs as a discriminator for Cucumber Expressions doesn't fly, so this will need to be fixed.

I was a bit stuck here as I couldn't think of a reliable deterministic manner to distinguish between the two types, so input very welcome!

Copy link

@neskk neskk Jan 31, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that you can still look for the ^/ $ indicators.
This is a nice rule that deterministically distinguishes cucumber-expressions from a regexp ones.

the ^$ syntax isn't used, and are generally just strings.

Pytest-BDD and other python BDD frameworks, when no specific parser is specified (default), they should check if ^/ $ are present and if so treat it as a regular expression. If not, treat it as a cucumber-expression.
This type of regexp step-def is different from using parsers.re, but I think it could be a replacement for it all together.

if not self._has_curly_brackets(expression_string):
return False
bracket_texts = self._extract_text_in_curly_brackets(expression_string)
# Check if any match does not contain an integer or an integer and a comma
for text in bracket_texts:
# Check if the match is a regex pattern (matches integer or integer-comma pattern)
if INVALID_CURLY_PATTERN.match(text):
return False # Found a form of curly bracket
return True # All curly brackets are valid

def create_expression(self, expression_string: str):
if self.is_cucumber_expression(expression_string):
return CucumberExpression(expression_string, self.parameter_type_registry)
return RegularExpression(expression_string, self.parameter_type_registry)
3 changes: 2 additions & 1 deletion python/cucumber_expressions/expression_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,11 @@
from cucumber_expressions.combinatorial_generated_expression_factory import (
CombinatorialGeneratedExpressionFactory,
)
from cucumber_expressions.parameter_type_registry import ParameterTypeRegistry


class CucumberExpressionGenerator:
def __init__(self, parameter_type_registry):
def __init__(self, parameter_type_registry: ParameterTypeRegistry):
self.parameter_type_registry = parameter_type_registry

def generate_expressions(self, text: str) -> List[GeneratedExpression]:
Expand Down
6 changes: 3 additions & 3 deletions python/cucumber_expressions/expression_parser.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import annotations

from typing import NamedTuple, Optional, Callable, List
from typing import NamedTuple, Optional, Callable, List, Tuple, Union

from cucumber_expressions.ast import Token, TokenType, Node, NodeType
from cucumber_expressions.errors import (
Expand Down Expand Up @@ -171,7 +171,7 @@ def parse_between(
begin_token: TokenType,
end_token: TokenType,
parsers: List,
) -> Callable[[Parser], Result | tuple[int, Node]]:
) -> Callable[[Parser], Union[Result, Tuple[int, Node]]]:
def _parse_between(parser: Parser):
if not self.looking_at(parser.tokens, parser.current, begin_token):
return Result(0, None)
Expand Down Expand Up @@ -221,7 +221,7 @@ def parse_tokens_until(
tokens: List[Token],
start_at: int,
end_tokens: List[TokenType],
) -> tuple[int, List[Node]]:
) -> Tuple[int, List[Node]]:
current = start_at
size = len(tokens)
ast: List[Node] = []
Expand Down
12 changes: 10 additions & 2 deletions python/cucumber_expressions/group.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,22 @@
from __future__ import annotations

from typing import List
from typing import List, Optional


class Group:
def __init__(self, value: str, start: int, end: int, children: List[Group]):
def __init__(
self,
value: str,
start: int,
end: int,
children: List[Group],
name: Optional[str] = None,
):
self._children = children
self._value = value
self._start = start
self._end = end
self.name = name

@property
def value(self):
Expand Down
10 changes: 7 additions & 3 deletions python/cucumber_expressions/group_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,16 @@ def __init__(self):
def add(self, group_builder: GroupBuilder):
self._group_builders.append(group_builder)

def build(self, match, group_indices) -> Group:
def build(self, match, group_indices, group_name_map: dict) -> Group:
group_index = next(group_indices)
children: List[Group] = [
gb.build(match, group_indices) for gb in self._group_builders
group_name = group_name_map.get(group_index, None)

children = [
gb.build(match, group_indices, group_name_map)
for gb in self._group_builders
]
return Group(
name=group_name,
value=match.group(group_index),
start=match.regs[group_index][0],
end=match.regs[group_index][1],
Expand Down
12 changes: 7 additions & 5 deletions python/cucumber_expressions/parameter_type.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations

import re
from typing import Callable, Optional, Pattern
from typing import Callable, Optional, Pattern, Union, List

from cucumber_expressions.errors import CucumberExpressionError

Expand Down Expand Up @@ -44,8 +44,8 @@ def compare(pt1: ParameterType, pt2: ParameterType):

def __init__(
self,
name,
regexp,
name: str | None,
regexp: Union[List[str], str, List[Pattern], Pattern],
type,
transformer: Optional[Callable] = None,
use_for_snippets: bool = True,
Expand Down Expand Up @@ -96,9 +96,11 @@ def _get_regexp_source(regexp_pattern: Pattern) -> str:
)
return regexp_pattern.pattern

def to_array(self, regexps: list[str] | str | list[Pattern] | Pattern) -> list[str]:
def to_array(
self, regexps: Union[List[str], str, List[Pattern], Pattern]
) -> List[str]:
"""Make a list of regexps if not already"""
array: list = regexps if isinstance(regexps, list) else [regexps]
array: List = regexps if isinstance(regexps, list) else [regexps]
return [
regexp if isinstance(regexp, str) else self._get_regexp_source(regexp)
for regexp in array
Expand Down
Loading
Loading