From b818731d468ba2ebb218a9e58222f54db6d6c917 Mon Sep 17 00:00:00 2001 From: Kostiantyn Goloveshko Date: Thu, 19 Dec 2024 00:06:30 +0200 Subject: [PATCH] Python: move model generation to Ruby templates --- .github/workflows/test-python.yml | 2 + .pre-commit-config.yaml | 1 + codegen/codegen.rb | 1 + codegen/generators/python.rb | 111 ++ codegen/templates/python.py.erb | 65 + python/Makefile | 34 +- python/pyproject.toml | 2 + python/src/cucumber_messages/__init__.py | 14 +- python/src/cucumber_messages/_messages.py | 1176 ++++++++++------- .../src/cucumber_messages/json_converter.py | 431 +++--- python/tests/__init__.py | 0 python/tests/data/message_samples/__init__.py | 0 .../minimal/minimal.feature.ndjson | 12 - python/tests/test_json_converter.py | 323 ++--- python/tests/test_messages.py | 166 +++ python/tests/test_model_load.py | 265 +--- 16 files changed, 1381 insertions(+), 1222 deletions(-) create mode 100644 codegen/generators/python.rb create mode 100644 codegen/templates/python.py.erb delete mode 100644 python/tests/__init__.py delete mode 100644 python/tests/data/message_samples/__init__.py delete mode 100644 python/tests/data/message_samples/minimal/minimal.feature.ndjson create mode 100644 python/tests/test_messages.py diff --git a/.github/workflows/test-python.yml b/.github/workflows/test-python.yml index 0258c750..007e8b54 100644 --- a/.github/workflows/test-python.yml +++ b/.github/workflows/test-python.yml @@ -27,6 +27,8 @@ jobs: python-version: "3.11" - os: ubuntu-latest python-version: "3.12" + - os: ubuntu-latest + python-version: "3.13" - os: ubuntu-latest python-version: "pypy3.9" - os: ubuntu-latest diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0e230d54..87ea6402 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,6 +2,7 @@ # See https://pre-commit.com/hooks.html for more hooks --- files: ^python/ +exclude: ^python/src/cucumber_messages/_messages\.py repos: - repo: https://github.com/psf/black rev: 24.10.0 diff --git a/codegen/codegen.rb b/codegen/codegen.rb index c484dd63..c3afd83d 100644 --- a/codegen/codegen.rb +++ b/codegen/codegen.rb @@ -11,6 +11,7 @@ require_relative 'generators/markdown' require_relative 'generators/perl' require_relative 'generators/php' +require_relative 'generators/python' require_relative 'generators/ruby' require_relative 'generators/typescript' diff --git a/codegen/generators/python.rb b/codegen/generators/python.rb new file mode 100644 index 00000000..060219cb --- /dev/null +++ b/codegen/generators/python.rb @@ -0,0 +1,111 @@ +# frozen_string_literal: true + +module Generator + class Python < Base + def array_type_for(type_name) + inner_type = if language_translations_for_data_types.values.include?(type_name) + type_name # Keep primitive types as is + else + class_name(type_name) # CamelCase for complex types + end + "list[#{inner_type}]" + end + + def format_description(raw_description, indent_string: ' ') + return '""" """' if raw_description.nil? + + formatted = raw_description + .split("\n") + .map { |line| "#{line}" } + .join("\n#{indent_string}") + %("""\n#{indent_string}#{formatted}\n#{indent_string}""") + end + + def language_translations_for_data_types + { + 'integer' => 'int', + 'string' => 'str', + 'boolean' => 'bool', + 'array' => 'list' + } + end + + private + + def default_value(parent_type_name, property_name, property) + if property['type'] == 'string' + default_value_for_string(parent_type_name, property_name, property) + elsif property['type'] == 'integer' + '0' + elsif property['type'] == 'boolean' + 'False' + elsif property['type'] == 'array' + '[]' + elsif property['$ref'] + "#{class_name(type_for(parent_type_name, nil, property))}()" + else + 'None' + end + end + + def default_value_for_string(parent_type_name, property_name, property) + if property['enum'] + enum_type_name = type_for(parent_type_name, property_name, property) + "#{class_name(enum_type_name)}.#{enum_constant(property['enum'][0])}" + else + '""' + end + end + + def type_for(parent_type_name, property_name, property) + if property['$ref'] + property_type_from_ref(property['$ref']) + elsif property['type'] + property_type_from_type(parent_type_name, property_name, property, type: property['type']) + else + raise "Property #{property_name} did not define 'type' or '$ref'" + end + end + + def property_type_from_type(parent_type_name, property_name, property, type:) + if type == 'array' + array_type_for(type_for(parent_type_name, nil, property['items'])) + elsif property['enum'] + enum_name(parent_type_name, property_name, property['enum']) + else + language_translations_for_data_types.fetch(type) + end + end + + def enum_constant(value) + value.gsub(/[.\/+]/, '_').downcase + end + + def enum_name(parent_type_name, property_name, enum) + "#{class_name(parent_type_name)}#{capitalize(property_name)}".tap do |name| + @enum_set.add({ name: name, values: enum }) + end + end + + def property_type_from_ref(ref) + class_name(ref) + end + + def class_name(ref) + return ref if language_translations_for_data_types.values.include?(ref) + + # Remove .json extension if present + name = ref.sub(/\.json$/, '') + # Get the basename without path + name = File.basename(name) + # Convert each word to proper case, handling camelCase and snake_case + parts = name.gsub(/[._-]/, '_').split('_').map do |part| + # Split by any existing camelCase + subparts = part.scan(/[A-Z][a-z]*|[a-z]+/) + subparts.map(&:capitalize).join + end + # Join all parts to create final CamelCase name + parts.join + end + end +end \ No newline at end of file diff --git a/codegen/templates/python.py.erb b/codegen/templates/python.py.erb new file mode 100644 index 00000000..d3e55351 --- /dev/null +++ b/codegen/templates/python.py.erb @@ -0,0 +1,65 @@ +# This code was generated using the code generator from cucumber-messages. +# Manual changes will be lost if the code is regenerated. +# Generator: cucumber-messages-python + +from __future__ import annotations +from dataclasses import dataclass +from enum import Enum +from typing import Optional + +<%- @enums.each do |enum| -%> +class <%= enum[:name] %>(Enum): + <%- enum[:values].each do |value| -%> + <%= value.downcase.gsub(/[.\/+\s-]/, '_') %> = "<%= value %>" + <%- end -%> + +<%- end -%> +<%- @schemas.each do |key, definition| -%> +@dataclass +class <%= class_name(key) %>: + <%- if definition['description'] -%> + <%= format_description(definition['description']) %> + <%- end -%> + <%- if definition['properties'].any? -%> + <%- + required_fields = definition['required'] || [] + properties = definition['properties'].sort_by do |name, *| + [required_fields.include?(name) ? 0 : 1, name] + end + -%> + <%- properties.each do |property_name, property| -%> + <%- + snake_name = property_name.gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2') + .gsub(/([a-z\d])([A-Z])/, '\1_\2') + .downcase + + property_type = type_for(key, property_name, property) + is_required = required_fields.include?(property_name) + is_list = property_type.start_with?('list[') + + if is_list + list_type = property_type.match(/list\[(.*?)\]/) + inner_type = list_type[1] + if inner_type =~ /^[A-Z]/ + property_type = "list['#{class_name(inner_type)}']" + else + property_type = "list[#{inner_type}]" + end + elsif property_type =~ /^[A-Z]/ + property_type = "'#{class_name(property_type)}'" + end + -%> + <%- if property['description'] -%> + <%= format_description(property['description']) %> + <%- end -%> + <%- if is_required -%> + <%= snake_name %>: <%= property_type %> + <%- else -%> + <%= snake_name %>: Optional[<%= property_type %>] = None + <%- end -%> + <%- end -%> + <%- else -%> + pass + <%- end -%> + +<%- end -%> \ No newline at end of file diff --git a/python/Makefile b/python/Makefile index 8f54a092..3d02c544 100644 --- a/python/Makefile +++ b/python/Makefile @@ -2,36 +2,16 @@ schemas = $(shell find ../jsonschema -name "*.json") .DEFAULT_GOAL = help -MKFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST))) -HERE := $(dir $(MKFILE_PATH)) - help: ## Show this help @awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \n\nWhere is one of:\n"} /^[$$()% a-zA-Z_-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST) -generate: require install-deps - datamodel-codegen \ - --output-model-type "dataclasses.dataclass" \ - --input $(HERE)../jsonschema/Envelope.json \ - --output $(HERE)src/cucumber_messages/_messages.py \ - --input-file-type=jsonschema \ - --class-name Envelope \ - --target-python-version=3.9 \ - --allow-extra-fields \ - --allow-population-by-field-name \ - --snake-case-field \ - --use-standard-collections \ - --use-double-quotes \ - --use-exact-imports \ - --use-field-description \ - --use-union-operator \ - --disable-timestamp +generate: require src/cucumber_messages/_messages.py -require: ## Check requirements for the code generation (python is required) - @python --version >/dev/null 2>&1 || (echo "ERROR: python is required."; exit 1) +require: ## Check requirements for the code generation (ruby is required) + @ruby --version >/dev/null 2>&1 || (echo "ERROR: ruby is required."; exit 1) -clean: ## Stub for the ancestor Makefile - rm -rf $(HERE)src/cucumber_messages/_messages.py. +clean: ## Remove automatically generated files and related artifacts + rm -f src/cucumber_messages/_messages.py -install-deps: ## Install generation dependencies - python -m ensurepip --upgrade - pip install $(HERE)[generation] +src/cucumber_messages/_messages.py: $(schemas) ../codegen/codegen.rb ../codegen/templates/python.py.erb + ruby ../codegen/codegen.rb Generator::Python python.py.erb > $@ diff --git a/python/pyproject.toml b/python/pyproject.toml index 7624692f..7ba32524 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -50,6 +50,8 @@ test = [ ] test-coverage = [ "coverage", + "GitPython", + "packaging", "pytest" ] diff --git a/python/src/cucumber_messages/__init__.py b/python/src/cucumber_messages/__init__.py index f2048da5..8d35298f 100644 --- a/python/src/cucumber_messages/__init__.py +++ b/python/src/cucumber_messages/__init__.py @@ -1,14 +1,4 @@ -from . import _messages -from . import json_converter +from . import _messages, json_converter from ._messages import * -# Renaming types because of confusing collision naming -HookType = Type -PickleStepType = Type1 -ExpressionType = Type2 - -serializer: json_converter.DataclassSerializer = json_converter.DataclassSerializer(module_scope=_messages) - -del Type -del Type1 -del Type2 +message_converter: json_converter.JsonDataclassConverter = json_converter.JsonDataclassConverter(module_scope=_messages) diff --git a/python/src/cucumber_messages/_messages.py b/python/src/cucumber_messages/_messages.py index 6becf686..7bcb7c4b 100644 --- a/python/src/cucumber_messages/_messages.py +++ b/python/src/cucumber_messages/_messages.py @@ -1,813 +1,985 @@ -# generated by datamodel-codegen: -# filename: Envelope.json +# This code was generated using the code generator from cucumber-messages. +# Manual changes will be lost if the code is regenerated. +# Generator: cucumber-messages-python from __future__ import annotations - from dataclasses import dataclass from enum import Enum +from typing import Optional -class ContentEncoding(Enum): +class AttachmentContentEncoding(Enum): identity = "IDENTITY" base64 = "BASE64" -class MediaType(Enum): +class HookType(Enum): + before_test_run = "BEFORE_TEST_RUN" + after_test_run = "AFTER_TEST_RUN" + before_test_case = "BEFORE_TEST_CASE" + after_test_case = "AFTER_TEST_CASE" + before_test_step = "BEFORE_TEST_STEP" + after_test_step = "AFTER_TEST_STEP" + + +class PickleStepType(Enum): + unknown = "Unknown" + context = "Context" + action = "Action" + outcome = "Outcome" + + +class SourceMediaType(Enum): text_x_cucumber_gherkin_plain = "text/x.cucumber.gherkin+plain" text_x_cucumber_gherkin_markdown = "text/x.cucumber.gherkin+markdown" +class StepDefinitionPatternType(Enum): + cucumber_expression = "CUCUMBER_EXPRESSION" + regular_expression = "REGULAR_EXPRESSION" + + +class StepKeywordType(Enum): + unknown = "Unknown" + context = "Context" + action = "Action" + outcome = "Outcome" + conjunction = "Conjunction" + + +class TestStepResultStatus(Enum): + unknown = "UNKNOWN" + passed = "PASSED" + skipped = "SKIPPED" + pending = "PENDING" + undefined = "UNDEFINED" + ambiguous = "AMBIGUOUS" + failed = "FAILED" + + @dataclass -class Source: - uri: str +class Attachment: """ + //// Attachments (parse errors, execution errors, screenshots, links...) + * - The [URI](https://en.wikipedia.org/wiki/Uniform_Resource_Identifier) - of the source, typically a file path relative to the root directory + An attachment represents any kind of data associated with a line in a + [Source](#io.cucumber.messages.Source) file. It can be used for: + + * Syntax errors during parse time + * Screenshots captured and attached during execution + * Logs captured and attached during execution + + It is not to be used for runtime errors raised/thrown during execution. This + is captured in `TestResult`. """ - data: str + """ - The contents of the file + * + The body of the attachment. If `contentEncoding` is `IDENTITY`, the attachment + is simply the string. If it's `BASE64`, the string should be Base64 decoded to + obtain the attachment. """ - media_type: MediaType + body: str """ - The media type of the file. Can be used to specify custom types, such as - text/x.cucumber.gherkin+plain + * + Whether to interpret `body` "as-is" (IDENTITY) or if it needs to be Base64-decoded (BASE64). + + Content encoding is *not* determined by the media type, but rather by the type + of the object being attached: + + - string: IDENTITY + - byte array: BASE64 + - stream: BASE64 """ + content_encoding: "AttachmentContentEncoding" + """ + * + The media type of the data. This can be any valid + [IANA Media Type](https://www.iana.org/assignments/media-types/media-types.xhtml) + as well as Cucumber-specific media types such as `text/x.cucumber.gherkin+plain` + and `text/x.cucumber.stacktrace+plain` + """ + media_type: str + """ + * + Suggested file name of the attachment. (Provided by the user as an argument to `attach`) + """ + file_name: Optional[str] = None + source: Optional["Source"] = None + test_case_started_id: Optional[str] = None + test_run_started_id: Optional[str] = None + test_step_id: Optional[str] = None + """ + * + A URL where the attachment can be retrieved. This field should not be set by Cucumber. + It should be set by a program that reads a message stream and does the following for + each Attachment message: + + - Writes the body (after base64 decoding if necessary) to a new file. + - Sets `body` and `contentEncoding` to `null` + - Writes out the new attachment message + + This will result in a smaller message stream, which can improve performance and + reduce bandwidth of message consumers. It also makes it easier to process and download attachments + separately from reports. + """ + url: Optional[str] = None @dataclass -class Location: - line: int - column: int | None = None - - -@dataclass -class Comment: - location: Location +class Duration: """ - The location of the comment + The structure is pretty close of the Timestamp one. For clarity, a second type + of message is used. """ - text: str + """ - The text of the comment + Non-negative fractions of a second at nanosecond resolution. Negative + second values with fractions must still have non-negative nanos values + that count forward in time. Must be from 0 to 999,999,999 + inclusive. """ + nanos: int + seconds: int @dataclass -class DocString: - location: Location - content: str - delimiter: str - media_type: str | None = None - +class Envelope: + """ + When removing a field, replace it with reserved, rather than deleting the line. + When adding a field, add it to the end and increment the number by one. + See https://developers.google.com/protocol-buffers/docs/proto#updating for details -class KeywordType(Enum): - unknown = "Unknown" - context = "Context" - action = "Action" - outcome = "Outcome" - conjunction = "Conjunction" + * + All the messages that are passed between different components/processes are Envelope + messages. + """ + + attachment: Optional["Attachment"] = None + gherkin_document: Optional["GherkinDocument"] = None + hook: Optional["Hook"] = None + meta: Optional["Meta"] = None + parameter_type: Optional["ParameterType"] = None + parse_error: Optional["ParseError"] = None + pickle: Optional["Pickle"] = None + source: Optional["Source"] = None + step_definition: Optional["StepDefinition"] = None + test_case: Optional["TestCase"] = None + test_case_finished: Optional["TestCaseFinished"] = None + test_case_started: Optional["TestCaseStarted"] = None + test_run_finished: Optional["TestRunFinished"] = None + test_run_hook_finished: Optional["TestRunHookFinished"] = None + test_run_hook_started: Optional["TestRunHookStarted"] = None + test_run_started: Optional["TestRunStarted"] = None + test_step_finished: Optional["TestStepFinished"] = None + test_step_started: Optional["TestStepStarted"] = None + undefined_parameter_type: Optional["UndefinedParameterType"] = None @dataclass -class TableCell: - location: Location +class Exception: """ - The location of the cell + A simplified representation of an exception """ - value: str + """ - The value of the cell + The type of the exception that caused this result. E.g. "Error" or "org.opentest4j.AssertionFailedError" + """ + type: str + """ + The message of exception that caused this result. E.g. expected: "a" but was: "b" """ + message: Optional[str] = None + """ + The stringified stack trace of the exception that caused this result + """ + stack_trace: Optional[str] = None @dataclass -class TableRow: - location: Location +class GherkinDocument: """ - The location of the first cell in the row + * + The [AST](https://en.wikipedia.org/wiki/Abstract_syntax_tree) of a Gherkin document. + Cucumber implementations should *not* depend on `GherkinDocument` or any of its + children for execution - use [Pickle](#io.cucumber.messages.Pickle) instead. + + The only consumers of `GherkinDocument` should only be formatters that produce + "rich" output, resembling the original Gherkin document. """ - cells: list[TableCell] + """ - Cells in the row + All the comments in the Gherkin document """ - id: str + comments: list["Comment"] + feature: Optional["Feature"] = None + """ + * + The [URI](https://en.wikipedia.org/wiki/Uniform_Resource_Identifier) + of the source, typically a file path relative to the root directory + """ + uri: Optional[str] = None @dataclass -class Tag: - location: Location +class Background: + description: str + id: str + keyword: str """ - Location of the tag + The location of the `Background` keyword """ + location: "Location" name: str + steps: list["Step"] + + +@dataclass +class Comment: """ - The name of the tag (including the leading `@`) + * + A comment in a Gherkin document """ - id: str + """ - Unique ID to be able to reference the Tag from PickleTag + The location of the comment """ - - -class Type(Enum): - before_test_run = "BEFORE_TEST_RUN" - after_test_run = "AFTER_TEST_RUN" - before_test_case = "BEFORE_TEST_CASE" - after_test_case = "AFTER_TEST_CASE" - before_test_step = "BEFORE_TEST_STEP" - after_test_step = "AFTER_TEST_STEP" + location: "Location" + """ + The text of the comment + """ + text: str @dataclass -class JavaMethod: - class_name: str - method_name: str - method_parameter_types: list[str] +class DataTable: + location: "Location" + rows: list["TableRow"] @dataclass -class JavaStackTraceElement: - class_name: str - file_name: str - method_name: str +class DocString: + content: str + delimiter: str + location: "Location" + media_type: Optional[str] = None @dataclass -class Git: - remote: str - revision: str - branch: str | None = None - tag: str | None = None +class Examples: + description: str + id: str + keyword: str + """ + The location of the `Examples` keyword + """ + location: "Location" + name: str + table_body: list["TableRow"] + tags: list["Tag"] + table_header: Optional["TableRow"] = None @dataclass -class Product: +class Feature: + """ + Zero or more children + """ + + children: list["FeatureChild"] + """ + The line(s) underneath the line with the `keyword` that are used as description + """ + description: str + """ + The text of the `Feature` keyword (in the language specified by `language`) + """ + keyword: str + """ + The [ISO 639-1](https://en.wikipedia.org/wiki/ISO_639-1) language code of the Gherkin document + """ + language: str + """ + The location of the `Feature` keyword + """ + location: "Location" + """ + The name of the feature (the text following the `keyword`) + """ name: str """ - The product name + All the tags placed above the `Feature` keyword """ - version: str | None = None + tags: list["Tag"] + + +@dataclass +class FeatureChild: """ - The product version + * + A child node of a `Feature` node """ + background: Optional["Background"] = None + rule: Optional["Rule"] = None + scenario: Optional["Scenario"] = None + @dataclass -class PickleDocString: - content: str - media_type: str | None = None +class Rule: + children: list["RuleChild"] + description: str + id: str + keyword: str + """ + The location of the `Rule` keyword + """ + location: "Location" + name: str + """ + All the tags placed above the `Rule` keyword + """ + tags: list["Tag"] -class Type1(Enum): - unknown = "Unknown" - context = "Context" - action = "Action" - outcome = "Outcome" +@dataclass +class RuleChild: + """ + * + A child node of a `Rule` node + """ + + background: Optional["Background"] = None + scenario: Optional["Scenario"] = None @dataclass -class PickleTableCell: - value: str +class Scenario: + description: str + examples: list["Examples"] + id: str + keyword: str + """ + The location of the `Scenario` keyword + """ + location: "Location" + name: str + steps: list["Step"] + tags: list["Tag"] @dataclass -class PickleTableRow: - cells: list[PickleTableCell] +class Step: + """ + A step + """ + + """ + Unique ID to be able to reference the Step from PickleStep + """ + id: str + """ + The actual keyword as it appeared in the source. + """ + keyword: str + """ + The location of the steps' `keyword` + """ + location: "Location" + text: str + data_table: Optional["DataTable"] = None + doc_string: Optional["DocString"] = None + """ + The test phase signalled by the keyword: Context definition (Given), Action performance (When), Outcome assertion (Then). Other keywords signal Continuation (And and But) from a prior keyword. Please note that all translations which a dialect maps to multiple keywords (`*` is in this category for all dialects), map to 'Unknown'. + """ + keyword_type: Optional["StepKeywordType"] = None @dataclass -class PickleTag: - name: str - ast_node_id: str +class TableCell: """ - Points to the AST node this was created from + A cell in a `TableRow` """ + """ + The location of the cell + """ + location: "Location" + """ + The value of the cell + """ + value: str -class Type2(Enum): - cucumber_expression = "CUCUMBER_EXPRESSION" - regular_expression = "REGULAR_EXPRESSION" + +@dataclass +class TableRow: + """ + A row in a table + """ + + """ + Cells in the row + """ + cells: list["TableCell"] + id: str + """ + The location of the first cell in the row + """ + location: "Location" @dataclass -class StepDefinitionPattern: - source: str - type: Type2 +class Tag: + """ + * + A tag + """ + + """ + Unique ID to be able to reference the Tag from PickleTag + """ + id: str + """ + Location of the tag + """ + location: "Location" + """ + The name of the tag (including the leading `@`) + """ + name: str @dataclass -class Group: - children: list[Group] - start: int | None = None - value: str | None = None +class Hook: + id: str + source_reference: "SourceReference" + name: Optional[str] = None + tag_expression: Optional[str] = None + type: Optional["HookType"] = None @dataclass -class StepMatchArgument: - group: Group +class Location: """ * - Represents the outermost capture group of an argument. This message closely matches the - `Group` class in the `cucumber-expressions` library. + Points to a line and a column in a text file """ - parameter_type_name: str | None = None - -@dataclass -class StepMatchArgumentsList: - step_match_arguments: list[StepMatchArgument] + line: int + column: Optional[int] = None @dataclass -class TestStep: - id: str - hook_id: str | None = None +class Meta: """ - Pointer to the `Hook` (if derived from a Hook) + * + This message contains meta information about the environment. Consumers can use + this for various purposes. """ - pickle_step_id: str | None = None + """ - Pointer to the `PickleStep` (if derived from a `PickleStep`) + 386, arm, amd64 etc """ - step_definition_ids: list[str] | None = None + cpu: "Product" """ - Pointer to all the matching `StepDefinition`s (if derived from a `PickleStep`) - Each element represents a matching step definition. A size of 0 means `UNDEFINED`, - and a size of 2+ means `AMBIGUOUS` + SpecFlow, Cucumber-JVM, Cucumber.js, Cucumber-Ruby, Behat etc. """ - step_match_arguments_lists: list[StepMatchArgumentsList] | None = None + implementation: "Product" """ - A list of list of StepMatchArgument (if derived from a `PickleStep`). + Windows, Linux, MacOS etc """ - - -@dataclass -class Timestamp: - seconds: int + os: "Product" """ - Represents seconds of UTC time since Unix epoch - 1970-01-01T00:00:00Z. Must be from 0001-01-01T00:00:00Z to - 9999-12-31T23:59:59Z inclusive. + * + The [SEMVER](https://semver.org/) version number of the protocol """ - nanos: int + protocol_version: str """ - Non-negative fractions of a second at nanosecond resolution. Negative - second values with fractions must still have non-negative nanos values - that count forward in time. Must be from 0 to 999,999,999 - inclusive. + Java, Ruby, Node.js etc """ + runtime: "Product" + ci: Optional["Ci"] = None @dataclass -class TestCaseStarted: - attempt: int +class Ci: """ - * - The first attempt should have value 0, and for each retry the value - should increase by 1. + CI environment """ - id: str + """ - * - Because a `TestCase` can be run multiple times (in case of a retry), - we use this field to group messages relating to the same attempt. + Name of the CI product, e.g. "Jenkins", "CircleCI" etc. """ - test_case_id: str - timestamp: Timestamp - worker_id: str | None = None + name: str """ - An identifier for the worker process running this test case, if test cases are being run in parallel. The identifier will be unique per worker, but no particular format is defined - it could be an index, uuid, machine name etc - and as such should be assumed that it's not human readable. + The build number. Some CI servers use non-numeric build numbers, which is why this is a string + """ + build_number: Optional[str] = None + git: Optional["Git"] = None """ + Link to the build + """ + url: Optional[str] = None @dataclass -class Exception: - type: str +class Git: """ - The type of the exception that caused this result. E.g. "Error" or "org.opentest4j.AssertionFailedError" + Information about Git, provided by the Build/CI server as environment + variables. + """ + + remote: str + revision: str + branch: Optional[str] = None + tag: Optional[str] = None + + +@dataclass +class Product: """ - message: str | None = None + Used to describe various properties of Meta """ - The message of exception that caused this result. E.g. expected: "a" but was: "b" + """ - stack_trace: str | None = None + The product name """ - The stringified stack trace of the exception that caused this result + name: str + """ + The product version """ + version: Optional[str] = None @dataclass -class TestRunStarted: - timestamp: Timestamp - id: str | None = None +class ParameterType: + id: str + """ + The name is unique, so we don't need an id. + """ + name: str + prefer_for_regular_expression_match: bool + regular_expressions: list[str] + use_for_snippets: bool + source_reference: Optional["SourceReference"] = None -class Status(Enum): - unknown = "UNKNOWN" - passed = "PASSED" - skipped = "SKIPPED" - pending = "PENDING" - undefined = "UNDEFINED" - ambiguous = "AMBIGUOUS" - failed = "FAILED" +@dataclass +class ParseError: + message: str + source: "SourceReference" @dataclass -class Duration: - seconds: int - nanos: int - """ - Non-negative fractions of a second at nanosecond resolution. Negative - second values with fractions must still have non-negative nanos values - that count forward in time. Must be from 0 to 999,999,999 - inclusive. +class Pickle: """ + //// Pickles + * + A `Pickle` represents a template for a `TestCase`. It is typically derived + from another format, such as [GherkinDocument](#io.cucumber.messages.GherkinDocument). + In the future a `Pickle` may be derived from other formats such as Markdown or + Excel files. -@dataclass -class TestStepStarted: - test_case_started_id: str - test_step_id: str - timestamp: Timestamp + By making `Pickle` the main data structure Cucumber uses for execution, the + implementation of Cucumber itself becomes simpler, as it doesn't have to deal + with the complex structure of a [GherkinDocument](#io.cucumber.messages.GherkinDocument). + Each `PickleStep` of a `Pickle` is matched with a `StepDefinition` to create a `TestCase` + """ -@dataclass -class TestRunHookStarted: + """ + * + Points to the AST node locations of the pickle. The last one represents the unique + id of the pickle. A pickle constructed from `Examples` will have the first + id originating from the `Scenario` AST node, and the second from the `TableRow` AST node. + """ + ast_node_ids: list[str] + """ + * + A unique id for the pickle + """ id: str """ - Unique identifier for this hook execution + The language of the pickle """ - test_run_started_id: str + language: str """ - Identifier for the test run that this hook execution belongs to + The name of the pickle """ - hook_id: str + name: str """ - Identifier for the hook that will be executed + One or more steps + """ + steps: list["PickleStep"] + """ + * + One or more tags. If this pickle is constructed from a Gherkin document, + It includes inherited tags from the `Feature` as well. """ - timestamp: Timestamp + tags: list["PickleTag"] + """ + The uri of the source file + """ + uri: str @dataclass -class UndefinedParameterType: - expression: str - name: str +class PickleDocString: + content: str + media_type: Optional[str] = None @dataclass -class Attachment: - body: str +class PickleStep: """ * - The body of the attachment. If `contentEncoding` is `IDENTITY`, the attachment - is simply the string. If it's `BASE64`, the string should be Base64 decoded to - obtain the attachment. - """ - content_encoding: ContentEncoding + An executable step """ - * - Whether to interpret `body` "as-is" (IDENTITY) or if it needs to be Base64-decoded (BASE64). - - Content encoding is *not* determined by the media type, but rather by the type - of the object being attached: - - string: IDENTITY - - byte array: BASE64 - - stream: BASE64 """ - media_type: str + References the IDs of the source of the step. For Gherkin, this can be + the ID of a Step, and possibly also the ID of a TableRow """ - * - The media type of the data. This can be any valid - [IANA Media Type](https://www.iana.org/assignments/media-types/media-types.xhtml) - as well as Cucumber-specific media types such as `text/x.cucumber.gherkin+plain` - and `text/x.cucumber.stacktrace+plain` + ast_node_ids: list[str] """ - file_name: str | None = None + A unique ID for the PickleStep """ - * - Suggested file name of the attachment. (Provided by the user as an argument to `attach`) + id: str + text: str + argument: Optional["PickleStepArgument"] = None """ - source: Source | None = None - test_case_started_id: str | None = None - test_step_id: str | None = None - url: str | None = None + The context in which the step was specified: context (Given), action (When) or outcome (Then). + + Note that the keywords `But` and `And` inherit their meaning from prior steps and the `*` 'keyword' doesn't have specific meaning (hence Unknown) """ - * - A URL where the attachment can be retrieved. This field should not be set by Cucumber. - It should be set by a program that reads a message stream and does the following for - each Attachment message: + type: Optional["PickleStepType"] = None - - Writes the body (after base64 decoding if necessary) to a new file. - - Sets `body` and `contentEncoding` to `null` - - Writes out the new attachment message - This will result in a smaller message stream, which can improve performance and - reduce bandwidth of message consumers. It also makes it easier to process and download attachments - separately from reports. +@dataclass +class PickleStepArgument: """ - test_run_started_id: str | None = None + An optional argument + """ + + data_table: Optional["PickleTable"] = None + doc_string: Optional["PickleDocString"] = None @dataclass -class DataTable: - location: Location - rows: list[TableRow] +class PickleTable: + rows: list["PickleTableRow"] @dataclass -class Examples: - location: Location - """ - The location of the `Examples` keyword - """ - tags: list[Tag] - keyword: str - name: str - description: str - table_body: list[TableRow] - id: str - table_header: TableRow | None = None +class PickleTableCell: + value: str @dataclass -class Step: - location: Location - """ - The location of the steps' `keyword` - """ - keyword: str - """ - The actual keyword as it appeared in the source. - """ - text: str - id: str +class PickleTableRow: + cells: list["PickleTableCell"] + + +@dataclass +class PickleTag: """ - Unique ID to be able to reference the Step from PickleStep + * + A tag """ - keyword_type: KeywordType | None = None + """ - The test phase signalled by the keyword: Context definition (Given), Action performance (When), Outcome assertion (Then). Other keywords signal Continuation (And and But) from a prior keyword. Please note that all translations which a dialect maps to multiple keywords (`*` is in this category for all dialects), map to 'Unknown'. + Points to the AST node this was created from """ - doc_string: DocString | None = None - data_table: DataTable | None = None + ast_node_id: str + name: str @dataclass -class SourceReference: - uri: str | None = None - java_method: JavaMethod | None = None - java_stack_trace_element: JavaStackTraceElement | None = None - location: Location | None = None +class Source: + """ + //// Source + * + A source file, typically a Gherkin document or Java/Ruby/JavaScript source code + """ -@dataclass -class Ci: - name: str """ - Name of the CI product, e.g. "Jenkins", "CircleCI" etc. + The contents of the file """ - url: str | None = None + data: str """ - Link to the build + The media type of the file. Can be used to specify custom types, such as + text/x.cucumber.gherkin+plain """ - build_number: str | None = None + media_type: "SourceMediaType" """ - The build number. Some CI servers use non-numeric build numbers, which is why this is a string + * + The [URI](https://en.wikipedia.org/wiki/Uniform_Resource_Identifier) + of the source, typically a file path relative to the root directory """ - git: Git | None = None + uri: str @dataclass -class ParameterType: - name: str +class SourceReference: """ - The name is unique, so we don't need an id. + * + Points to a [Source](#io.cucumber.messages.Source) identified by `uri` and a + [Location](#io.cucumber.messages.Location) within that file. """ - regular_expressions: list[str] - prefer_for_regular_expression_match: bool - use_for_snippets: bool - id: str - source_reference: SourceReference | None = None - -@dataclass -class ParseError: - source: SourceReference - message: str + java_method: Optional["JavaMethod"] = None + java_stack_trace_element: Optional["JavaStackTraceElement"] = None + location: Optional["Location"] = None + uri: Optional[str] = None @dataclass -class PickleTable: - rows: list[PickleTableRow] +class JavaMethod: + class_name: str + method_name: str + method_parameter_types: list[str] @dataclass -class StepDefinition: - id: str - pattern: StepDefinitionPattern - source_reference: SourceReference +class JavaStackTraceElement: + class_name: str + file_name: str + method_name: str @dataclass -class TestCase: +class StepDefinition: id: str - pickle_id: str - """ - The ID of the `Pickle` this `TestCase` is derived from. - """ - test_steps: list[TestStep] - test_run_started_id: str | None = None - """ - Identifier for the test run that this test case belongs to - """ + pattern: "StepDefinitionPattern" + source_reference: "SourceReference" @dataclass -class TestCaseFinished: - test_case_started_id: str - timestamp: Timestamp - will_be_retried: bool +class StepDefinitionPattern: + source: str + type: "StepDefinitionPatternType" @dataclass -class TestRunFinished: - success: bool - """ - A test run is successful if all steps are either passed or skipped, all before/after hooks passed and no other exceptions where thrown. - """ - timestamp: Timestamp - """ - Timestamp when the TestRun is finished - """ - message: str | None = None - """ - An informative message about the test run. Typically additional information about failure, but not necessarily. - """ - exception: Exception | None = None - """ - Any exception thrown during the test run, if any. Does not include exceptions thrown while executing steps. +class TestCase: """ - test_run_started_id: str | None = None + //// TestCases + * + A `TestCase` contains a sequence of `TestStep`s. + """ -@dataclass -class TestStepResult: - duration: Duration - status: Status - message: str | None = None + id: str """ - An arbitrary bit of information that explains this result. This can be a stack trace of anything else. + The ID of the `Pickle` this `TestCase` is derived from. """ - exception: Exception | None = None + pickle_id: str + test_steps: list["TestStep"] """ - Exception thrown while executing this step, if any. + Identifier for the test run that this test case belongs to """ + test_run_started_id: Optional[str] = None @dataclass -class TestRunHookFinished: - test_run_hook_started_id: str - """ - Identifier for the hook execution that has finished - """ - result: TestStepResult - timestamp: Timestamp +class Group: + children: list["Group"] + start: Optional[int] = None + value: Optional[str] = None @dataclass -class Background: - location: Location - """ - The location of the `Background` keyword +class StepMatchArgument: """ - keyword: str - name: str - description: str - steps: list[Step] - id: str + * + Represents a single argument extracted from a step match and passed to a step definition. + This is used for the following purposes: + - Construct an argument to pass to a step definition (possibly through a parameter type transform) + - Highlight the matched parameter in rich formatters such as the HTML formatter + This message closely matches the `Argument` class in the `cucumber-expressions` library. + """ -@dataclass -class Scenario: - location: Location """ - The location of the `Scenario` keyword + * + Represents the outermost capture group of an argument. This message closely matches the + `Group` class in the `cucumber-expressions` library. """ - tags: list[Tag] - keyword: str - name: str - description: str - steps: list[Step] - examples: list[Examples] - id: str + group: "Group" + parameter_type_name: Optional[str] = None @dataclass -class Hook: - id: str - source_reference: SourceReference - name: str | None = None - tag_expression: str | None = None - type: Type | None = None +class StepMatchArgumentsList: + step_match_arguments: list["StepMatchArgument"] @dataclass -class Meta: - protocol_version: str +class TestStep: """ * - The [SEMVER](https://semver.org/) version number of the protocol + A `TestStep` is derived from either a `PickleStep` + combined with a `StepDefinition`, or from a `Hook`. """ - implementation: Product + + id: str """ - SpecFlow, Cucumber-JVM, Cucumber.js, Cucumber-Ruby, Behat etc. + Pointer to the `Hook` (if derived from a Hook) """ - runtime: Product + hook_id: Optional[str] = None """ - Java, Ruby, Node.js etc + Pointer to the `PickleStep` (if derived from a `PickleStep`) """ - os: Product + pickle_step_id: Optional[str] = None """ - Windows, Linux, MacOS etc + Pointer to all the matching `StepDefinition`s (if derived from a `PickleStep`) + Each element represents a matching step definition. A size of 0 means `UNDEFINED`, + and a size of 2+ means `AMBIGUOUS` """ - cpu: Product + step_definition_ids: Optional[list[str]] = None """ - 386, arm, amd64 etc + A list of list of StepMatchArgument (if derived from a `PickleStep`). """ - ci: Ci | None = None - - -@dataclass -class PickleStepArgument: - doc_string: PickleDocString | None = None - data_table: PickleTable | None = None + step_match_arguments_lists: Optional[list["StepMatchArgumentsList"]] = None @dataclass -class TestStepFinished: +class TestCaseFinished: test_case_started_id: str - test_step_id: str - test_step_result: TestStepResult - timestamp: Timestamp - - -@dataclass -class RuleChild: - background: Background | None = None - scenario: Scenario | None = None + timestamp: "Timestamp" + will_be_retried: bool @dataclass -class PickleStep: - ast_node_ids: list[str] +class TestCaseStarted: """ - References the IDs of the source of the step. For Gherkin, this can be - the ID of a Step, and possibly also the ID of a TableRow + * + The first attempt should have value 0, and for each retry the value + should increase by 1. """ - id: str + + attempt: int """ - A unique ID for the PickleStep + * + Because a `TestCase` can be run multiple times (in case of a retry), + we use this field to group messages relating to the same attempt. """ - text: str - argument: PickleStepArgument | None = None - type: Type1 | None = None + id: str + test_case_id: str + timestamp: "Timestamp" """ - The context in which the step was specified: context (Given), action (When) or outcome (Then). - - Note that the keywords `But` and `And` inherit their meaning from prior steps and the `*` 'keyword' doesn't have specific meaning (hence Unknown) + An identifier for the worker process running this test case, if test cases are being run in parallel. The identifier will be unique per worker, but no particular format is defined - it could be an index, uuid, machine name etc - and as such should be assumed that it's not human readable. """ + worker_id: Optional[str] = None @dataclass -class Rule: - location: Location - """ - The location of the `Rule` keyword - """ - tags: list[Tag] +class TestRunFinished: """ - All the tags placed above the `Rule` keyword + A test run is successful if all steps are either passed or skipped, all before/after hooks passed and no other exceptions where thrown. """ - keyword: str - name: str - description: str - children: list[RuleChild] - id: str - -@dataclass -class Pickle: - id: str + success: bool """ - * - A unique id for the pickle + Timestamp when the TestRun is finished """ - uri: str + timestamp: "Timestamp" """ - The uri of the source file + Any exception thrown during the test run, if any. Does not include exceptions thrown while executing steps. """ - name: str + exception: Optional["Exception"] = None """ - The name of the pickle + An informative message about the test run. Typically additional information about failure, but not necessarily. """ - language: str + message: Optional[str] = None + test_run_started_id: Optional[str] = None + + +@dataclass +class TestRunHookFinished: + result: "TestStepResult" """ - The language of the pickle + Identifier for the hook execution that has finished """ - steps: list[PickleStep] + test_run_hook_started_id: str + timestamp: "Timestamp" + + +@dataclass +class TestRunHookStarted: """ - One or more steps + Identifier for the hook that will be executed """ - tags: list[PickleTag] + + hook_id: str """ - * - One or more tags. If this pickle is constructed from a Gherkin document, - It includes inherited tags from the `Feature` as well. + Unique identifier for this hook execution """ - ast_node_ids: list[str] + id: str """ - * - Points to the AST node locations of the pickle. The last one represents the unique - id of the pickle. A pickle constructed from `Examples` will have the first - id originating from the `Scenario` AST node, and the second from the `TableRow` AST node. + Identifier for the test run that this hook execution belongs to """ + test_run_started_id: str + timestamp: "Timestamp" @dataclass -class FeatureChild: - rule: Rule | None = None - background: Background | None = None - scenario: Scenario | None = None +class TestRunStarted: + timestamp: "Timestamp" + id: Optional[str] = None @dataclass -class Feature: - location: Location - """ - The location of the `Feature` keyword - """ - tags: list[Tag] - """ - All the tags placed above the `Feature` keyword - """ - language: str - """ - The [ISO 639-1](https://en.wikipedia.org/wiki/ISO_639-1) language code of the Gherkin document - """ - keyword: str - """ - The text of the `Feature` keyword (in the language specified by `language`) - """ - name: str - """ - The name of the feature (the text following the `keyword`) - """ - description: str +class TestStepFinished: + test_case_started_id: str + test_step_id: str + test_step_result: "TestStepResult" + timestamp: "Timestamp" + + +@dataclass +class TestStepResult: + duration: "Duration" + status: "TestStepResultStatus" """ - The line(s) underneath the line with the `keyword` that are used as description + Exception thrown while executing this step, if any. """ - children: list[FeatureChild] + exception: Optional["Exception"] = None """ - Zero or more children + An arbitrary bit of information that explains this result. This can be a stack trace of anything else. """ + message: Optional[str] = None @dataclass -class GherkinDocument: - comments: list[Comment] +class TestStepStarted: + test_case_started_id: str + test_step_id: str + timestamp: "Timestamp" + + +@dataclass +class Timestamp: """ - All the comments in the Gherkin document + Non-negative fractions of a second at nanosecond resolution. Negative + second values with fractions must still have non-negative nanos values + that count forward in time. Must be from 0 to 999,999,999 + inclusive. """ - uri: str | None = None + + nanos: int """ - * - The [URI](https://en.wikipedia.org/wiki/Uniform_Resource_Identifier) - of the source, typically a file path relative to the root directory + Represents seconds of UTC time since Unix epoch + 1970-01-01T00:00:00Z. Must be from 0001-01-01T00:00:00Z to + 9999-12-31T23:59:59Z inclusive. """ - feature: Feature | None = None + seconds: int @dataclass -class Envelope: - attachment: Attachment | None = None - gherkin_document: GherkinDocument | None = None - hook: Hook | None = None - meta: Meta | None = None - parameter_type: ParameterType | None = None - parse_error: ParseError | None = None - pickle: Pickle | None = None - source: Source | None = None - step_definition: StepDefinition | None = None - test_case: TestCase | None = None - test_case_finished: TestCaseFinished | None = None - test_case_started: TestCaseStarted | None = None - test_run_finished: TestRunFinished | None = None - test_run_started: TestRunStarted | None = None - test_step_finished: TestStepFinished | None = None - test_step_started: TestStepStarted | None = None - test_run_hook_started: TestRunHookStarted | None = None - test_run_hook_finished: TestRunHookFinished | None = None - undefined_parameter_type: UndefinedParameterType | None = None +class UndefinedParameterType: + expression: str + name: str diff --git a/python/src/cucumber_messages/json_converter.py b/python/src/cucumber_messages/json_converter.py index 5bd24a96..5158e729 100644 --- a/python/src/cucumber_messages/json_converter.py +++ b/python/src/cucumber_messages/json_converter.py @@ -1,291 +1,258 @@ -import json -import re -import sys +import collections.abc import types -import typing -from dataclasses import MISSING, Field, fields, is_dataclass +from dataclasses import MISSING, fields, is_dataclass from datetime import date, datetime from enum import Enum -from typing import ( - Any, - ClassVar, - Dict, - List, - Optional, - Protocol, - Sequence, - Tuple, - Type, - TypeVar, - Union, - cast, - get_args, - get_origin, -) - - -class DataclassProtocol(Protocol): - """Protocol defining the structure required for dataclass instances.""" - - __dataclass_fields__: ClassVar[Dict[str, Field[Any]]] - - -def camel_to_snake(name: str) -> str: - """Convert string from camelCase to snake_case.""" - if not name or not (name[0].isalpha() or name[0] == "_") or not all(c.isalnum() or c == "_" for c in name): - raise ValueError(f"Invalid field name: {name}") - pattern = re.compile(r"(? str: - """Convert string from snake_case to camelCase.""" - if not name or not (name[0].isalpha() or name[0] == "_") or not all(c.isalnum() or c == "_" for c in name): - raise ValueError(f"Invalid field name: {name}") - components = name.split("_") +from types import GenericAlias +from typing import Any, Optional, Union, cast, get_args, get_origin + + +def camel_to_snake(s: str) -> str: + """Convert a camelCase string to snake_case.""" + if not s: + return s + result = [s[0].lower()] + for char in s[1:]: + if char.isupper(): + result.extend(["_", char.lower()]) + else: + result.append(char) + return "".join(result) + + +def snake_to_camel(s: str) -> str: + """Convert a snake_case string to camelCase.""" + if not s: + return s + components = s.split("_") return components[0] + "".join(x.title() for x in components[1:]) class TypeResolver: - """Resolves type annotations to their actual types.""" + """Resolves type hints to their concrete types using the module scope.""" - def __init__(self, module_scope: types.ModuleType) -> None: + def __init__(self, module_scope: types.ModuleType): self.module_scope = module_scope - self._type_cache: Dict[str, Any] = {} - - def _get_sequence_type(self, type_str: str) -> Optional[Tuple[Any, str]]: - sequence_match = re.match(r"Sequence\[(.*)\]", type_str) - list_legacy_match = re.match(r"List\[(.*)\]", type_str) - list_match = re.match(r"list\[(.*)\]", type_str) - if sequence_match: - return (Sequence, sequence_match.group(1)) - if list_legacy_match: - return (List, list_legacy_match.group(1)) - if list_match: - return (list, list_match.group(1)) - return None + self._type_cache: dict[str, Any] = {} + + def _resolve_forward_ref(self, type_hint: Any) -> Any: + """Handle forward reference types.""" + if not hasattr(type_hint, "__forward_arg__"): + return None + forward_arg = type_hint.__forward_arg__ + return getattr(self.module_scope, forward_arg, Any) + + def _resolve_union(self, type_hint: Any) -> Any: + """Handle Union types including Optional.""" + args = get_args(type_hint) + resolved_args = tuple(self.resolve_type(arg) for arg in args) + return Union[resolved_args] + + def _resolve_string_optional(self, type_str: str) -> Any: + """Handle Optional types defined as strings.""" + if not type_str.startswith("Optional["): + return None + inner_type = type_str[9:-1].strip("'\"") + resolved_inner = self.resolve_type(inner_type) + return Union[resolved_inner, type(None)] + + def _resolve_generic_collection(self, type_str: str) -> Any: + """Handle generic collection types like List[T] and Dict[K, V].""" + if "[" not in type_str: + return None + + base_type, inner = type_str.split("[", 1) + inner = inner.rstrip("]").strip() + + if base_type.lower() in {"list", "sequence"}: + resolved_type = self.resolve_type(inner.strip("'\"")) + return GenericAlias(list, (resolved_type,)) + + if base_type.lower() == "dict": + key_type_str, value_type_str = map(str.strip, inner.split(",", 1)) + resolved_key = self.resolve_type(key_type_str.strip("'\"")) + resolved_value = self.resolve_type(value_type_str.strip("'\"")) + return GenericAlias(dict, (resolved_key, resolved_value)) + + return Any def resolve_type(self, type_hint: Any) -> Any: """Resolve a type hint to its concrete type.""" if isinstance(type_hint, type): return type_hint - if not isinstance(type_hint, str): - return type_hint + resolved_forward = self._resolve_forward_ref(type_hint) + if resolved_forward is not None: + return resolved_forward - if type_hint == "Any": - return Any + if get_origin(type_hint) is Union: + return self._resolve_union(type_hint) - if type_hint == "None": - return type(None) + if not isinstance(type_hint, str): + return type_hint if type_hint in self._type_cache: return self._type_cache[type_hint] - if "|" in type_hint: - types_str = [t.strip() for t in type_hint.split("|")] - resolved_types = [] - for t in types_str: - if t == "None": - resolved_types.append(type(None)) - else: - resolved = self.resolve_type(t) - if resolved is not None: - resolved_types.append(resolved) - if resolved_types: - return cast(Any, Union[tuple(resolved_types)]) - return Any - - sequence_info = self._get_sequence_type(type_hint) - if sequence_info: - base_type, inner_type = sequence_info - resolved_inner = self.resolve_type(inner_type.strip()) - if resolved_inner is not None: - result = base_type[resolved_inner] # type: ignore - self._type_cache[type_hint] = result - return result - return List[Any] - - if hasattr(self.module_scope, type_hint): - resolved = getattr(self.module_scope, type_hint) - if isinstance(resolved, type): - self._type_cache[type_hint] = resolved - return resolved + clean_hint = type_hint.strip("'\"") + + resolved_optional = self._resolve_string_optional(clean_hint) + if resolved_optional is not None: + self._type_cache[type_hint] = resolved_optional + return resolved_optional + + resolved_collection = self._resolve_generic_collection(clean_hint) + if resolved_collection is not None: + self._type_cache[type_hint] = resolved_collection + return resolved_collection - try: - resolved = eval(type_hint, self.module_scope.__dict__) # noqa: PGH001 + if hasattr(self.module_scope, clean_hint): + resolved = getattr(self.module_scope, clean_hint) if isinstance(resolved, type): self._type_cache[type_hint] = resolved return resolved - except (NameError, SyntaxError): - pass return Any +class JsonDataclassConverter: + def __init__(self, module_scope: types.ModuleType): + self.type_resolver = TypeResolver(module_scope) -D = TypeVar("D") - - -class DataclassSerializer(typing.Generic[D]): - """Handles decoding of JSON data to dataclass instances.""" + def _convert_datetime(self, value: Any, target_type: Any) -> Any: + """Convert datetime and date values.""" + if target_type in (datetime, date) and isinstance(value, str): + return target_type.fromisoformat(value) + return None - def __init__(self, module_scope: Optional[types.ModuleType] = None) -> None: - self.module_scope = module_scope or sys.modules[__name__] - self.type_resolver = TypeResolver(self.module_scope) + def _convert_enum(self, value: Any, target_type: Any) -> Any: + """Convert enum values.""" + if isinstance(target_type, type) and issubclass(target_type, Enum): + return target_type(value) + return None - def from_dict(self, data: Any, target_type: Any) -> Any: - """Decode JSON data into a target type.""" - if data is None: + def _convert_sequence(self, value: Any, target_type: Any, field_name: Optional[str]) -> Any: + """Convert sequence values.""" + origin = get_origin(target_type) + if not (origin is not None and isinstance(origin, type) and issubclass(origin, collections.abc.Sequence)): return None - if isinstance(target_type, str): - target_type = self.type_resolver.resolve_type(target_type) + if isinstance(value, str): + return value - origin = get_origin(target_type) args = get_args(target_type) + item_type = args[0] if args else Any + return [self._convert_value(item, item_type, field_name) for item in value] - if origin is Union: - non_none_types = [t for t in args if t is not type(None)] - if not non_none_types: - return data - last_error: Optional[Exception] = None - for t in non_none_types: - try: - return self.from_dict(data, t) - except (ValueError, TypeError) as e: - last_error = e - raise ValueError(f"Could not decode value as any of the expected types. Last error: {str(last_error)}") - - if origin in (list, List, Sequence) or (isinstance(origin, type) and issubclass(origin, (list, Sequence))): - item_type = args[0] if args else Any - if isinstance(data, str): - return [data] - if not isinstance(data, list): - data = [data] if data is not None else [] - return [self.from_dict(item, item_type) for item in data] - - if origin in (dict, Dict) or (isinstance(origin, type) and issubclass(origin, dict)): - if not isinstance(data, dict): - raise TypeError(f"Expected dict but got {type(data)}") - key_type = args[0] if args else str - value_type = args[1] if len(args) > 1 else Any - return {self.from_dict(k, key_type): self.from_dict(v, value_type) for k, v in data.items()} - - if is_dataclass(target_type): - return self._decode_dataclass(data, cast(Type[D], target_type)) - - if isinstance(target_type, type): - if issubclass(target_type, Enum): - try: - if isinstance(data, str) and hasattr(target_type, data): - return getattr(target_type, data) - return target_type(data) - except ValueError: - valid_values = [e.value for e in target_type] - raise TypeError( - f"Invalid value for {target_type.__name__}: {data}. Valid values are: {valid_values}" - ) - - if issubclass(target_type, (datetime, date)): - return target_type.fromisoformat(data) if data else None - - return data - - @classmethod - def to_dict(cls, obj: Any) -> Any: - """Convert a dataclass instance to a JSON-compatible dictionary.""" - if obj is None: + def _convert_dict(self, value: Any, target_type: Any, field_name: Optional[str]) -> Any: + """Convert dictionary values.""" + if get_origin(target_type) is not dict: return None - if isinstance(obj, (str, int, float, bool)): - return obj + key_type, value_type = get_args(target_type) + return { + self._convert_value(k, key_type, field_name): self._convert_value(v, value_type, field_name) + for k, v in value.items() + } + + def _convert_dataclass(self, value: Any, target_type: Any) -> Any: + """Convert nested dataclass values.""" + if is_dataclass(target_type) and isinstance(value, dict): + # Cast target_type to Type[Any] to satisfy Mypy + return self.from_dict(value, cast(type[Any], target_type)) + return None - if isinstance(obj, (datetime, date)): - return obj.isoformat() + def _convert_optional(self, value: Any, target_type: Any, field_name: Optional[str]) -> Any: + """Convert Optional/Union values.""" + if get_origin(target_type) is not Union: + return None - if isinstance(obj, Enum): - return obj.value if hasattr(obj, "value") else str(obj) + args = get_args(target_type) + if value is None and type(None) in args: + return None - if isinstance(obj, (list, tuple, Sequence)): - return [cls.to_dict(item) for item in obj] + actual_type = next((t for t in args if t is not type(None)), Any) + return self._convert_value(value, actual_type, field_name) - if isinstance(obj, dict): - return {key: cls.to_dict(value) for key, value in obj.items()} + def _convert_value(self, value: Any, target_type: Any, field_name: Optional[str] = None) -> Any: + """Convert a single value to the target type.""" + if value is None: + return None - if is_dataclass(obj): - result: Dict[str, Any] = {} - for field in fields(obj): - value = getattr(obj, field.name) - if value is not None: - try: - result[snake_to_camel(field.name)] = cls.to_dict(value) - except ValueError as e: - raise ValueError(f"Error encoding field {field.name}: {str(e)}") - return result + converted = ( + self._convert_optional(value, target_type, field_name) + or self._convert_datetime(value, target_type) + or self._convert_enum(value, target_type) + or self._convert_sequence(value, target_type, field_name) + or self._convert_dict(value, target_type, field_name) + or self._convert_dataclass(value, target_type) + or value + ) + return converted + + def from_dict(self, data: Any, target_class: type[Any]) -> Any: + """Convert a dictionary to a dataclass instance.""" + if data is None: + return None - return str(obj) + if not is_dataclass(target_class): + return self._convert_value(data, target_class, None) - def _decode_dataclass(self, data: Dict[str, Any], target_class: Type[D]) -> D: - """Decode a dictionary into a dataclass instance.""" if not isinstance(data, dict): raise TypeError(f"Expected dict but got {type(data)}") - protocol_class = cast(DataclassProtocol, target_class) - class_fields = {field.name: field for field in fields(protocol_class)} - field_values: Dict[str, Any] = {} + class_fields = {field.name: field for field in fields(target_class)} + init_kwargs = {} - field_mapping: Dict[str, str] = {} - for field_name in class_fields: - try: - camel_name = snake_to_camel(field_name) - field_mapping[camel_name] = field_name - field_mapping[field_name] = field_name - except ValueError: + for key, value in data.items(): + field_name = camel_to_snake(key) + if field_name not in class_fields: continue - for key, value in data.items(): - mapped_name = None - if key in field_mapping: - mapped_name = field_mapping[key] - else: - try: - snake_key = camel_to_snake(key) - if snake_key in class_fields: - mapped_name = snake_key - except ValueError: - continue - - if mapped_name and mapped_name in class_fields: - field = class_fields[mapped_name] - try: - field_type = self.type_resolver.resolve_type(field.type) - field_values[mapped_name] = self.from_dict(value, field_type) - except Exception as e: - raise TypeError(f"Error decoding field {key}: {str(e)}") + field = class_fields[field_name] + field_type = self.type_resolver.resolve_type(field.type) + + try: + init_kwargs[field_name] = self._convert_value(value, field_type, field_name) + except Exception as e: + raise TypeError(f"Error converting field {key}: {str(e)}") missing_required = [ name for name, field in class_fields.items() - if name not in field_values and field.default is MISSING and field.default_factory is MISSING + if name not in init_kwargs and field.default is MISSING and field.default_factory is MISSING ] if missing_required: - raise TypeError(f"Missing required arguments: {', '.join(missing_required)}") - - self._apply_default_values(field_values, class_fields) - - try: - instance = target_class(**field_values) - return instance - except Exception as e: - raise TypeError(f"Error creating instance of {target_class.__name__}: {str(e)}") - - def _apply_default_values(self, field_values: Dict[str, Any], class_fields: Dict[str, Field[Any]]) -> None: - """Apply default values to fields.""" - for field_name, field in class_fields.items(): - if field_name not in field_values: - if field.default is not MISSING: - field_values[field_name] = field.default - elif field.default_factory is not MISSING: # type: ignore - field_values[field_name] = field.default_factory() # type: ignore + raise TypeError(f"Missing required fields: {', '.join(missing_required)}") + + return target_class(**init_kwargs) + + def to_dict(self, obj: Any) -> Any: + """Convert a dataclass instance to a dictionary.""" + if obj is None: + return None + + if isinstance(obj, (str, int, float, bool)): + return obj + + if isinstance(obj, Enum): + return obj.value + + if isinstance(obj, (datetime, date)): + return obj.isoformat() + + if isinstance(obj, (list, tuple)): + return [self.to_dict(item) for item in obj] + + if isinstance(obj, dict): + return {snake_to_camel(str(key)): self.to_dict(value) for key, value in obj.items()} + + if is_dataclass(obj): + return { + snake_to_camel(field.name): self.to_dict(getattr(obj, field.name)) + for field in fields(obj) + if getattr(obj, field.name) is not None + } + + return str(obj) diff --git a/python/tests/__init__.py b/python/tests/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/python/tests/data/message_samples/__init__.py b/python/tests/data/message_samples/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/python/tests/data/message_samples/minimal/minimal.feature.ndjson b/python/tests/data/message_samples/minimal/minimal.feature.ndjson deleted file mode 100644 index da9d4f22..00000000 --- a/python/tests/data/message_samples/minimal/minimal.feature.ndjson +++ /dev/null @@ -1,12 +0,0 @@ -{"meta":{"ci":{"buildNumber":"154666429","git":{"remote":"https://github.com/cucumber-ltd/shouty.rb.git","revision":"99684bcacf01d95875834d87903dcb072306c9ad"},"name":"GitHub Actions","url":"https://github.com/cucumber-ltd/shouty.rb/actions/runs/154666429"},"cpu":{"name":"x64"},"implementation":{"name":"fake-cucumber","version":"16.3.0"},"os":{"name":"darwin","version":"22.4.0"},"protocolVersion":"22.0.0","runtime":{"name":"node.js","version":"19.7.0"}}} -{"source":{"data":"Feature: minimal\n \n Cucumber doesn't execute this markdown, but @cucumber/react renders it\n \n * This is\n * a bullet\n * list\n \n Scenario: cukes\n Given I have 42 cukes in my belly\n","mediaType":"text/x.cucumber.gherkin+plain","uri":"samples/minimal/minimal.feature"}} -{"gherkinDocument":{"comments":[],"feature":{"children":[{"scenario":{"description":"","examples":[],"id":"2","keyword":"Scenario","location":{"column":3,"line":9},"name":"cukes","steps":[{"id":"1","keyword":"Given ","keywordType":"Context","location":{"column":5,"line":10},"text":"I have 42 cukes in my belly"}],"tags":[]}}],"description":" Cucumber doesn't execute this markdown, but @cucumber/react renders it\n \n * This is\n * a bullet\n * list","keyword":"Feature","language":"en","location":{"column":1,"line":1},"name":"minimal","tags":[]},"uri":"samples/minimal/minimal.feature"}} -{"pickle":{"astNodeIds":["2"],"id":"4","language":"en","name":"cukes","steps":[{"astNodeIds":["1"],"id":"3","text":"I have 42 cukes in my belly","type":"Context"}],"tags":[],"uri":"samples/minimal/minimal.feature"}} -{"stepDefinition":{"id":"0","pattern":{"source":"I have {int} cukes in my belly","type":"CUCUMBER_EXPRESSION"},"sourceReference":{"location":{"line":4},"uri":"samples/minimal/minimal.feature.ts"}}} -{"testRunStarted":{"timestamp":{"nanos":0,"seconds":0}}} -{"testCase":{"id":"6","pickleId":"4","testSteps":[{"id":"5","pickleStepId":"3","stepDefinitionIds":["0"],"stepMatchArgumentsLists":[{"stepMatchArguments":[{"group":{"children":[],"start":7,"value":"42"},"parameterTypeName":"int"}]}]}]}} -{"testCaseStarted":{"attempt":0,"id":"7","testCaseId":"6","timestamp":{"nanos":1000000,"seconds":0}}} -{"testStepStarted":{"testCaseStartedId":"7","testStepId":"5","timestamp":{"nanos":2000000,"seconds":0}}} -{"testStepFinished":{"testCaseStartedId":"7","testStepId":"5","testStepResult":{"duration":{"nanos":1000000,"seconds":0},"status":"PASSED"},"timestamp":{"nanos":3000000,"seconds":0}}} -{"testCaseFinished":{"testCaseStartedId":"7","timestamp":{"nanos":4000000,"seconds":0},"willBeRetried":false}} -{"testRunFinished":{"success":true,"timestamp":{"nanos":5000000,"seconds":0}}} diff --git a/python/tests/test_json_converter.py b/python/tests/test_json_converter.py index 36d0c4e3..24d6af0a 100644 --- a/python/tests/test_json_converter.py +++ b/python/tests/test_json_converter.py @@ -1,10 +1,13 @@ +import sys +from collections.abc import Sequence from dataclasses import dataclass from datetime import date, datetime from enum import Enum -from typing import Dict, List, Optional, Sequence +from typing import Optional import pytest -from cucumber_messages.json_converter import DataclassSerializer + +from cucumber_messages.json_converter import JsonDataclassConverter, camel_to_snake, snake_to_camel class SimpleEnum(Enum): @@ -14,11 +17,11 @@ class SimpleEnum(Enum): @dataclass class SimpleModel: - string_field: str # required - int_field: int # required - float_field: float # required - bool_field: bool # required - enum_field: SimpleEnum # required + string_field: str + int_field: int + float_field: float + bool_field: bool + enum_field: SimpleEnum optional_field: Optional[str] = None @@ -26,83 +29,31 @@ class SimpleModel: class NestedModel: name: str simple: SimpleModel - optional_simple: Optional[SimpleModel] = None + optional_simple: Optional["SimpleModel"] = None @dataclass class CollectionsModel: sequence_field: Sequence[str] - list_field: List[SimpleModel] - dict_field: Dict[str, SimpleModel] - optional_sequence: Optional[Sequence[str]] = None + list_field: list[SimpleModel] + dict_field: dict[str, SimpleModel] + optional_sequence: Optional[Sequence["str"]] = None @dataclass class DateTimeModel: datetime_field: datetime date_field: date - optional_datetime: Optional[datetime] = None + optional_datetime: Optional["datetime"] = None @pytest.fixture def serializer(): - return DataclassSerializer() - - -def test_simple_serialization(serializer): - model = SimpleModel( - string_field="test", - int_field=42, - float_field=3.14, - bool_field=True, - enum_field=SimpleEnum.VALUE1 - ) - - dict_data = serializer.to_dict(model) - assert dict_data["stringField"] == "test" - assert dict_data["intField"] == 42 - assert dict_data["floatField"] == 3.14 - assert dict_data["boolField"] is True - assert dict_data["enumField"] == "value1" - assert "optionalField" not in dict_data - - -def test_simple_deserialization(serializer): - data = { - "stringField": "test", - "intField": 42, - "floatField": 3.14, - "boolField": True, - "enumField": "value1", - "optionalField": "optional", - } - - model = serializer.from_dict(data, SimpleModel) - assert model.string_field == "test" - assert model.int_field == 42 - assert model.float_field == 3.14 - assert model.bool_field is True - assert model.enum_field == SimpleEnum.VALUE1 - assert model.optional_field == "optional" + return JsonDataclassConverter(module_scope=sys.modules[__name__]) -def test_nested_serialization(serializer): - simple = SimpleModel( - string_field="nested", - int_field=42, - float_field=3.14, - bool_field=True, - enum_field=SimpleEnum.VALUE1 - ) - model = NestedModel(name="test", simple=simple) - - dict_data = serializer.to_dict(model) - assert dict_data["name"] == "test" - assert dict_data["simple"]["stringField"] == "nested" - assert "optionalSimple" not in dict_data - - -def test_nested_deserialization(serializer): +def test_optional_field_types(serializer): + # Testing deserialization with non-quoted and quoted types data = { "name": "test", "simple": { @@ -112,38 +63,37 @@ def test_nested_deserialization(serializer): "boolField": True, "enumField": "value1", }, + "optionalSimple": { + "stringField": "optional", + "intField": 99, + "floatField": 2.71, + "boolField": False, + "enumField": "value2", + }, } model = serializer.from_dict(data, NestedModel) + + # Verify main fields assert model.name == "test" assert model.simple.string_field == "nested" assert model.simple.int_field == 42 - assert model.optional_simple is None - -def test_collections_serialization(serializer): - simple = SimpleModel( - string_field="item", - int_field=42, - float_field=3.14, - bool_field=True, - enum_field=SimpleEnum.VALUE1 - ) + # Verify optional fields are correctly deserialized + assert model.optional_simple is not None + assert model.optional_simple.string_field == "optional" + assert model.optional_simple.int_field == 99 - model = CollectionsModel( - sequence_field=["a", "b", "c"], - list_field=[simple], - dict_field={"key": simple} - ) + # Verify types explicitly + assert isinstance(model.optional_simple, SimpleModel) + assert isinstance(model.optional_simple.string_field, str) + assert isinstance(model.optional_simple.int_field, int) + assert isinstance(model.optional_simple.float_field, float) + assert isinstance(model.optional_simple.bool_field, bool) + assert isinstance(model.optional_simple.enum_field, SimpleEnum) - dict_data = serializer.to_dict(model) - assert dict_data["sequenceField"] == ["a", "b", "c"] - assert len(dict_data["listField"]) == 1 - assert dict_data["listField"][0]["stringField"] == "item" - assert dict_data["dictField"]["key"]["stringField"] == "item" - -def test_collections_deserialization(serializer): +def test_collections_with_optional_field_types(serializer): data = { "sequenceField": ["a", "b", "c"], "listField": [ @@ -152,162 +102,107 @@ def test_collections_deserialization(serializer): "intField": 42, "floatField": 3.14, "boolField": True, - "enumField": "value1" + "enumField": "value1", } ], "dictField": { "key": { - "stringField": "item", - "intField": 42, - "floatField": 3.14, - "boolField": True, - "enumField": "value1" + "stringField": "dict_item", + "intField": 84, + "floatField": 6.28, + "boolField": False, + "enumField": "value2", } }, + "optionalSequence": ["x", "y", "z"], } model = serializer.from_dict(data, CollectionsModel) + + # Verify main fields assert list(model.sequence_field) == ["a", "b", "c"] assert len(model.list_field) == 1 assert model.list_field[0].string_field == "item" - assert model.dict_field["key"].string_field == "item" - + assert model.dict_field["key"].string_field == "dict_item" -def test_datetime_serialization(serializer): - dt = datetime(2024, 1, 1, 12, 0) - d = date(2024, 1, 1) - model = DateTimeModel(datetime_field=dt, date_field=d) + # Verify optional fields + assert model.optional_sequence is not None + assert list(model.optional_sequence) == ["x", "y", "z"] - dict_data = serializer.to_dict(model) - assert dict_data["datetimeField"] == "2024-01-01T12:00:00" - assert dict_data["dateField"] == "2024-01-01" + # Verify types explicitly + assert isinstance(model.optional_sequence, Sequence) + assert all(isinstance(item, str) for item in model.optional_sequence) + assert isinstance(model.list_field[0], SimpleModel) + assert isinstance(model.dict_field["key"], SimpleModel) -def test_datetime_deserialization(serializer): +def test_datetime_with_optional_field_types(serializer): data = { "datetimeField": "2024-01-01T12:00:00", - "dateField": "2024-01-01" + "dateField": "2024-01-01", + "optionalDatetime": "2024-01-01T13:00:00", } model = serializer.from_dict(data, DateTimeModel) + + # Verify fields assert model.datetime_field == datetime(2024, 1, 1, 12, 0) assert model.date_field == date(2024, 1, 1) + assert model.optional_datetime == datetime(2024, 1, 1, 13, 0) - -def test_optional_fields(serializer): - model = SimpleModel( - string_field="test", - int_field=42, - float_field=3.14, - bool_field=True, - enum_field=SimpleEnum.VALUE1 - ) - - dict_data = serializer.to_dict(model) - assert "optionalField" not in dict_data - - model = serializer.from_dict(dict_data, SimpleModel) - assert model.optional_field is None + # Verify types explicitly + assert isinstance(model.datetime_field, datetime) + assert isinstance(model.date_field, date) + assert isinstance(model.optional_datetime, datetime) -def test_single_item_sequence(serializer): +def test_optional_field_absent(serializer): data = { - "sequenceField": "single", - "listField": [ - { - "stringField": "item", - "intField": 42, - "floatField": 3.14, - "boolField": True, - "enumField": "value1" - } - ], - "dictField": { - "key": { - "stringField": "item", - "intField": 42, - "floatField": 3.14, - "boolField": True, - "enumField": "value1" - } + "name": "test", + "simple": { + "stringField": "nested", + "intField": 42, + "floatField": 3.14, + "boolField": True, + "enumField": "value1", }, } - model = serializer.from_dict(data, CollectionsModel) - assert list(model.sequence_field) == ["single"] - - -def test_error_handling(serializer): - # Test invalid input type - with pytest.raises(TypeError, match="Expected dict but got"): - serializer.from_dict(["not", "a", "dict"], SimpleModel) - - # Test invalid enum value - with pytest.raises(TypeError, match="Error decoding field enumField"): - serializer.from_dict( - { - "stringField": "test", - "intField": 42, - "floatField": 3.14, - "boolField": True, - "enumField": "invalid_value" - }, - SimpleModel - ) - - # Test missing required fields with only optional field - with pytest.raises(TypeError, match="Missing required arguments:"): - serializer.from_dict({"optionalField": "only optional field provided"}, SimpleModel) - - -def test_camel_snake_case_conversion(serializer): - data = { - "string_field": "test", # snake_case - "intField": 42, # camelCase - "floatField": 3.14, # camelCase - "boolField": True, # camelCase - "enumField": "value1", # camelCase - } - - model = serializer.from_dict(data, SimpleModel) - assert model.string_field == "test" - assert model.int_field == 42 - assert model.float_field == 3.14 - assert model.bool_field is True - assert model.enum_field == SimpleEnum.VALUE1 - - dict_data = serializer.to_dict(model) - assert all(not "_" in key for key in dict_data.keys()) - + model = serializer.from_dict(data, NestedModel) -def test_invalid_field_names(serializer): - # Using completely invalid field names - data = { - "string.field-with.dots": "test", - "int space @ field": 42, - "@#invalid!": "value", - "optionalField": "this is valid but optional", - } + # Verify optional fields are None when absent + assert model.optional_simple is None - # Should skip invalid fields and fail due to missing required fields - with pytest.raises( - TypeError, - match="Missing required arguments: string_field, int_field, float_field, bool_field, enum_field" - ): - serializer.from_dict(data, SimpleModel) + # Verify types explicitly + assert isinstance(model.simple, SimpleModel) + assert model.optional_simple is None - # Test with mix of valid and invalid fields - data = { - "stringField": "test", # valid - "int@field": 42, # invalid - "float field": 3.14, # invalid - "bool.field": True, # invalid - "@enum": "value1", # invalid - } - # Should still fail due to missing required fields - with pytest.raises( - TypeError, - match="Missing required arguments: int_field, float_field, bool_field, enum_field" - ): - serializer.from_dict(data, SimpleModel) \ No newline at end of file +@pytest.mark.parametrize( + "input_str, expected", + [ + ("test", "test"), + ("test_test", "testTest"), + ("Test_TeSt", "TestTest"), + ("", ""), + ("test123test4_5_6_test", "test123test456Test"), + ("test-test", "test-test"), + ], +) +def test_camelize(input_str, expected): + assert snake_to_camel(input_str) == expected + + +@pytest.mark.parametrize( + "expected, input_str", + [ + ("test", "test"), + ("test_test", "testTest"), + ("test_test", "TestTest"), + ("", ""), + ("test123test456_test", "test123test456Test"), + ("test-test", "test-test"), + ], +) +def test_snaking(input_str, expected): + assert camel_to_snake(input_str) == expected diff --git a/python/tests/test_messages.py b/python/tests/test_messages.py new file mode 100644 index 00000000..dcd089f9 --- /dev/null +++ b/python/tests/test_messages.py @@ -0,0 +1,166 @@ +import pytest + +from cucumber_messages import Attachment, AttachmentContentEncoding, Envelope, SourceMediaType +from cucumber_messages import TestStepResultStatus as TTestStepResultStatus +from cucumber_messages import message_converter as default_converter + + +@pytest.fixture +def converter(): + return default_converter + + +def test_basic_attachment_serialization(converter): + data = {"body": "some body", "contentEncoding": "IDENTITY", "mediaType": "text/plain", "fileName": "myfile.txt"} + + attachment = converter.from_dict(data, Attachment) + assert attachment.body == "some body" + assert attachment.content_encoding == AttachmentContentEncoding.identity + assert attachment.media_type == "text/plain" + assert attachment.file_name == "myfile.txt" + + # Round-trip serialization + serialized = converter.to_dict(attachment) + assert serialized == data + + +def test_envelope_with_attachment(converter): + data = { + "attachment": {"body": "some body", "contentEncoding": "BASE64", "mediaType": "text/x.cucumber.gherkin+plain"} + } + + envelope = converter.from_dict(data, Envelope) + assert envelope.attachment is not None + assert envelope.attachment.body == "some body" + assert envelope.attachment.content_encoding == AttachmentContentEncoding.base64 + assert envelope.attachment.media_type == "text/x.cucumber.gherkin+plain" + + # Round-trip serialization + serialized = converter.to_dict(envelope) + assert serialized == data + + +def test_envelope_with_source(converter): + data = { + "source": { + "data": "Feature: Sample\nScenario: Test\n", + "mediaType": "text/x.cucumber.gherkin+plain", + "uri": "features/sample.feature", + } + } + + envelope = converter.from_dict(data, Envelope) + assert envelope.source is not None + assert envelope.source.data == "Feature: Sample\nScenario: Test\n" + assert envelope.source.media_type == SourceMediaType.text_x_cucumber_gherkin_plain + assert envelope.source.uri == "features/sample.feature" + + serialized = converter.to_dict(envelope) + assert serialized == data + + +def test_test_run_finished_with_optional_fields(converter): + data = { + "testRunFinished": { + "success": True, + "timestamp": {"seconds": 1700000000, "nanos": 123456789}, + # exception and message are omitted, should be None after deserialization + } + } + + envelope = converter.from_dict(data, Envelope) + assert envelope.test_run_finished is not None + assert envelope.test_run_finished.success is True + assert envelope.test_run_finished.timestamp.seconds == 1700000000 + assert envelope.test_run_finished.timestamp.nanos == 123456789 + assert envelope.test_run_finished.exception is None + assert envelope.test_run_finished.message is None + + # Round-trip serialization + serialized = converter.to_dict(envelope) + assert serialized == data + + +def test_test_case_finished(converter): + data = { + "testCaseFinished": { + "testCaseStartedId": "some_test_case_started_id", + "timestamp": {"seconds": 1600000000, "nanos": 500}, + "willBeRetried": False, + } + } + + envelope = converter.from_dict(data, Envelope) + assert envelope.test_case_finished is not None + assert envelope.test_case_finished.test_case_started_id == "some_test_case_started_id" + assert envelope.test_case_finished.timestamp.seconds == 1600000000 + assert envelope.test_case_finished.timestamp.nanos == 500 + assert envelope.test_case_finished.will_be_retried is False + + serialized = converter.to_dict(envelope) + assert serialized == data + + +def test_exception_serialization(converter): + data = { + "testRunFinished": { + "success": False, + "timestamp": {"seconds": 1700000001, "nanos": 1000}, + "exception": { + "type": "AssertionError", + "message": "Expected 'X' but got 'Y'", + "stackTrace": "Traceback (most recent call last): ...", + }, + } + } + + envelope = converter.from_dict(data, Envelope) + assert envelope.test_run_finished is not None + exc = envelope.test_run_finished.exception + assert exc is not None + assert exc.type == "AssertionError" + assert exc.message == "Expected 'X' but got 'Y'" + assert exc.stack_trace.startswith("Traceback (most recent call last)") + + serialized = converter.to_dict(envelope) + assert serialized == data + + +def test_test_step_result(converter): + data = { + "testStepFinished": { + "testCaseStartedId": "tcs_id_123", + "testStepId": "ts_id_456", + "testStepResult": { + "duration": {"seconds": 3, "nanos": 500000000}, + "status": "PASSED", + "message": "Step executed successfully", + }, + "timestamp": {"seconds": 1700000020, "nanos": 0}, + } + } + + envelope = converter.from_dict(data, Envelope) + assert envelope.test_step_finished is not None + result = envelope.test_step_finished.test_step_result + assert result.status == TTestStepResultStatus.passed + assert result.duration.seconds == 3 + assert result.duration.nanos == 500000000 + assert result.message == "Step executed successfully" + + serialized = converter.to_dict(envelope) + assert serialized == data + + +def test_missing_optional_fields(converter): + # No optional fields set, serializer should handle defaults + data = {"attachment": {"body": "no optional fields", "contentEncoding": "IDENTITY", "mediaType": "text/plain"}} + + envelope = converter.from_dict(data, Envelope) + assert envelope.attachment is not None + assert envelope.attachment.file_name is None + assert envelope.attachment.source is None + assert envelope.attachment.url is None + + serialized = converter.to_dict(envelope) + assert serialized == data diff --git a/python/tests/test_model_load.py b/python/tests/test_model_load.py index 63b83030..a5206776 100644 --- a/python/tests/test_model_load.py +++ b/python/tests/test_model_load.py @@ -1,234 +1,53 @@ import json +import re from pathlib import Path -from pytest import mark +from git import Repo +from packaging import version +from pytest import fixture + +from cucumber_messages import Envelope, message_converter + + +@fixture +def compatibility_kit_repo(tmpdir): + repo_path = Path(tmpdir) / "compatibility-kit" + repo = Repo.clone_from( + "https://github.com/cucumber/compatibility-kit.git", + str(repo_path), + branch="main", + ) + repo_tags = list(filter(lambda tag: tag is not None, map(lambda tag: getattr(tag.tag, "tag", None), repo.tags))) + + version_pattern = re.compile(r"((.*/)?)v(\d+\.\d+\.\d+)") + last_version = sorted( + map( + version.parse, + map( + lambda match: match.groups()[-1], + filter(lambda match: match is not None, map(lambda tag: re.match(version_pattern, tag), repo_tags)), + ), + ) + )[-1] -from cucumber_messages import ( - serializer, - Ci, - Duration, - Envelope, - ExpressionType, - Feature, - FeatureChild, - GherkinDocument, - Git, - Group, - KeywordType, - Location, - MediaType, - Meta, - Pickle, - PickleStep, - PickleStepType, - Product, - Scenario, - Source, - SourceReference, - Status, - Step, - StepDefinition, - StepDefinitionPattern, - StepMatchArgument, - StepMatchArgumentsList, - TestCase, - TestCaseFinished, - TestCaseStarted, - TestRunFinished, - TestRunStarted, - TestStep, - TestStepFinished, - TestStepResult, - TestStepStarted, - Timestamp, -) + last_version_tag = next(filter(lambda tag: re.search(re.escape(str(last_version)), tag), repo_tags)) -resource_path = Path(__file__).parent.absolute() / "data" + repo.git.checkout(last_version_tag) -with (resource_path / "message_samples/minimal/minimal.feature.ndjson").open(mode="r") as ast_file: - model_data = [*map(json.loads, ast_file)] + return repo_path -oracle_models = [ - Envelope( - meta=Meta( - protocol_version="22.0.0", - implementation=Product(name="fake-cucumber", version="16.3.0"), - runtime=Product(name="node.js", version="19.7.0"), - os=Product(name="darwin", version="22.4.0"), - cpu=Product(name="x64", version=None), - ci=Ci( - name="GitHub Actions", - url="https://github.com/cucumber-ltd/shouty.rb/actions/runs/154666429", - build_number="154666429", - git=Git( - remote="https://github.com/cucumber-ltd/shouty.rb.git", - revision="99684bcacf01d95875834d87903dcb072306c9ad", - tag=None, - ), - ), - ), - undefined_parameter_type=None, - ), - Envelope( - source=Source( - uri="samples/minimal/minimal.feature", - data="Feature: minimal\n \n Cucumber doesn't execute this markdown, but @cucumber/react renders it\n \n * This is\n * a bullet\n * list\n \n Scenario: cukes\n Given I have 42 cukes in my belly\n", - media_type=MediaType.text_x_cucumber_gherkin_plain, - ), - undefined_parameter_type=None, - ), - Envelope( - gherkin_document=GherkinDocument( - uri="samples/minimal/minimal.feature", - feature=Feature( - location=Location(line=1, column=1), - tags=[], - language="en", - keyword="Feature", - name="minimal", - description=" Cucumber doesn't execute this markdown, but @cucumber/react renders it\n \n * This is\n * a bullet\n * list", - children=[ - FeatureChild( - scenario=Scenario( - location=Location(line=9, column=3), - tags=[], - keyword="Scenario", - name="cukes", - description="", - steps=[ - Step( - location=Location(line=10, column=5), - keyword="Given ", - keyword_type=KeywordType.context, - text="I have 42 cukes in my belly", - id="1", - ) - ], - examples=[], - id="2", - ) - ) - ], - ), - comments=[], - ), - undefined_parameter_type=None, - ), - Envelope( - pickle=Pickle( - id="4", - uri="samples/minimal/minimal.feature", - name="cukes", - language="en", - steps=[ - PickleStep(ast_node_ids=["1"], id="3", type=PickleStepType.context, text="I have 42 cukes in my belly") - ], - tags=[], - ast_node_ids=["2"], - ), - undefined_parameter_type=None, - ), - Envelope( - step_definition=StepDefinition( - id="0", - pattern=StepDefinitionPattern( - source="I have {int} cukes in my belly", - type=ExpressionType.cucumber_expression - ), - source_reference=SourceReference( - uri="samples/minimal/minimal.feature.ts", - location=Location(line=4, column=None) - ), - ), - undefined_parameter_type=None, - ), - Envelope( - test_run_started=TestRunStarted( - timestamp=Timestamp(seconds=0, nanos=0) - ), - undefined_parameter_type=None - ), - Envelope( - test_case=TestCase( - id="6", - pickle_id="4", - test_steps=[ - TestStep( - id="5", - pickle_step_id="3", - step_definition_ids=["0"], - step_match_arguments_lists=[ - StepMatchArgumentsList( - step_match_arguments=[ - StepMatchArgument( - group=Group(children=[], start=7, value="42"), - parameter_type_name="int" - ) - ] - ) - ], - ) - ], - ), - undefined_parameter_type=None, - ), - Envelope( - test_case_started=TestCaseStarted( - attempt=0, - id="7", - test_case_id="6", - timestamp=Timestamp(seconds=0, nanos=1000000) - ), - undefined_parameter_type=None, - ), - Envelope( - test_step_started=TestStepStarted( - test_case_started_id="7", - test_step_id="5", - timestamp=Timestamp(seconds=0, nanos=2000000) - ), - undefined_parameter_type=None, - ), - Envelope( - test_step_finished=TestStepFinished( - test_case_started_id="7", - test_step_id="5", - test_step_result=TestStepResult( - duration=Duration(seconds=0, nanos=1000000), - status=Status.passed, - exception=None - ), - timestamp=Timestamp(seconds=0, nanos=3000000), - ), - undefined_parameter_type=None, - ), - Envelope( - test_case_finished=TestCaseFinished( - test_case_started_id="7", - timestamp=Timestamp(seconds=0, nanos=4000000), - will_be_retried=False - ), - undefined_parameter_type=None, - ), - Envelope( - test_run_finished=TestRunFinished( - success=True, - timestamp=Timestamp(seconds=0, nanos=5000000), - exception=None - ), - undefined_parameter_type=None, - ), -] +# Analog of "ruby/spec/cucumber/messages/acceptance_spec.rb" test +def test_simple_load_model(compatibility_kit_repo): + for ast_path in (compatibility_kit_repo / "devkit" / "samples").rglob("*.ndjson"): + print(f"Checking ${ast_path}") + with ast_path.open(mode="r") as ast_file: + for ast_line in ast_file: + model_datum = json.loads(ast_line) + model = message_converter.from_dict(model_datum, Envelope) -@mark.parametrize("model_datum, oracle_model", zip(model_data, oracle_models)) -def test_simple_load_model(model_datum, oracle_model): - # Test messages models load - model = serializer.from_dict(model_datum, Envelope) + assert isinstance(model, Envelope) - assert isinstance(model, Envelope) - # Models support deep-nested comparison - assert model == oracle_model + dumped_ast_datum = message_converter.to_dict(model) - # Serialized model must be the same to original non-restored model - assert model_datum == serializer.to_dict(model) \ No newline at end of file + assert model_datum == dumped_ast_datum