diff --git a/.github/workflows/lint_and_run_tests.yml b/.github/workflows/lint_and_run_tests.yml index dd91304..7c0b420 100644 --- a/.github/workflows/lint_and_run_tests.yml +++ b/.github/workflows/lint_and_run_tests.yml @@ -48,6 +48,9 @@ jobs: - name: Run mypy run: uv run python -m mypy src/ tests/ + - name: Run import linter + run: uv run lint-imports + test: runs-on: ${{ matrix.os }} if: | diff --git a/.importlinter b/.importlinter new file mode 100644 index 0000000..77a0db2 --- /dev/null +++ b/.importlinter @@ -0,0 +1,32 @@ +# SPDX-FileCopyrightText: TNG Technology Consulting GmbH +# +# SPDX-License-Identifier: Apache-2.0 + +[importlinter] +root_package = opossum_lib +# Optional: +include_external_packages = True +exclude_type_checking_imports = True + +[importlinter:contract:file-formats-independent] +name = The different file formats should be independent +type = independence +modules = + opossum_lib.input_formats.* + + +[importlinter:contract:core-should-not-depend-on-input-files] +name = Core should not depend on input files +type = forbidden +source_modules = + opossum_lib.core +forbidden_modules = + opossum_lib.input_formats + +[importlinter:contract:shared_objects_not_depend_on_input_formats] +name = Shared package should not depend on input formats +type = forbidden +source_modules = + opossum_lib.shared +forbidden_modules = + opossum_lib.input_formats diff --git a/README.md b/README.md index 67050fd..e992241 100644 --- a/README.md +++ b/README.md @@ -64,8 +64,9 @@ Options: that you would like to include in the final output. Option can be repeated. -o, --outfile TEXT The file path to write the generated opossum document - to. If appropriate, the extension ".opossum" will be - appended. [default: output.opossum] + to. If appropriate, the extension ".opossum" is + appended. If the output file already exists, it is + overwritten. [default: output.opossum] --help Show this message and exit. 
diff --git a/pyproject.toml b/pyproject.toml index 90ee51c..fdb6fb7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,6 @@ dependencies = [ "click>=8.1.8,<9", "pydantic>=2.10.6", "pyinstaller>=6.11.1", - "faker>=35.0.0", ] [project.urls] @@ -22,11 +21,12 @@ Repository = "https://github.com/opossum-tool/opossum-file" opossum-file = "opossum_lib.cli:opossum_file" [dependency-groups] -test = ["pytest>=8.3.4,<9"] +test = ["pytest>=8.3.4,<9", "faker>=35.0.0",] dev = [ "mypy>=1.14.1,<2", "pre-commit>=4.1.0,<5", "ruff>=0.9.3", + "import-linter>=2.1", ] [tool.uv] diff --git a/src/opossum_lib/cli.py b/src/opossum_lib/cli.py index 7ff5c36..7cf60bd 100644 --- a/src/opossum_lib/cli.py +++ b/src/opossum_lib/cli.py @@ -3,19 +3,21 @@ # SPDX-FileCopyrightText: TNG Technology Consulting GmbH # # SPDX-License-Identifier: Apache-2.0 - - import logging import sys from pathlib import Path import click -from opossum_lib.opossum.file_generation import OpossumFileWriter -from opossum_lib.opossum.opossum_file_content import OpossumFileContent -from opossum_lib.opossum.read_opossum_file import read_opossum_file -from opossum_lib.scancode.convert_scancode_to_opossum import ( - convert_scancode_file_to_opossum, +from opossum_lib.core.services.generate_impl import ( + generate_impl, +) +from opossum_lib.core.services.input_reader import InputReader +from opossum_lib.input_formats.opossum.services.opossum_file_reader import ( + OpossumFileReader, +) +from opossum_lib.input_formats.scancode.services.scancode_file_reader import ( + ScancodeFileReader, ) @@ -47,12 +49,13 @@ def opossum_file() -> None: default="output.opossum", show_default=True, help="The file path to write the generated opossum document to. " - 'If appropriate, the extension ".opossum" will be appended.', + 'If appropriate, the extension ".opossum" is appended. 
' + "If the output file already exists, it is overwritten.", ) def generate( - scancode_json_files: list[str], - opossum_files: list[str], - outfile: str, + scancode_json_files: list[Path], + opossum_files: list[Path], + outfile: Path, ) -> None: """ Generate an Opossum file from various other file formats. @@ -62,23 +65,7 @@ def generate( - ScanCode - Opossum """ - validate_input_and_exit_on_error(scancode_json_files, opossum_files) - opossum_file_content = convert_after_valid_input(scancode_json_files, opossum_files) - - if not outfile.endswith(".opossum"): - outfile += ".opossum" - if Path.is_file(Path(outfile)): - logging.warning(f"{outfile} already exists and will be overwritten.") - - OpossumFileWriter.write_opossum_information_to_file( - opossum_file_content, Path(outfile) - ) - - -def validate_input_and_exit_on_error( - scancode_json_files: list[str], opossum_files: list[str] -) -> None: total_number_of_files = len(scancode_json_files) + len(opossum_files) if total_number_of_files == 0: logging.warning("No input provided. 
Exiting.") @@ -86,17 +73,11 @@ def validate_input_and_exit_on_error( if total_number_of_files > 1: logging.error("Merging of multiple files not yet supported!") sys.exit(1) + input_readers: list[InputReader] = [] + input_readers += [ScancodeFileReader(path=path) for path in scancode_json_files] + input_readers += [OpossumFileReader(path=path) for path in opossum_files] - -def convert_after_valid_input( - scancode_json_files: list[str], opossum_files: list[str] -) -> OpossumFileContent: - if len(scancode_json_files) == 1: - scancode_json_input_file = scancode_json_files[0] - return convert_scancode_file_to_opossum(scancode_json_input_file) - else: - opossum_input_file = opossum_files[0] - return read_opossum_file(opossum_input_file) + generate_impl(input_readers=input_readers, output_file=Path(outfile)) if __name__ == "__main__": diff --git a/src/opossum_lib/opossum/__init__.py b/src/opossum_lib/core/__init__.py similarity index 100% rename from src/opossum_lib/opossum/__init__.py rename to src/opossum_lib/core/__init__.py diff --git a/src/opossum_lib/scancode/__init__.py b/src/opossum_lib/core/entities/__init__.py similarity index 100% rename from src/opossum_lib/scancode/__init__.py rename to src/opossum_lib/core/entities/__init__.py diff --git a/src/opossum_lib/core/entities/base_url_for_sources.py b/src/opossum_lib/core/entities/base_url_for_sources.py new file mode 100644 index 0000000..0e952bb --- /dev/null +++ b/src/opossum_lib/core/entities/base_url_for_sources.py @@ -0,0 +1,16 @@ +# SPDX-FileCopyrightText: TNG Technology Consulting GmbH +# # +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +from pydantic import BaseModel, ConfigDict + +from opossum_lib.shared.entities.opossum_input_file_model import BaseUrlsForSourcesModel + + +class BaseUrlsForSources(BaseModel): + model_config = ConfigDict(frozen=True, extra="allow") + + def to_opossum_model(self) -> BaseUrlsForSourcesModel: + return 
BaseUrlsForSourcesModel(**self.model_dump()) diff --git a/src/opossum_lib/core/entities/external_attribution_source.py b/src/opossum_lib/core/entities/external_attribution_source.py new file mode 100644 index 0000000..3fec815 --- /dev/null +++ b/src/opossum_lib/core/entities/external_attribution_source.py @@ -0,0 +1,25 @@ +# SPDX-FileCopyrightText: TNG Technology Consulting GmbH +# # +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +from pydantic import BaseModel, ConfigDict + +from opossum_lib.shared.entities.opossum_input_file_model import ( + ExternalAttributionSourceModel, +) + + +class ExternalAttributionSource(BaseModel): + model_config = ConfigDict(frozen=True, extra="forbid") + name: str + priority: int + is_relevant_for_preferred: bool | None = None + + def to_opossum_model(self) -> ExternalAttributionSourceModel: + return ExternalAttributionSourceModel( + name=self.name, + priority=self.priority, + is_relevant_for_preferred=self.is_relevant_for_preferred, + ) diff --git a/src/opossum_lib/core/entities/frequent_license.py b/src/opossum_lib/core/entities/frequent_license.py new file mode 100644 index 0000000..af3feea --- /dev/null +++ b/src/opossum_lib/core/entities/frequent_license.py @@ -0,0 +1,23 @@ +# SPDX-FileCopyrightText: TNG Technology Consulting GmbH +# # +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +from pydantic import BaseModel, ConfigDict + +from opossum_lib.shared.entities.opossum_input_file_model import FrequentLicenseModel + + +class FrequentLicense(BaseModel): + model_config = ConfigDict(frozen=True, extra="forbid") + full_name: str + short_name: str + default_text: str + + def to_opossum_model(self) -> FrequentLicenseModel: + return FrequentLicenseModel( + full_name=self.full_name, + short_name=self.short_name, + default_text=self.default_text, + ) diff --git a/src/opossum_lib/core/entities/metadata.py b/src/opossum_lib/core/entities/metadata.py new file mode 100644 index 
0000000..463b59a --- /dev/null +++ b/src/opossum_lib/core/entities/metadata.py @@ -0,0 +1,22 @@ +# SPDX-FileCopyrightText: TNG Technology Consulting GmbH +# # +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +from pydantic import BaseModel, ConfigDict + +from opossum_lib.shared.entities.opossum_input_file_model import MetadataModel + + +class Metadata(BaseModel): + model_config = ConfigDict(frozen=True, extra="allow") + project_id: str + file_creation_date: str + project_title: str + project_version: str | None = None + expected_release_date: str | None = None + build_date: str | None = None + + def to_opossum_model(self) -> MetadataModel: + return MetadataModel(**self.model_dump()) diff --git a/src/opossum_lib/core/entities/opossum.py b/src/opossum_lib/core/entities/opossum.py new file mode 100644 index 0000000..e5a06db --- /dev/null +++ b/src/opossum_lib/core/entities/opossum.py @@ -0,0 +1,26 @@ +# SPDX-FileCopyrightText: TNG Technology Consulting GmbH +# +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +from pydantic import BaseModel, ConfigDict + +from opossum_lib.core.entities.scan_results import ScanResults +from opossum_lib.shared.entities.opossum_file_model import OpossumFileModel +from opossum_lib.shared.entities.opossum_output_file_model import OpossumOutputFileModel + +type OpossumPackageIdentifier = str +type ResourcePath = str + + +class Opossum(BaseModel): + model_config = ConfigDict(frozen=True, extra="forbid") + scan_results: ScanResults + review_results: OpossumOutputFileModel | None = None + + def to_opossum_model(self) -> OpossumFileModel: + return OpossumFileModel( + input_file=self.scan_results.to_opossum_model(), + output_file=self.review_results, + ) diff --git a/src/opossum_lib/core/entities/opossum_package.py b/src/opossum_lib/core/entities/opossum_package.py new file mode 100644 index 0000000..cffe9f6 --- /dev/null +++ b/src/opossum_lib/core/entities/opossum_package.py @@ -0,0 
+1,60 @@ +# SPDX-FileCopyrightText: TNG Technology Consulting GmbH +# # +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +from typing import Literal + +from pydantic import BaseModel, ConfigDict + +from opossum_lib.core.entities.source_info import SourceInfo +from opossum_lib.shared.entities.opossum_input_file_model import OpossumPackageModel + + +class OpossumPackage(BaseModel): + model_config = ConfigDict(frozen=True, extra="forbid") + source: SourceInfo + attribution_confidence: int | None = None + comment: str | None = None + package_name: str | None = None + package_version: str | None = None + package_namespace: str | None = None + package_type: str | None = None + package_purl_appendix: str | None = None + copyright: str | None = None + license_name: str | None = None + license_text: str | None = None + url: str | None = None + first_party: bool | None = None + exclude_from_notice: bool | None = None + pre_selected: bool | None = None + follow_up: Literal["FOLLOW_UP"] | None = None + origin_id: str | None = None + origin_ids: tuple[str, ...] 
| None = None + criticality: Literal["high"] | Literal["medium"] | None = None + was_preferred: bool | None = None + + def to_opossum_model(self) -> OpossumPackageModel: + return OpossumPackageModel( + source=self.source.to_opossum_model(), + attribution_confidence=self.attribution_confidence, + comment=self.comment, + package_name=self.package_name, + package_version=self.package_version, + package_namespace=self.package_namespace, + package_type=self.package_type, + package_p_u_r_l_appendix=self.package_purl_appendix, + copyright=self.copyright, + license_name=self.license_name, + license_text=self.license_text, + url=self.url, + first_party=self.first_party, + exclude_from_notice=self.exclude_from_notice, + pre_selected=self.pre_selected, + follow_up=self.follow_up, + origin_id=self.origin_id, + origin_ids=self.origin_ids, + criticality=self.criticality, + was_preferred=self.was_preferred, + ) diff --git a/src/opossum_lib/core/entities/resource.py b/src/opossum_lib/core/entities/resource.py new file mode 100644 index 0000000..ef533c1 --- /dev/null +++ b/src/opossum_lib/core/entities/resource.py @@ -0,0 +1,84 @@ +# SPDX-FileCopyrightText: TNG Technology Consulting GmbH +# # +# SPDX-License-Identifier: Apache-2.0 + + +from __future__ import annotations + +from collections.abc import Iterable +from enum import Enum, auto +from pathlib import PurePath + +from pydantic import BaseModel, ConfigDict + +from opossum_lib.core.entities.opossum_package import OpossumPackage +from opossum_lib.shared.entities.opossum_input_file_model import ResourceInFileModel + + +def _convert_path_to_str(path: PurePath) -> str: + return str(path).replace("\\", "/") + + +class ResourceType(Enum): + FILE = auto() + FOLDER = auto() + + +class Resource(BaseModel): + model_config = ConfigDict(frozen=False, extra="forbid") + path: PurePath + type: ResourceType | None = None + attributions: list[OpossumPackage] = [] + children: dict[str, Resource] = {} + + def to_opossum_model(self) -> 
ResourceInFileModel: + if self.children or self.type == ResourceType.FOLDER: + return { + _convert_path_to_str( + child.path.relative_to(self.path) + ): child.to_opossum_model() + for child in self.children.values() + } + else: + return 1 + + def add_resource(self, resource: Resource) -> None: + if not resource.path.is_relative_to(self.path): + raise RuntimeError( + f"The path {resource.path} is not a child of this node at {self.path}." + ) + remaining_path_parts = resource.path.relative_to(self.path).parts + if remaining_path_parts: + self._add_resource(resource, remaining_path_parts) + else: + self._update(resource) + + def _add_resource( + self, resource: Resource, remaining_path_parts: Iterable[str] + ) -> None: + if not remaining_path_parts: + self._update(resource) + return + next, *rest_parts = remaining_path_parts + if next not in self.children: + self.children[next] = Resource(path=self.path / next) + self.children[next]._add_resource(resource, rest_parts) + + def _update(self, other: Resource) -> None: + if self.path != other.path: + raise RuntimeError( + "Trying to merge nodes with different paths: " + + f"{self.path} vs. {other.path}" + ) + if self.type and other.type and self.type != other.type: + raise RuntimeError( + "Trying to merge incompatible node types. " + + f"Current node is {self.type}. 
Other is {other.type}" + ) + self.type = self.type or other.type + self.attributions.extend(other.attributions) + for key, child in other.children.items(): + if key in self.children: + self.children[key]._update(child) + else: + self.children[key] = child diff --git a/src/opossum_lib/core/entities/scan_results.py b/src/opossum_lib/core/entities/scan_results.py new file mode 100644 index 0000000..6c7bfff --- /dev/null +++ b/src/opossum_lib/core/entities/scan_results.py @@ -0,0 +1,146 @@ +# SPDX-FileCopyrightText: TNG Technology Consulting GmbH +# # +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import uuid +from collections import defaultdict +from copy import deepcopy +from dataclasses import field + +from pydantic import BaseModel, ConfigDict + +from opossum_lib.core.entities.base_url_for_sources import BaseUrlsForSources +from opossum_lib.core.entities.external_attribution_source import ( + ExternalAttributionSource, +) +from opossum_lib.core.entities.frequent_license import FrequentLicense +from opossum_lib.core.entities.metadata import Metadata +from opossum_lib.core.entities.opossum_package import OpossumPackage +from opossum_lib.core.entities.resource import Resource, _convert_path_to_str +from opossum_lib.shared.entities.opossum_input_file_model import ( + OpossumInputFileModel, + OpossumPackageIdentifierModel, + OpossumPackageModel, + ResourcePathModel, +) + + +def _default_attribution_id_mapper() -> dict[OpossumPackage, str]: + return defaultdict(lambda: str(uuid.uuid4())) + + +class ScanResults(BaseModel): + model_config = ConfigDict(frozen=True, extra="forbid") + metadata: Metadata + resources: list[Resource] + attribution_breakpoints: list[str] = [] + external_attribution_sources: dict[str, ExternalAttributionSource] = {} + frequent_licenses: list[FrequentLicense] | None = None + files_with_children: list[str] | None = None + base_urls_for_sources: BaseUrlsForSources | None = None + attribution_to_id: 
dict[OpossumPackage, str] = field( + default_factory=_default_attribution_id_mapper + ) + unassigned_attributions: list[OpossumPackage] = [] + + def to_opossum_model(self) -> OpossumInputFileModel: + external_attributions, resources_to_attributions = ( + self.create_attribution_mapping(self.resources) + ) + external_attributions.update(self._get_unassigned_attributions()) + + frequent_licenses = None + if self.frequent_licenses: + frequent_licenses = [ + license.to_opossum_model() for license in self.frequent_licenses + ] + base_urls_for_sources = ( + self.base_urls_for_sources and self.base_urls_for_sources.to_opossum_model() + ) + + external_attribution_sources = { + key: val.to_opossum_model() + for (key, val) in self.external_attribution_sources.items() + } + + return OpossumInputFileModel( + metadata=self.metadata.to_opossum_model(), + resources={ + str(resource.path): resource.to_opossum_model() + for resource in self.resources + }, + external_attributions=external_attributions, + resources_to_attributions=resources_to_attributions, + attribution_breakpoints=deepcopy(self.attribution_breakpoints), + external_attribution_sources=external_attribution_sources, + frequent_licenses=frequent_licenses, + files_with_children=deepcopy(self.files_with_children), + base_urls_for_sources=base_urls_for_sources, + ) + + def _get_unassigned_attributions( + self, + ) -> dict[OpossumPackageIdentifierModel, OpossumPackageModel]: + if self.unassigned_attributions: + result = {} + for unassigned_attribution in self.unassigned_attributions: + if unassigned_attribution in self.attribution_to_id: + package_identifier = self.attribution_to_id[unassigned_attribution] + result[package_identifier] = ( + unassigned_attribution.to_opossum_model() + ) + else: + package_identifier = str(uuid.uuid4()) + self.attribution_to_id[unassigned_attribution] = package_identifier + result[package_identifier] = ( + unassigned_attribution.to_opossum_model() + ) + return result + else: + return {} + + 
def create_attribution_mapping( + self, + root_nodes: list[Resource], + ) -> tuple[ + dict[OpossumPackageIdentifierModel, OpossumPackageModel], + dict[ResourcePathModel, list[OpossumPackageIdentifierModel]], + ]: + external_attributions: dict[ + OpossumPackageIdentifierModel, OpossumPackageModel + ] = {} + resources_to_attributions: dict[ + ResourcePathModel, list[OpossumPackageIdentifierModel] + ] = {} + + def process_node(node: Resource) -> None: + path = _convert_path_to_str(node.path) + if not path.startswith("/"): + # the / is required by OpossumUI + path = "/" + path + + node_attributions_by_id = { + self.get_attribution_key(a): a.to_opossum_model() + for a in node.attributions + } + external_attributions.update(node_attributions_by_id) + + if len(node_attributions_by_id) > 0: + resources_to_attributions[path] = list(node_attributions_by_id.keys()) + + for child in node.children.values(): + process_node(child) + + for root in root_nodes: + process_node(root) + + return external_attributions, resources_to_attributions + + def get_attribution_key( + self, attribution: OpossumPackage + ) -> OpossumPackageIdentifierModel: + id = self.attribution_to_id[attribution] + self.attribution_to_id[attribution] = id + return id diff --git a/src/opossum_lib/core/entities/source_info.py b/src/opossum_lib/core/entities/source_info.py new file mode 100644 index 0000000..f83aa6a --- /dev/null +++ b/src/opossum_lib/core/entities/source_info.py @@ -0,0 +1,23 @@ +# SPDX-FileCopyrightText: TNG Technology Consulting GmbH +# # +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +from pydantic import BaseModel, ConfigDict + +from opossum_lib.shared.entities.opossum_input_file_model import SourceInfoModel + + +class SourceInfo(BaseModel): + model_config = ConfigDict(frozen=True, extra="forbid") + name: str + document_confidence: int | float | None = 0 + additional_name: str | None = None + + def to_opossum_model(self) -> SourceInfoModel: + return 
SourceInfoModel( + name=self.name, + document_confidence=self.document_confidence, + additional_name=self.additional_name, + )
diff --git a/tests/opossum_model_generators/__init__.py b/src/opossum_lib/core/services/__init__.py
similarity index 100%
rename from tests/opossum_model_generators/__init__.py
rename to src/opossum_lib/core/services/__init__.py
diff --git a/src/opossum_lib/core/services/generate_impl.py b/src/opossum_lib/core/services/generate_impl.py
new file mode 100644
index 0000000..977927e
--- /dev/null
+++ b/src/opossum_lib/core/services/generate_impl.py
@@ -0,0 +1,16 @@
+# SPDX-FileCopyrightText: TNG Technology Consulting GmbH
+#
+# SPDX-License-Identifier: Apache-2.0
+from pathlib import Path
+
+from opossum_lib.core.services.input_reader import InputReader
+from opossum_lib.core.services.write_opossum_file import write_opossum_file
+
+
+def generate_impl(input_readers: list[InputReader], output_file: Path) -> None:
+    # currently this converts only one file (validated in the arguments)
+    # for the future a merge step is planned after reading the files
+    opossum = input_readers[0].read()
+
+    opossum_file_content = opossum.to_opossum_model()
+    write_opossum_file(opossum_file_content, output_file)
diff --git a/src/opossum_lib/core/services/input_reader.py b/src/opossum_lib/core/services/input_reader.py
new file mode 100644
index 0000000..6ec0f46
--- /dev/null
+++ b/src/opossum_lib/core/services/input_reader.py
@@ -0,0 +1,19 @@
+# SPDX-FileCopyrightText: TNG Technology Consulting GmbH
+#
+# SPDX-License-Identifier: Apache-2.0
+from abc import abstractmethod
+from typing import Protocol
+
+from opossum_lib.core.entities.opossum import Opossum
+
+
+class InputReader(Protocol):
+    """Interface of a source that can be read into an ``Opossum``.
+
+    Readers for concrete input formats implement ``read``; ``generate_impl``
+    only relies on this interface and not on any particular input format.
+    """
+
+    @abstractmethod
+    def read(self) -> Opossum:
+        """Read the underlying input and return its content as an ``Opossum``."""
diff --git a/src/opossum_lib/core/services/write_opossum_file.py b/src/opossum_lib/core/services/write_opossum_file.py
new file mode 100644
index 0000000..534a952
--- /dev/null
+++ b/src/opossum_lib/core/services/write_opossum_file.py
@@ -0,0 +1,62 @@
+# SPDX-FileCopyrightText: TNG Technology Consulting GmbH
+#
+# SPDX-License-Identifier: Apache-2.0
+from pathlib import Path
+from zipfile import ZIP_DEFLATED, ZipFile
+
+from pydantic import BaseModel
+
+from opossum_lib.shared.constants import (
+    COMPRESSION_LEVEL,
+    INPUT_JSON_NAME,
+    OUTPUT_JSON_NAME,
+)
+from opossum_lib.shared.entities.opossum_file_model import OpossumFileModel
+
+
+def write_opossum_file(opossum_file_model: OpossumFileModel, file_path: Path) -> None:
+    """Write *opossum_file_model* as a zip archive to *file_path*.
+
+    ".opossum" is appended to the file name when it is missing. The archive is
+    opened in "w" mode, so an already existing file is overwritten.
+    """
+    file_path = _ensure_outfile_suffix(file_path)
+    with ZipFile(
+        file_path, "w", compression=ZIP_DEFLATED, compresslevel=COMPRESSION_LEVEL
+    ) as zip_file:
+        _write_input_json(opossum_file_model, zip_file)
+        _write_output_json_if_existing(opossum_file_model, zip_file)
+
+
+def _write_output_json_if_existing(
+    opossum_file_model: OpossumFileModel, zip_file: ZipFile
+) -> None:
+    if opossum_file_model.output_file:
+        _write_json_to_zip(zip_file, OUTPUT_JSON_NAME, opossum_file_model.output_file)
+
+
+def _write_input_json(opossum_file_model: OpossumFileModel, zip_file: ZipFile) -> None:
+    _write_json_to_zip(zip_file, INPUT_JSON_NAME, opossum_file_model.input_file)
+
+
+def _write_json_to_zip(zip_file: ZipFile, sub_file_name: str, model: BaseModel) -> None:
+    zip_file.writestr(
+        sub_file_name,
+        model.model_dump_json(
+            indent=4,
+            exclude_none=True,
+            by_alias=True,
+        ),
+    )
+
+
+def _ensure_outfile_suffix(outfile_path: Path) -> Path:
+    """Return *outfile_path* with ".opossum" appended unless already present.
+
+    The extension is appended to the complete file name instead of replacing
+    an existing suffix, so "result.json" becomes "result.json.opossum" and a
+    name like "scan-v1.2" is not truncated to "scan-v1.opossum".
+    """
+    if outfile_path.name.endswith(".opossum"):
+        return outfile_path
+    return outfile_path.with_name(outfile_path.name + ".opossum")
diff --git a/tests/test_opossum/__init__.py b/src/opossum_lib/input_formats/__init__.py
similarity index 100%
rename from tests/test_opossum/__init__.py
rename to src/opossum_lib/input_formats/__init__.py
diff --git a/tests/test_opossum/generators/__init__.py
b/src/opossum_lib/input_formats/opossum/__init__.py similarity index 100% rename from tests/test_opossum/generators/__init__.py rename to src/opossum_lib/input_formats/opossum/__init__.py diff --git a/tests/test_scancode/__init__.py b/src/opossum_lib/input_formats/opossum/entities/__init__.py similarity index 100% rename from tests/test_scancode/__init__.py rename to src/opossum_lib/input_formats/opossum/entities/__init__.py diff --git a/tests/test_scancode/generators/__init__.py b/src/opossum_lib/input_formats/opossum/services/__init__.py similarity index 100% rename from tests/test_scancode/generators/__init__.py rename to src/opossum_lib/input_formats/opossum/services/__init__.py diff --git a/src/opossum_lib/input_formats/opossum/services/convert_to_opossum.py b/src/opossum_lib/input_formats/opossum/services/convert_to_opossum.py new file mode 100644 index 0000000..1712030 --- /dev/null +++ b/src/opossum_lib/input_formats/opossum/services/convert_to_opossum.py @@ -0,0 +1,271 @@ +# SPDX-FileCopyrightText: TNG Technology Consulting GmbH +# +# SPDX-License-Identifier: Apache-2.0 + +from copy import deepcopy +from pathlib import PurePath + +from opossum_lib.core.entities.base_url_for_sources import BaseUrlsForSources +from opossum_lib.core.entities.external_attribution_source import ( + ExternalAttributionSource, +) +from opossum_lib.core.entities.frequent_license import FrequentLicense +from opossum_lib.core.entities.metadata import Metadata +from opossum_lib.core.entities.opossum import ( + Opossum, +) +from opossum_lib.core.entities.opossum_package import OpossumPackage +from opossum_lib.core.entities.resource import ( + Resource, + ResourceType, + _convert_path_to_str, +) +from opossum_lib.core.entities.scan_results import ScanResults +from opossum_lib.core.entities.source_info import SourceInfo +from opossum_lib.shared.entities.opossum_file_model import OpossumFileModel +from opossum_lib.shared.entities.opossum_input_file_model import ( + 
ExternalAttributionSourceModel as FileExternalAttributionSource, +) +from opossum_lib.shared.entities.opossum_input_file_model import ( + FrequentLicenseModel, + MetadataModel, + OpossumInputFileModel, + OpossumPackageIdentifierModel, + OpossumPackageModel, + ResourceInFileModel, + ResourcePathModel, + SourceInfoModel, +) + + +def convert_to_opossum(opossum_file_model: OpossumFileModel) -> Opossum: + opossum = Opossum( + scan_results=_convert_to_scan_results(opossum_file_model.input_file), + review_results=opossum_file_model.output_file, + ) + return opossum + + +def _convert_to_scan_results( + opossum_input_file_model: OpossumInputFileModel, +) -> ScanResults: + resources, used_attribution_ids = _convert_to_resource_tree( + resources=opossum_input_file_model.resources, + external_attributions=opossum_input_file_model.external_attributions, + resources_to_attributions=opossum_input_file_model.resources_to_attributions, + ) + # fmt: off + frequent_licenses = ( + opossum_input_file_model.frequent_licenses + and _convert_frequent_licenses(opossum_input_file_model.frequent_licenses) + ) + # fmt: on + + base_urls_for_sources = ( + opossum_input_file_model.base_urls_for_sources + and BaseUrlsForSources( + **(opossum_input_file_model.base_urls_for_sources.model_dump()) + ) + ) + + file_attribution_sources = opossum_input_file_model.external_attribution_sources + external_attribution_sources = { + # noqa required due to clash between linter and formatter + name: _convert_external_attribution_source( # noqa: E501 + attribution_source + ) + for name, attribution_source in file_attribution_sources.items() + } + + attribution_with_id = _convert_to_attribution_with_id( + opossum_input_file_model.external_attributions + ) + return ScanResults( + metadata=_convert_to_metadata(opossum_input_file_model.metadata), + resources=resources, + attribution_breakpoints=deepcopy( + opossum_input_file_model.attribution_breakpoints + ), + 
external_attribution_sources=external_attribution_sources, + frequent_licenses=frequent_licenses, + files_with_children=deepcopy(opossum_input_file_model.files_with_children), + base_urls_for_sources=base_urls_for_sources, + attribution_to_id=attribution_with_id, + unassigned_attributions=_get_unassigned_attributions( + used_attribution_ids, opossum_input_file_model.external_attributions + ), + ) + + +def _get_unassigned_attributions( + used_attribution_ids: set[OpossumPackageIdentifierModel], + external_attributions: dict[ + OpossumPackageIdentifierModel, + OpossumPackageModel, + ], +) -> list[OpossumPackage] | None: + available_attribution_ids = external_attributions.keys() + unused_attributions_ids = set(available_attribution_ids) - used_attribution_ids + unused_attributions = [ + _convert_package(external_attributions[id]) for id in unused_attributions_ids + ] + return unused_attributions + + +def _convert_external_attribution_source( + external_attribution_source: FileExternalAttributionSource, +) -> ExternalAttributionSource: + return ExternalAttributionSource( + name=external_attribution_source.name, + priority=external_attribution_source.priority, + is_relevant_for_preferred=external_attribution_source.is_relevant_for_preferred, + ) + + +def _convert_frequent_licenses( + frequent_licenses_infile: list[FrequentLicenseModel], +) -> list[FrequentLicense]: + frequent_licenses: list[FrequentLicense] = [ + _convert_frequent_license(license) for license in frequent_licenses_infile + ] + return frequent_licenses + + +def _convert_to_metadata( + infile_metadata: MetadataModel, +) -> Metadata: + return Metadata(**infile_metadata.model_dump()) + + +def _convert_to_resource_tree( + resources: ResourceInFileModel, + external_attributions: dict[ + OpossumPackageIdentifierModel, + OpossumPackageModel, + ], + resources_to_attributions: dict[ + ResourcePathModel, + list[OpossumPackageIdentifierModel], + ], +) -> tuple[list[Resource], set[OpossumPackageIdentifierModel]]: + 
used_attribution_ids = set() + + def generate_child_resource( + current_path: PurePath, + to_insert: ResourceInFileModel, + ) -> Resource: + path = current_path + current_path_as_string = _convert_path_to_str(current_path) + if not current_path_as_string.startswith("/"): + current_path_as_string = "/" + current_path_as_string + attributions, attribution_ids = _get_applicable_attributions( + current_path_as_string + ) + used_attribution_ids.update(attribution_ids) + if isinstance(to_insert, int): + resource_type = ResourceType.FILE + return Resource( + type=resource_type, + path=path, + attributions=attributions, + ) + else: + resource_type = ResourceType.FOLDER + return Resource( + type=resource_type, + path=path, + attributions=attributions, + children={ + relative_path: generate_child_resource( + current_path / relative_path, child + ) + for relative_path, child in to_insert.items() + }, + ) + + def _get_applicable_attributions( + current_path_as_string: str, + ) -> tuple[list[OpossumPackage], set[OpossumPackageIdentifierModel]]: + attributions = [] + attribution_ids: list[str] = [] + if current_path_as_string in resources_to_attributions: + attribution_ids = resources_to_attributions[current_path_as_string] + attributions = [ + _convert_package(external_attributions[id]) for id in attribution_ids + ] + return attributions, set(attribution_ids) + + root_path = PurePath("") + + if isinstance(resources, dict): + return [ + generate_child_resource(root_path / relative_path, child) + for relative_path, child in resources.items() + ], used_attribution_ids + else: + raise RuntimeError("Root node must not be of file type") + + +def _convert_to_attribution_with_id( + external_attributions: dict[ + OpossumPackageIdentifierModel, + OpossumPackageModel, + ], +) -> dict[OpossumPackage, str]: + result = {} + for package_identifier, package in external_attributions.items(): + converted_package = _convert_package(package) + if converted_package not in result: + 
result[converted_package] = package_identifier + else: + raise RuntimeError( + "An attribution was duplicated in the scan breaking internal assertions" + ) + return result + + +def _convert_frequent_license( + infile_frequent_license: FrequentLicenseModel, +) -> FrequentLicense: + return FrequentLicense( + full_name=infile_frequent_license.full_name, + short_name=infile_frequent_license.short_name, + default_text=infile_frequent_license.default_text, + ) + + +def _convert_package( + infile_package: OpossumPackageModel, +) -> OpossumPackage: + return OpossumPackage( + source=_convert_source(infile_package.source), + attribution_confidence=infile_package.attribution_confidence, + comment=infile_package.comment, + package_name=infile_package.package_name, + package_version=infile_package.package_version, + package_namespace=infile_package.package_namespace, + package_type=infile_package.package_type, + package_purl_appendix=infile_package.package_p_u_r_l_appendix, + copyright=infile_package.copyright, + license_name=infile_package.license_name, + license_text=infile_package.license_text, + url=infile_package.url, + first_party=infile_package.first_party, + exclude_from_notice=infile_package.exclude_from_notice, + pre_selected=infile_package.pre_selected, + follow_up=infile_package.follow_up, + origin_id=infile_package.origin_id, + origin_ids=infile_package.origin_ids, + criticality=infile_package.criticality, + was_preferred=infile_package.was_preferred, + ) + + +def _convert_source( + infile_source_info: SourceInfoModel, +) -> SourceInfo: + return SourceInfo( + name=infile_source_info.name, + document_confidence=infile_source_info.document_confidence, + additional_name=infile_source_info.additional_name, + ) diff --git a/src/opossum_lib/input_formats/opossum/services/opossum_file_reader.py b/src/opossum_lib/input_formats/opossum/services/opossum_file_reader.py new file mode 100644 index 0000000..ba81d7b --- /dev/null +++ 
b/src/opossum_lib/input_formats/opossum/services/opossum_file_reader.py @@ -0,0 +1,24 @@ +# SPDX-FileCopyrightText: TNG Technology Consulting GmbH +# +# SPDX-License-Identifier: Apache-2.0 +from pathlib import Path + +from opossum_lib.core.entities.opossum import Opossum +from opossum_lib.core.services.input_reader import InputReader +from opossum_lib.input_formats.opossum.services.convert_to_opossum import ( # noqa: E501 + convert_to_opossum, +) +from opossum_lib.input_formats.opossum.services.read_opossum_file import ( + read_opossum_file, +) + + +class OpossumFileReader(InputReader): + path: Path + + def __init__(self, path: Path): + self.path = path + + def read(self) -> Opossum: + opossum_input_file = read_opossum_file(path=self.path) + return convert_to_opossum(opossum_input_file) diff --git a/src/opossum_lib/input_formats/opossum/services/read_opossum_file.py b/src/opossum_lib/input_formats/opossum/services/read_opossum_file.py new file mode 100644 index 0000000..535acad --- /dev/null +++ b/src/opossum_lib/input_formats/opossum/services/read_opossum_file.py @@ -0,0 +1,64 @@ +# SPDX-FileCopyrightText: TNG Technology Consulting GmbH +# +# SPDX-License-Identifier: Apache-2.0 + + +import json +import logging +import sys +from pathlib import Path +from zipfile import ZipFile + +from opossum_lib.shared.constants import ( + INPUT_JSON_NAME, + OUTPUT_JSON_NAME, +) +from opossum_lib.shared.entities.opossum_file_model import OpossumFileModel +from opossum_lib.shared.entities.opossum_input_file_model import OpossumInputFileModel +from opossum_lib.shared.entities.opossum_output_file_model import OpossumOutputFileModel + + +def read_opossum_file(path: Path) -> OpossumFileModel: + logging.info(f"Converting opossum to opossum {path}") + + try: + with ( + ZipFile(path, "r") as input_zip_file, + ): + _validate_zip_file_contents(input_zip_file) + input_file = _read_input_json_from_zip_file(input_zip_file) + return OpossumFileModel( + input_file=input_file, + 
output_file=_read_output_json_if_exists(input_zip_file), + ) + except Exception as e: + print(f"Error reading file {path}: {e}") + sys.exit(1) + + +def _read_input_json_from_zip_file(zip_file: ZipFile) -> OpossumInputFileModel: + with zip_file.open(INPUT_JSON_NAME) as input_json_file: + input_json = json.load(input_json_file) + input_file = OpossumInputFileModel.model_validate(input_json) + return input_file + + +def _read_output_json_if_exists( + input_zip_file: ZipFile, +) -> OpossumOutputFileModel | None: + if OUTPUT_JSON_NAME in input_zip_file.namelist(): + with input_zip_file.open(OUTPUT_JSON_NAME) as output_json_file: + output_json = json.load(output_json_file) + output_file = OpossumOutputFileModel.model_validate(output_json) + else: + output_file = None + return output_file + + +def _validate_zip_file_contents(input_zip_file: ZipFile) -> None: + if INPUT_JSON_NAME not in input_zip_file.namelist(): + logging.error( + f"Opossum file {input_zip_file.filename} is corrupt" + f" and does not contain '{INPUT_JSON_NAME}'" + ) + sys.exit(1) diff --git a/tests/test_setup/__init__.py b/src/opossum_lib/input_formats/scancode/__init__.py similarity index 100% rename from tests/test_setup/__init__.py rename to src/opossum_lib/input_formats/scancode/__init__.py diff --git a/src/opossum_lib/scancode/constants.py b/src/opossum_lib/input_formats/scancode/constants.py similarity index 100% rename from src/opossum_lib/scancode/constants.py rename to src/opossum_lib/input_formats/scancode/constants.py diff --git a/tests/util/__init__.py b/src/opossum_lib/input_formats/scancode/entities/__init__.py similarity index 100% rename from tests/util/__init__.py rename to src/opossum_lib/input_formats/scancode/entities/__init__.py diff --git a/src/opossum_lib/scancode/model.py b/src/opossum_lib/input_formats/scancode/entities/scancode_model.py similarity index 69% rename from src/opossum_lib/scancode/model.py rename to src/opossum_lib/input_formats/scancode/entities/scancode_model.py 
index 45b6031..1e82597 100644 --- a/src/opossum_lib/scancode/model.py +++ b/src/opossum_lib/input_formats/scancode/entities/scancode_model.py @@ -10,11 +10,11 @@ from pydantic import BaseModel -class Options(BaseModel, extra="ignore"): +class OptionsModel(BaseModel, extra="ignore"): input: list[str] -class SystemEnvironment(BaseModel): +class SystemEnvironmentModel(BaseModel): cpu_architecture: str operating_system: str platform: str @@ -22,20 +22,20 @@ class SystemEnvironment(BaseModel): python_version: str -class ExtraData(BaseModel): +class ExtraDataModel(BaseModel): files_count: int spdx_license_list_version: str - system_environment: SystemEnvironment + system_environment: SystemEnvironmentModel -class Header(BaseModel): +class HeaderModel(BaseModel): duration: float end_timestamp: str errors: list - extra_data: ExtraData + extra_data: ExtraDataModel message: Any notice: str - options: Options + options: OptionsModel output_format_version: str start_timestamp: str tool_name: str @@ -43,7 +43,7 @@ class Header(BaseModel): warnings: list -class ReferenceMatch(BaseModel): +class ReferenceMatchModel(BaseModel): end_line: int from_file: str license_expression: str @@ -58,7 +58,7 @@ class ReferenceMatch(BaseModel): start_line: int -class Match(BaseModel): +class MatchModel(BaseModel): end_line: int from_file: str license_expression: str @@ -73,64 +73,64 @@ class Match(BaseModel): start_line: int -class GlobalLicenseDetection(BaseModel): +class GlobalLicenseDetectionModel(BaseModel): detection_count: int identifier: str license_expression: str license_expression_spdx: str - reference_matches: list[ReferenceMatch] + reference_matches: list[ReferenceMatchModel] -class FileBasedLicenseDetection(BaseModel): +class FileBasedLicenseDetectionModel(BaseModel): license_expression: str license_expression_spdx: str - matches: list[Match] + matches: list[MatchModel] identifier: str -class Copyright(BaseModel): +class CopyrightModel(BaseModel): copyright: str end_line: int 
start_line: int -class Holder(BaseModel): +class HolderModel(BaseModel): end_line: int holder: str start_line: int -class Url(BaseModel): +class UrlModel(BaseModel): end_line: int start_line: int url: str -class Email(BaseModel): +class EmailModel(BaseModel): email: str end_line: int start_line: int -class FileType(Enum): +class FileTypeModel(Enum): FILE = "file" DIRECTORY = "directory" -class File(BaseModel): +class FileModel(BaseModel): authors: list base_name: str - copyrights: list[Copyright] + copyrights: list[CopyrightModel] date: str | None detected_license_expression: str | None detected_license_expression_spdx: str | None dirs_count: int - emails: list[Email] + emails: list[EmailModel] extension: str files_count: int file_type: str | None for_packages: list - holders: list[Holder] + holders: list[HolderModel] is_archive: bool is_binary: bool is_media: bool @@ -138,7 +138,7 @@ class File(BaseModel): is_source: bool is_text: bool license_clues: list - license_detections: list[FileBasedLicenseDetection] + license_detections: list[FileBasedLicenseDetectionModel] md5: str | None mime_type: str | None name: str @@ -151,13 +151,13 @@ class File(BaseModel): sha256: str | None size: int size_count: int - type: FileType - urls: list[Url] + type: FileTypeModel + urls: list[UrlModel] -class ScanCodeData(BaseModel): +class ScancodeModel(BaseModel): dependencies: list - files: list[File] - license_detections: list[GlobalLicenseDetection] - headers: list[Header] + files: list[FileModel] + license_detections: list[GlobalLicenseDetectionModel] + headers: list[HeaderModel] packages: list diff --git a/src/opossum_lib/input_formats/scancode/services/__init__.py b/src/opossum_lib/input_formats/scancode/services/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/opossum_lib/input_formats/scancode/services/convert_to_opossum.py b/src/opossum_lib/input_formats/scancode/services/convert_to_opossum.py new file mode 100644 index 0000000..19cb014 --- /dev/null 
+++ b/src/opossum_lib/input_formats/scancode/services/convert_to_opossum.py @@ -0,0 +1,94 @@ +# SPDX-FileCopyrightText: TNG Technology Consulting GmbH +# +# SPDX-License-Identifier: Apache-2.0 + +import logging +import sys +import uuid +from pathlib import PurePath + +from opossum_lib.core.entities.metadata import Metadata +from opossum_lib.core.entities.opossum import ( + Opossum, +) +from opossum_lib.core.entities.opossum_package import OpossumPackage +from opossum_lib.core.entities.resource import Resource, ResourceType +from opossum_lib.core.entities.scan_results import ScanResults +from opossum_lib.core.entities.source_info import SourceInfo +from opossum_lib.input_formats.scancode.constants import SCANCODE_SOURCE_NAME +from opossum_lib.input_formats.scancode.entities.scancode_model import ( + FileModel, + FileTypeModel, + HeaderModel, + ScancodeModel, +) + + +def convert_to_opossum(scancode_data: ScancodeModel) -> Opossum: + resources = _extract_opossum_resources(scancode_data) + + scancode_header = _extract_scancode_header(scancode_data) + metadata = Metadata( + project_id=str(uuid.uuid4()), + file_creation_date=scancode_header.end_timestamp, + project_title="ScanCode file", + ) + + return Opossum( + scan_results=ScanResults( + metadata=metadata, + resources=resources, + ) + ) + + +def _extract_scancode_header(scancode_data: ScancodeModel) -> HeaderModel: + if len(scancode_data.headers) != 1: + logging.error("Headers of ScanCode file are invalid.") + sys.exit(1) + return scancode_data.headers[0] + + +def _extract_opossum_resources( + scancode_data: ScancodeModel, +) -> list[Resource]: + temp_root = Resource(path=PurePath("")) + for file in scancode_data.files: + resource = Resource( + path=PurePath(file.path), + attributions=_get_attribution_info(file), + type=_convert_resource_type(file.type), + ) + temp_root.add_resource(resource) + + return list(temp_root.children.values()) + + +def _convert_resource_type(file_type: FileTypeModel) -> ResourceType: + if 
file_type == FileTypeModel.FILE: + return ResourceType.FILE + else: + return ResourceType.FOLDER + + +def _get_attribution_info(file: FileModel) -> list[OpossumPackage]: + if file.type == FileTypeModel.DIRECTORY: + return [] + copyright = "\n".join(c.copyright for c in file.copyrights) + source_info = SourceInfo(name=SCANCODE_SOURCE_NAME) + + attribution_infos = [] + for license_detection in file.license_detections: + license_name = license_detection.license_expression_spdx + max_score = max(m.score for m in license_detection.matches) + attribution_confidence = int(max_score) + + package = OpossumPackage( + source=source_info, + license_name=license_name, + attribution_confidence=attribution_confidence, + copyright=copyright, + ) + attribution_infos.append(package) + + return attribution_infos diff --git a/src/opossum_lib/input_formats/scancode/services/scancode_file_reader.py b/src/opossum_lib/input_formats/scancode/services/scancode_file_reader.py new file mode 100644 index 0000000..6309277 --- /dev/null +++ b/src/opossum_lib/input_formats/scancode/services/scancode_file_reader.py @@ -0,0 +1,48 @@ +# SPDX-FileCopyrightText: TNG Technology Consulting GmbH +# +# SPDX-License-Identifier: Apache-2.0 + +import json +import logging +import sys +from pathlib import Path + +from opossum_lib.core.entities.opossum import ( + Opossum, +) +from opossum_lib.core.services.input_reader import InputReader +from opossum_lib.input_formats.scancode.entities.scancode_model import ( + ScancodeModel, +) +from opossum_lib.input_formats.scancode.services.convert_to_opossum import ( # noqa: E501 + convert_to_opossum, +) + + +class ScancodeFileReader(InputReader): + path: Path + + def __init__(self, path: Path): + self.path = path + + def read(self) -> Opossum: + logging.info(f"Converting scancode to opossum {self.path}") + + scancode_data = self._load_scancode_json() + + return convert_to_opossum(scancode_data) + + def _load_scancode_json(self) -> ScancodeModel: + try: + with 
open(self.path) as input_file: + json_data = json.load(input_file) + except json.JSONDecodeError as e: + logging.error(f"Error decoding json for file {self.path}. Message: {e.msg}") + sys.exit(1) + except UnicodeDecodeError: + logging.error(f"Error decoding json for file {self.path}.") + sys.exit(1) + + scancode_data = ScancodeModel.model_validate(json_data) + + return scancode_data diff --git a/src/opossum_lib/opossum/file_generation.py b/src/opossum_lib/opossum/file_generation.py deleted file mode 100644 index 59092bc..0000000 --- a/src/opossum_lib/opossum/file_generation.py +++ /dev/null @@ -1,51 +0,0 @@ -# SPDX-FileCopyrightText: TNG Technology Consulting GmbH -# -# SPDX-License-Identifier: Apache-2.0 -from pathlib import Path -from zipfile import ZIP_DEFLATED, ZipFile - -from opossum_lib.opossum.constants import ( - COMPRESSION_LEVEL, - INPUT_JSON_NAME, - OUTPUT_JSON_NAME, -) -from opossum_lib.opossum.opossum_file_content import OpossumFileContent - - -class OpossumFileWriter: - @staticmethod - def write_opossum_information_to_file( - opossum_file_content: OpossumFileContent, file_path: Path - ) -> None: - with ZipFile( - file_path, "w", compression=ZIP_DEFLATED, compresslevel=COMPRESSION_LEVEL - ) as zip_file: - OpossumFileWriter._write_input_json(opossum_file_content, zip_file) - OpossumFileWriter._write_output_json_if_existing( - opossum_file_content, zip_file - ) - - @staticmethod - def _write_output_json_if_existing( - opossum_file_content: OpossumFileContent, zip_file: ZipFile - ) -> None: - if opossum_file_content.output_file: - zip_file.writestr( - OUTPUT_JSON_NAME, - opossum_file_content.output_file.model_dump_json( - exclude_none=True, indent=4, by_alias=True - ), - ) - - @staticmethod - def _write_input_json( - opossum_file_content: OpossumFileContent, zip_file: ZipFile - ) -> None: - zip_file.writestr( - INPUT_JSON_NAME, - opossum_file_content.input_file.model_dump_json( - indent=4, - exclude_none=True, - by_alias=True, - ), - ) diff --git 
a/src/opossum_lib/opossum/merger.py b/src/opossum_lib/opossum/merger.py deleted file mode 100644 index ec6e16d..0000000 --- a/src/opossum_lib/opossum/merger.py +++ /dev/null @@ -1,140 +0,0 @@ -# SPDX-FileCopyrightText: TNG Technology Consulting GmbH -# -# SPDX-License-Identifier: Apache-2.0 -from typing import Any - -from opossum_lib.opossum.opossum_file import ( - OpossumInformation, - OpossumPackageIdentifier, - Resource, - ResourcePath, - ResourceType, - convert_resource_in_file_to_resource, -) - - -def merge_opossum_information( - elements_to_merge: list[OpossumInformation], -) -> OpossumInformation: - expanded_opossum_information = [ - expand_opossum_package_identifier(opossum_information) - for opossum_information in elements_to_merge - ] - return OpossumInformation( - metadata=expanded_opossum_information[0].metadata, - resources=_merge_resources( - [ - convert_resource_in_file_to_resource(opossum_information.resources) - for opossum_information in expanded_opossum_information - ] - ).convert_to_file_resource(), - external_attributions=_merge_dicts_without_duplicates( - [ - opossum_information.external_attributions - for opossum_information in expanded_opossum_information - ] - ), - resources_to_attributions=_merge_resources_to_attributions( - [ - opossum_information.resources_to_attributions - for opossum_information in expanded_opossum_information - ] - ), - attribution_breakpoints=_merge_attribution_breakpoints( - [ - opossum_information.attribution_breakpoints - for opossum_information in expanded_opossum_information - ] - ), - external_attribution_sources=_merge_dicts_without_duplicates( - [ - opossum_information.external_attribution_sources - for opossum_information in expanded_opossum_information - ] - ), - ) - - -def expand_opossum_package_identifier( - opossum_information: OpossumInformation, -) -> OpossumInformation: - """IDs for the attributions should be unique per OpossumInformation. 
- To prevent possible duplicates we add the projectId of the - OpossumInformation to the IDs as a prefix.""" - prefix = opossum_information.metadata.project_id - extended_resources_to_attributions = dict() - for ( - resource_path, - identifiers, - ) in opossum_information.resources_to_attributions.items(): - extended_resources_to_attributions[resource_path] = [ - prefix + "-" + identifier for identifier in identifiers - ] - extended_external_attributions = dict() - for ( - identifier, - external_attribution, - ) in opossum_information.external_attributions.items(): - extended_external_attributions[prefix + "-" + identifier] = external_attribution - - return OpossumInformation( - metadata=opossum_information.metadata, - resources=opossum_information.resources, - external_attributions=extended_external_attributions, - resources_to_attributions=extended_resources_to_attributions, - attribution_breakpoints=opossum_information.attribution_breakpoints, - external_attribution_sources=opossum_information.external_attribution_sources, - ) - - -def _merge_resources(resources: list[Resource]) -> Resource: - merged_resource = Resource(type=ResourceType.TOP_LEVEL) - for resource in resources: - for path in resource.get_paths_of_all_leaf_nodes_with_types(): - merged_resource = merged_resource.add_path(path) - return merged_resource - - -def _merge_resources_to_attributions( - resources_to_attributions: list[dict[ResourcePath, list[OpossumPackageIdentifier]]], -) -> dict[ResourcePath, list[OpossumPackageIdentifier]]: - merged_resources_to_attributions: dict[ - ResourcePath, list[OpossumPackageIdentifier] - ] = dict() - for resource_to_attribution in resources_to_attributions: - for resource_path, identifiers in resource_to_attribution.items(): - identifiers_merged = merged_resources_to_attributions.get(resource_path, []) - identifiers_merged.extend( - [idf for idf in identifiers if idf not in identifiers_merged] - ) - merged_resources_to_attributions[resource_path] = 
identifiers_merged - - return merged_resources_to_attributions - - -def _merge_attribution_breakpoints( - attribution_breakpoints_to_merge: list[list[str]], -) -> list[str]: - merged_attribution_breakpoints = [] - for attribution_breakpoints in attribution_breakpoints_to_merge: - merged_attribution_breakpoints.extend( - [ - attribution_breakpoint - for attribution_breakpoint in attribution_breakpoints - if attribution_breakpoint not in merged_attribution_breakpoints - ] - ) - return merged_attribution_breakpoints - - -def _merge_dicts_without_duplicates(dicts: list[dict[str, Any]]) -> dict[str, Any]: - merged_dict: dict[str, Any] = dict() - for single_dict in dicts: - for key, value in single_dict.items(): - if key in merged_dict and merged_dict.get(key) != value: - raise TypeError( - "Couldn't merge and deduplicate: " - "Values for identical keys don't match." - ) - merged_dict.update({key: value}) - return merged_dict diff --git a/src/opossum_lib/opossum/opossum_file_content.py b/src/opossum_lib/opossum/opossum_file_content.py deleted file mode 100644 index 900553f..0000000 --- a/src/opossum_lib/opossum/opossum_file_content.py +++ /dev/null @@ -1,72 +0,0 @@ -# SPDX-FileCopyrightText: TNG Technology Consulting GmbH -# -# SPDX-License-Identifier: Apache-2.0 -from __future__ import annotations - -import json -import logging -import sys -from zipfile import ZipFile - -from pydantic import BaseModel, TypeAdapter - -from opossum_lib.opossum.constants import INPUT_JSON_NAME, OUTPUT_JSON_NAME -from opossum_lib.opossum.opossum_file import OpossumInformation -from opossum_lib.opossum.output_model import OpossumOutputFile - - -class OpossumFileContent(BaseModel): - input_file: OpossumInformation - output_file: OpossumOutputFile | None = None - - @staticmethod - def from_file(file_name: str) -> OpossumFileContent: - logging.info(f"Converting opossum to opossum {file_name}") - - try: - with ( - ZipFile(file_name, "r") as input_zip_file, - ): - 
OpossumFileContent._validate_zip_file_contents(input_zip_file) - input_file = OpossumFileContent._read_input_json_from_zip_file( - input_zip_file - ) - return OpossumFileContent( - input_file=input_file, - output_file=OpossumFileContent._read_output_json_if_exists( - input_zip_file - ), - ) - except Exception as e: - print(f"Error reading file {file_name}: {e}") - sys.exit(1) - - @staticmethod - def _read_input_json_from_zip_file(input_zip_file: ZipFile) -> OpossumInformation: - with input_zip_file.open(INPUT_JSON_NAME) as input_json_file: - input_json = json.load(input_json_file) - input_file = OpossumInformation.model_validate(input_json) - return input_file - - @staticmethod - def _read_output_json_if_exists( - input_zip_file: ZipFile, - ) -> OpossumOutputFile | None: - if OUTPUT_JSON_NAME in input_zip_file.namelist(): - with input_zip_file.open(OUTPUT_JSON_NAME) as output_json_file: - output_json = json.load(output_json_file) - output_file = TypeAdapter(OpossumOutputFile).validate_python( - output_json - ) - else: - output_file = None - return output_file - - @staticmethod - def _validate_zip_file_contents(input_zip_file: ZipFile) -> None: - if INPUT_JSON_NAME not in input_zip_file.namelist(): - logging.error( - f"Opossum file {input_zip_file.filename} is corrupt" - f" and does not contain '{INPUT_JSON_NAME}'" - ) - sys.exit(1) diff --git a/src/opossum_lib/opossum/opossum_file_to_opossum_converter.py b/src/opossum_lib/opossum/opossum_file_to_opossum_converter.py deleted file mode 100644 index 6f9a10d..0000000 --- a/src/opossum_lib/opossum/opossum_file_to_opossum_converter.py +++ /dev/null @@ -1,281 +0,0 @@ -# SPDX-FileCopyrightText: TNG Technology Consulting GmbH -# -# SPDX-License-Identifier: Apache-2.0 - -from copy import deepcopy -from pathlib import PurePath - -import opossum_lib.opossum.opossum_file -from opossum_lib.opossum.opossum_file import ( - ExternalAttributionSource as FileExternalAttributionSource, -) -from opossum_lib.opossum.opossum_file import 
OpossumInformation -from opossum_lib.opossum.opossum_file_content import OpossumFileContent -from opossum_lib.opossum_model import ( - BaseUrlsForSources, - ExternalAttributionSource, - FrequentLicense, - Metadata, - Opossum, - OpossumPackage, - OpossumPackageIdentifier, - Resource, - ResourceType, - ScanResults, - SourceInfo, - _convert_path_to_str, -) - - -class OpossumFileToOpossumConverter: - @staticmethod - def convert_to_opossum(opossum_file: OpossumFileContent) -> Opossum: - opossum = Opossum( - scan_results=OpossumFileToOpossumConverter._convert_to_opossum_scan_results( - opossum_file.input_file - ), - review_results=opossum_file.output_file, - ) - return opossum - - @staticmethod - def _convert_to_opossum_scan_results( - opossum_information: OpossumInformation, - ) -> ScanResults: - resources, used_attribution_ids = ( - OpossumFileToOpossumConverter._convert_to_opossum_model_resource_tree( - resources=opossum_information.resources, - external_attributions=opossum_information.external_attributions, - resources_to_attributions=opossum_information.resources_to_attributions, - ) - ) - # fmt: off - frequent_licenses = ( - opossum_information.frequent_licenses - and OpossumFileToOpossumConverter - ._convert_frequent_licenses_to_model_frequent_licenses( - opossum_information.frequent_licenses - ) - ) - # fmt: on - - base_urls_for_sources = ( - opossum_information.base_urls_for_sources - and BaseUrlsForSources( - **(opossum_information.base_urls_for_sources.model_dump()) - ) - ) - - file_attribution_sources = opossum_information.external_attribution_sources - external_attribution_sources = { - name: OpossumFileToOpossumConverter._convert_external_attribution_source( - attribution_source - ) - for name, attribution_source in file_attribution_sources.items() - } - - attribution_with_id = ( - OpossumFileToOpossumConverter._convert_to_attribution_with_id( - opossum_information.external_attributions - ) - ) - return ScanResults( - 
metadata=OpossumFileToOpossumConverter._convert_to_opossum_model_metadata( - opossum_information.metadata - ), - resources=resources, - attribution_breakpoints=deepcopy( - opossum_information.attribution_breakpoints - ), - external_attribution_sources=external_attribution_sources, - frequent_licenses=frequent_licenses, - files_with_children=deepcopy(opossum_information.files_with_children), - base_urls_for_sources=base_urls_for_sources, - attribution_to_id=attribution_with_id, - unassigned_attributions=OpossumFileToOpossumConverter._get_unassigned_attributions( - used_attribution_ids, opossum_information.external_attributions - ), - ) - - @staticmethod - def _get_unassigned_attributions( - used_attribution_ids: set[OpossumPackageIdentifier], - external_attributions: dict[ - opossum_lib.opossum.opossum_file.OpossumPackageIdentifier, - opossum_lib.opossum.opossum_file.OpossumPackage, - ], - ) -> list[OpossumPackage] | None: - available_attribution_ids = external_attributions.keys() - unused_attributions_ids = set(available_attribution_ids) - used_attribution_ids - unused_attributions = [ - OpossumFileToOpossumConverter._convert_package(external_attributions[id]) - for id in unused_attributions_ids - ] - return unused_attributions - - @staticmethod - def _convert_external_attribution_source( - external_attribution_source: FileExternalAttributionSource, - ) -> ExternalAttributionSource: - return ExternalAttributionSource( - name=external_attribution_source.name, - priority=external_attribution_source.priority, - is_relevant_for_preferred=external_attribution_source.is_relevant_for_preferred, - ) - - @staticmethod - def _convert_frequent_licenses_to_model_frequent_licenses( - frequent_licenses_infile: list[ - opossum_lib.opossum.opossum_file.FrequentLicense - ], - ) -> list[FrequentLicense]: - frequent_licenses: list[FrequentLicense] = [ - OpossumFileToOpossumConverter._convert_frequent_license(license) - for license in frequent_licenses_infile - ] - return 
frequent_licenses - - @staticmethod - def _convert_to_opossum_model_metadata( - infile_metadata: opossum_lib.opossum.opossum_file.Metadata, - ) -> Metadata: - return Metadata(**infile_metadata.model_dump()) - - @staticmethod - def _convert_to_opossum_model_resource_tree( - resources: opossum_lib.opossum.opossum_file.ResourceInFile, - external_attributions: dict[ - opossum_lib.opossum.opossum_file.OpossumPackageIdentifier, - opossum_lib.opossum.opossum_file.OpossumPackage, - ], - resources_to_attributions: dict[ - opossum_lib.opossum.opossum_file.ResourcePath, - list[opossum_lib.opossum.opossum_file.OpossumPackageIdentifier], - ], - ) -> tuple[list[Resource], set[OpossumPackageIdentifier]]: - used_attribution_ids = set() - - def generate_child_resource( - current_path: PurePath, - to_insert: opossum_lib.opossum.opossum_file.ResourceInFile, - ) -> Resource: - path = current_path - current_path_as_string = _convert_path_to_str(current_path) - if not current_path_as_string.startswith("/"): - current_path_as_string = "/" + current_path_as_string - attributions, attribution_ids = _get_applicable_attributions( - current_path_as_string - ) - used_attribution_ids.update(attribution_ids) - if isinstance(to_insert, int): - resource_type = ResourceType.FILE - return Resource( - type=resource_type, - path=path, - attributions=attributions, - ) - else: - resource_type = ResourceType.FOLDER - return Resource( - type=resource_type, - path=path, - attributions=attributions, - children={ - relative_path: generate_child_resource( - current_path / relative_path, child - ) - for relative_path, child in to_insert.items() - }, - ) - - def _get_applicable_attributions( - current_path_as_string: str, - ) -> tuple[list[OpossumPackage], set[OpossumPackageIdentifier]]: - attributions = [] - attribution_ids: list[str] = [] - if current_path_as_string in resources_to_attributions: - attribution_ids = resources_to_attributions[current_path_as_string] - attributions = [ - 
OpossumFileToOpossumConverter._convert_package( - external_attributions[id] - ) - for id in attribution_ids - ] - return attributions, set(attribution_ids) - - root_path = PurePath("") - - if isinstance(resources, dict): - return [ - generate_child_resource(root_path / relative_path, child) - for relative_path, child in resources.items() - ], used_attribution_ids - else: - raise RuntimeError("Root node must not be of file type") - - @staticmethod - def _convert_to_attribution_with_id( - external_attributions: dict[ - opossum_lib.opossum.opossum_file.OpossumPackageIdentifier, - opossum_lib.opossum.opossum_file.OpossumPackage, - ], - ) -> dict[OpossumPackage, str]: - result = {} - for package_identifier, package in external_attributions.items(): - converted_package = OpossumFileToOpossumConverter._convert_package(package) - if converted_package not in result: - result[converted_package] = package_identifier - else: - raise RuntimeError( - "An attribution was duplicated in the " - "scan breaking internal assertions" - ) - return result - - @staticmethod - def _convert_frequent_license( - infile_frequent_license: opossum_lib.opossum.opossum_file.FrequentLicense, - ) -> FrequentLicense: - return FrequentLicense( - full_name=infile_frequent_license.full_name, - short_name=infile_frequent_license.short_name, - default_text=infile_frequent_license.default_text, - ) - - @staticmethod - def _convert_package( - infile_package: opossum_lib.opossum.opossum_file.OpossumPackage, - ) -> OpossumPackage: - pass - return OpossumPackage( - source=OpossumFileToOpossumConverter._convert_source(infile_package.source), - attribution_confidence=infile_package.attribution_confidence, - comment=infile_package.comment, - package_name=infile_package.package_name, - package_version=infile_package.package_version, - package_namespace=infile_package.package_namespace, - package_type=infile_package.package_type, - package_purl_appendix=infile_package.package_p_u_r_l_appendix, - 
copyright=infile_package.copyright, - license_name=infile_package.license_name, - license_text=infile_package.license_text, - url=infile_package.url, - first_party=infile_package.first_party, - exclude_from_notice=infile_package.exclude_from_notice, - pre_selected=infile_package.pre_selected, - follow_up=infile_package.follow_up, - origin_id=infile_package.origin_id, - origin_ids=infile_package.origin_ids, - criticality=infile_package.criticality, - was_preferred=infile_package.was_preferred, - ) - - @staticmethod - def _convert_source( - infile_source_info: opossum_lib.opossum.opossum_file.SourceInfo, - ) -> SourceInfo: - return SourceInfo( - name=infile_source_info.name, - document_confidence=infile_source_info.document_confidence, - additional_name=infile_source_info.additional_name, - ) diff --git a/src/opossum_lib/opossum/read_opossum_file.py b/src/opossum_lib/opossum/read_opossum_file.py deleted file mode 100644 index 21b3f19..0000000 --- a/src/opossum_lib/opossum/read_opossum_file.py +++ /dev/null @@ -1,15 +0,0 @@ -# SPDX-FileCopyrightText: TNG Technology Consulting GmbH -# -# SPDX-License-Identifier: Apache-2.0 - -from opossum_lib.opossum.opossum_file_content import OpossumFileContent -from opossum_lib.opossum.opossum_file_to_opossum_converter import ( - OpossumFileToOpossumConverter, -) - - -def read_opossum_file(filename: str) -> OpossumFileContent: - opossum_input_file = OpossumFileContent.from_file(file_name=filename) - opossum = OpossumFileToOpossumConverter.convert_to_opossum(opossum_input_file) - - return opossum.to_opossum_file_format() diff --git a/src/opossum_lib/opossum_model.py b/src/opossum_lib/opossum_model.py deleted file mode 100644 index 43ece1f..0000000 --- a/src/opossum_lib/opossum_model.py +++ /dev/null @@ -1,334 +0,0 @@ -# SPDX-FileCopyrightText: TNG Technology Consulting GmbH -# -# SPDX-License-Identifier: Apache-2.0 - -from __future__ import annotations - -import uuid -from collections import defaultdict -from collections.abc import 
Iterable -from copy import deepcopy -from dataclasses import field -from enum import Enum, auto -from pathlib import PurePath -from typing import Literal - -from pydantic import BaseModel, ConfigDict - -import opossum_lib.opossum.opossum_file as opossum_file -from opossum_lib.opossum.opossum_file_content import OpossumFileContent -from opossum_lib.opossum.output_model import OpossumOutputFile - -type OpossumPackageIdentifier = str -type ResourcePath = str - - -def _convert_path_to_str(path: PurePath) -> str: - return str(path).replace("\\", "/") - - -def default_attribution_id_mapper() -> dict[OpossumPackage, str]: - return defaultdict(lambda: str(uuid.uuid4())) - - -class Opossum(BaseModel): - model_config = ConfigDict(frozen=True, extra="forbid") - scan_results: ScanResults - review_results: OpossumOutputFile | None = None - - def to_opossum_file_format(self) -> OpossumFileContent: - return OpossumFileContent( - input_file=self.scan_results.to_opossum_file_format(), - output_file=self.review_results, - ) - - -class ScanResults(BaseModel): - model_config = ConfigDict(frozen=True, extra="forbid") - metadata: Metadata - resources: list[Resource] - attribution_breakpoints: list[str] = [] - external_attribution_sources: dict[str, ExternalAttributionSource] = {} - frequent_licenses: list[FrequentLicense] | None = None - files_with_children: list[str] | None = None - base_urls_for_sources: BaseUrlsForSources | None = None - attribution_to_id: dict[OpossumPackage, str] = field( - default_factory=default_attribution_id_mapper - ) - unassigned_attributions: list[OpossumPackage] = [] - - def to_opossum_file_format(self) -> opossum_file.OpossumInformation: - external_attributions, resources_to_attributions = ( - self.create_attribution_mapping(self.resources) - ) - external_attributions.update(self._get_unassigned_attributions()) - - frequent_licenses = None - if self.frequent_licenses: - frequent_licenses = [ - license.to_opossum_file_format() for license in 
self.frequent_licenses - ] - base_urls_for_sources = ( - self.base_urls_for_sources - and self.base_urls_for_sources.to_opossum_file_format() - ) - - external_attribution_sources = { - key: val.to_opossum_file_format() - for (key, val) in self.external_attribution_sources.items() - } - - return opossum_file.OpossumInformation( - metadata=self.metadata.to_opossum_file_format(), - resources={ - str(resource.path): resource.to_opossum_file_format() - for resource in self.resources - }, - external_attributions=external_attributions, - resources_to_attributions=resources_to_attributions, - attribution_breakpoints=deepcopy(self.attribution_breakpoints), - external_attribution_sources=external_attribution_sources, - frequent_licenses=frequent_licenses, - files_with_children=deepcopy(self.files_with_children), - base_urls_for_sources=base_urls_for_sources, - ) - - def _get_unassigned_attributions( - self, - ) -> dict[opossum_file.OpossumPackageIdentifier, opossum_file.OpossumPackage]: - if self.unassigned_attributions: - result = {} - for unassigned_attribution in self.unassigned_attributions: - if unassigned_attribution in self.attribution_to_id: - package_identifier = self.attribution_to_id[unassigned_attribution] - result[package_identifier] = ( - unassigned_attribution.to_opossum_file_format() - ) - else: - package_identifier = str(uuid.uuid4()) - self.attribution_to_id[unassigned_attribution] = package_identifier - result[package_identifier] = ( - unassigned_attribution.to_opossum_file_format() - ) - return result - else: - return {} - - def create_attribution_mapping( - self, - root_nodes: list[Resource], - ) -> tuple[ - dict[opossum_file.OpossumPackageIdentifier, opossum_file.OpossumPackage], - dict[opossum_file.ResourcePath, list[opossum_file.OpossumPackageIdentifier]], - ]: - external_attributions: dict[ - opossum_file.OpossumPackageIdentifier, opossum_file.OpossumPackage - ] = {} - resources_to_attributions: dict[ - opossum_file.ResourcePath, 
list[opossum_file.OpossumPackageIdentifier] - ] = {} - - def process_node(node: Resource) -> None: - path = _convert_path_to_str(node.path) - if not path.startswith("/"): - # the / is required by OpossumUI - path = "/" + path - - node_attributions_by_id = { - self.get_attribution_key(a): a.to_opossum_file_format() - for a in node.attributions - } - external_attributions.update(node_attributions_by_id) - - if len(node_attributions_by_id) > 0: - resources_to_attributions[path] = list(node_attributions_by_id.keys()) - - for child in node.children.values(): - process_node(child) - - for root in root_nodes: - process_node(root) - - return external_attributions, resources_to_attributions - - def get_attribution_key( - self, attribution: OpossumPackage - ) -> OpossumPackageIdentifier: - id = self.attribution_to_id[attribution] - self.attribution_to_id[attribution] = id - return id - - -class ResourceType(Enum): - FILE = auto() - FOLDER = auto() - - -class Resource(BaseModel): - model_config = ConfigDict(frozen=False, extra="forbid") - path: PurePath - type: ResourceType | None = None - attributions: list[OpossumPackage] = [] - children: dict[str, Resource] = {} - - def to_opossum_file_format(self) -> opossum_file.ResourceInFile: - if self.children or self.type == ResourceType.FOLDER: - return { - _convert_path_to_str( - child.path.relative_to(self.path) - ): child.to_opossum_file_format() - for child in self.children.values() - } - else: - return 1 - - def add_resource(self, resource: Resource) -> None: - if not resource.path.is_relative_to(self.path): - raise RuntimeError( - f"The path {resource.path} is not a child of this node at {self.path}." 
- ) - remaining_path_parts = resource.path.relative_to(self.path).parts - if remaining_path_parts: - self._add_resource(resource, remaining_path_parts) - else: - self._update(resource) - - def _add_resource( - self, resource: Resource, remaining_path_parts: Iterable[str] - ) -> None: - if not remaining_path_parts: - self._update(resource) - return - next, *rest_parts = remaining_path_parts - if next not in self.children: - self.children[next] = Resource(path=self.path / next) - self.children[next]._add_resource(resource, rest_parts) - - def _update(self, other: Resource) -> None: - if self.path != other.path: - raise RuntimeError( - "Trying to merge nodes with different paths: " - + f"{self.path} vs. {other.path}" - ) - if self.type and other.type and self.type != other.type: - raise RuntimeError( - "Trying to merge incompatible node types. " - + f"Current node is {self.type}. Other is {other.type}" - ) - self.type = self.type or other.type - self.attributions.extend(other.attributions) - for key, child in other.children.items(): - if key in self.children: - self.children[key]._update(child) - else: - self.children[key] = child - - -class BaseUrlsForSources(BaseModel): - model_config = ConfigDict(frozen=True, extra="allow") - - def to_opossum_file_format(self) -> opossum_file.BaseUrlsForSources: - return opossum_file.BaseUrlsForSources(**self.model_dump()) - - -class FrequentLicense(BaseModel): - model_config = ConfigDict(frozen=True, extra="forbid") - full_name: str - short_name: str - default_text: str - - def to_opossum_file_format(self) -> opossum_file.FrequentLicense: - return opossum_file.FrequentLicense( - full_name=self.full_name, - short_name=self.short_name, - default_text=self.default_text, - ) - - -class SourceInfo(BaseModel): - model_config = ConfigDict(frozen=True, extra="forbid") - name: str - document_confidence: int | float | None = 0 - additional_name: str | None = None - - def to_opossum_file_format(self) -> opossum_file.SourceInfo: - return 
opossum_file.SourceInfo( - name=self.name, - document_confidence=self.document_confidence, - additional_name=self.additional_name, - ) - - -class OpossumPackage(BaseModel): - model_config = ConfigDict(frozen=True, extra="forbid") - source: SourceInfo - attribution_confidence: int | None = None - comment: str | None = None - package_name: str | None = None - package_version: str | None = None - package_namespace: str | None = None - package_type: str | None = None - package_purl_appendix: str | None = None - copyright: str | None = None - license_name: str | None = None - license_text: str | None = None - url: str | None = None - first_party: bool | None = None - exclude_from_notice: bool | None = None - pre_selected: bool | None = None - follow_up: Literal["FOLLOW_UP"] | None = None - origin_id: str | None = None - origin_ids: tuple[str, ...] | None = None - criticality: Literal["high"] | Literal["medium"] | None = None - was_preferred: bool | None = None - - def to_opossum_file_format(self) -> opossum_file.OpossumPackage: - return opossum_file.OpossumPackage( - source=self.source.to_opossum_file_format(), - attribution_confidence=self.attribution_confidence, - comment=self.comment, - package_name=self.package_name, - package_version=self.package_version, - package_namespace=self.package_namespace, - package_type=self.package_type, - package_p_u_r_l_appendix=self.package_purl_appendix, - copyright=self.copyright, - license_name=self.license_name, - license_text=self.license_text, - url=self.url, - first_party=self.first_party, - exclude_from_notice=self.exclude_from_notice, - pre_selected=self.pre_selected, - follow_up=self.follow_up, - origin_id=self.origin_id, - origin_ids=self.origin_ids, - criticality=self.criticality, - was_preferred=self.was_preferred, - ) - - -class Metadata(BaseModel): - model_config = ConfigDict(frozen=True, extra="allow") - project_id: str - file_creation_date: str - project_title: str - project_version: str | None = None - 
expected_release_date: str | None = None - build_date: str | None = None - - def to_opossum_file_format(self) -> opossum_file.Metadata: - return opossum_file.Metadata(**self.model_dump()) - - -class ExternalAttributionSource(BaseModel): - model_config = ConfigDict(frozen=True, extra="forbid") - name: str - priority: int - is_relevant_for_preferred: bool | None = None - - def to_opossum_file_format(self) -> opossum_file.ExternalAttributionSource: - return opossum_file.ExternalAttributionSource( - name=self.name, - priority=self.priority, - is_relevant_for_preferred=self.is_relevant_for_preferred, - ) diff --git a/src/opossum_lib/scancode/convert_scancode_to_opossum.py b/src/opossum_lib/scancode/convert_scancode_to_opossum.py deleted file mode 100644 index 7f6e2dc..0000000 --- a/src/opossum_lib/scancode/convert_scancode_to_opossum.py +++ /dev/null @@ -1,109 +0,0 @@ -# SPDX-FileCopyrightText: TNG Technology Consulting GmbH -# -# SPDX-License-Identifier: Apache-2.0 - - -import json -import logging -import sys -import uuid -from pathlib import PurePath - -import opossum_lib.opossum_model as opossum_model -from opossum_lib.opossum.opossum_file_content import OpossumFileContent -from opossum_lib.scancode.constants import SCANCODE_SOURCE_NAME -from opossum_lib.scancode.model import File, FileType, Header, ScanCodeData - - -def convert_scancode_file_to_opossum(filename: str) -> OpossumFileContent: - logging.info(f"Converting scancode to opossum {filename}") - - scancode_data = load_scancode_json(filename) - - return convert_scancode_to_opossum(scancode_data).to_opossum_file_format() - - -def convert_scancode_to_opossum(scancode_data: ScanCodeData) -> opossum_model.Opossum: - resources = extract_opossum_resources(scancode_data) - - scancode_header = extract_scancode_header(scancode_data) - metadata = opossum_model.Metadata( - project_id=str(uuid.uuid4()), - file_creation_date=scancode_header.end_timestamp, - project_title="ScanCode file", - ) - - return 
opossum_model.Opossum( - scan_results=opossum_model.ScanResults( - metadata=metadata, - resources=resources, - ) - ) - - -def load_scancode_json(filename: str) -> ScanCodeData: - try: - with open(filename) as inp: - json_data = json.load(inp) - except json.JSONDecodeError as e: - logging.error(f"Error decoding json for file {filename}. Message: {e.msg}") - sys.exit(1) - except UnicodeDecodeError: - logging.error(f"Error decoding json for file {filename}.") - sys.exit(1) - - scancode_data = ScanCodeData.model_validate(json_data) - - return scancode_data - - -def extract_scancode_header(scancode_data: ScanCodeData) -> Header: - if len(scancode_data.headers) != 1: - logging.error("Headers of ScanCode file are invalid.") - sys.exit(1) - return scancode_data.headers[0] - - -def extract_opossum_resources( - scancode_data: ScanCodeData, -) -> list[opossum_model.Resource]: - temp_root = opossum_model.Resource(path=PurePath("")) - for file in scancode_data.files: - resource = opossum_model.Resource( - path=PurePath(file.path), - attributions=get_attribution_info(file), - type=convert_resource_type(file.type), - ) - temp_root.add_resource(resource) - - return list(temp_root.children.values()) - - -def convert_resource_type(file_type: FileType) -> opossum_model.ResourceType: - if file_type == FileType.FILE: - return opossum_model.ResourceType.FILE - else: - return opossum_model.ResourceType.FOLDER - - -def get_attribution_info(file: File) -> list[opossum_model.OpossumPackage]: - if file.type == FileType.DIRECTORY: - return [] - copyright = "\n".join(c.copyright for c in file.copyrights) - source_info = opossum_model.SourceInfo(name=SCANCODE_SOURCE_NAME) - - attribution_infos = [] - for license_detection in file.license_detections: - license_name = license_detection.license_expression_spdx - max_score = max(m.score for m in license_detection.matches) - attribution_confidence = int(max_score) - - package = opossum_model.OpossumPackage( - source=source_info, - 
license_name=license_name, - attribution_confidence=attribution_confidence, - copyright=copyright, - ) - attribution_infos.append(package) - - return attribution_infos diff --git a/src/opossum_lib/shared/__init__.py b/src/opossum_lib/shared/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/opossum_lib/opossum/constants.py b/src/opossum_lib/shared/constants.py similarity index 100% rename from src/opossum_lib/opossum/constants.py rename to src/opossum_lib/shared/constants.py diff --git a/src/opossum_lib/shared/entities/__init__.py b/src/opossum_lib/shared/entities/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/opossum_lib/shared/entities/camel_base_model.py b/src/opossum_lib/shared/entities/camel_base_model.py new file mode 100644 index 0000000..4ef53ee --- /dev/null +++ b/src/opossum_lib/shared/entities/camel_base_model.py @@ -0,0 +1,14 @@ +# SPDX-FileCopyrightText: TNG Technology Consulting GmbH +# # +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +from pydantic import BaseModel, ConfigDict +from pydantic.alias_generators import to_camel + + +class CamelBaseModel(BaseModel): + model_config = ConfigDict( + alias_generator=to_camel, populate_by_name=True, extra="forbid", frozen=True + ) diff --git a/src/opossum_lib/shared/entities/opossum_file_model.py b/src/opossum_lib/shared/entities/opossum_file_model.py new file mode 100644 index 0000000..ea114f3 --- /dev/null +++ b/src/opossum_lib/shared/entities/opossum_file_model.py @@ -0,0 +1,14 @@ +# SPDX-FileCopyrightText: TNG Technology Consulting GmbH +# +# SPDX-License-Identifier: Apache-2.0 +from __future__ import annotations + +from pydantic import BaseModel + +from opossum_lib.shared.entities.opossum_input_file_model import OpossumInputFileModel +from opossum_lib.shared.entities.opossum_output_file_model import OpossumOutputFileModel + + +class OpossumFileModel(BaseModel): + input_file: OpossumInputFileModel + output_file: 
OpossumOutputFileModel | None = None diff --git a/src/opossum_lib/opossum/opossum_file.py b/src/opossum_lib/shared/entities/opossum_input_file_model.py similarity index 65% rename from src/opossum_lib/opossum/opossum_file.py rename to src/opossum_lib/shared/entities/opossum_input_file_model.py index bffd41f..b664e36 100644 --- a/src/opossum_lib/opossum/opossum_file.py +++ b/src/opossum_lib/shared/entities/opossum_input_file_model.py @@ -6,37 +6,34 @@ from copy import deepcopy from dataclasses import field from enum import Enum, auto -from typing import Literal, cast +from typing import Literal -from pydantic import BaseModel, ConfigDict, model_serializer -from pydantic.alias_generators import to_camel +from pydantic import ConfigDict, model_serializer -type OpossumPackageIdentifier = str -type ResourcePath = str -type ResourceInFile = dict[str, ResourceInFile] | int +from opossum_lib.shared.entities.camel_base_model import CamelBaseModel - -class CamelBaseModel(BaseModel): - model_config = ConfigDict( - alias_generator=to_camel, populate_by_name=True, extra="forbid", frozen=True - ) +type OpossumPackageIdentifierModel = str +type ResourcePathModel = str +type ResourceInFileModel = dict[str, ResourceInFileModel] | int -class OpossumInformation(CamelBaseModel): - metadata: Metadata - resources: ResourceInFile - external_attributions: dict[OpossumPackageIdentifier, OpossumPackage] - resources_to_attributions: dict[ResourcePath, list[OpossumPackageIdentifier]] +class OpossumInputFileModel(CamelBaseModel): + metadata: MetadataModel + resources: ResourceInFileModel + external_attributions: dict[OpossumPackageIdentifierModel, OpossumPackageModel] + resources_to_attributions: dict[ + ResourcePathModel, list[OpossumPackageIdentifierModel] + ] attribution_breakpoints: list[str] = field(default_factory=list) - external_attribution_sources: dict[str, ExternalAttributionSource] = field( + external_attribution_sources: dict[str, ExternalAttributionSourceModel] = field( 
default_factory=dict ) - frequent_licenses: list[FrequentLicense] | None = None + frequent_licenses: list[FrequentLicenseModel] | None = None files_with_children: list[str] | None = None - base_urls_for_sources: BaseUrlsForSources | None = None + base_urls_for_sources: BaseUrlsForSourcesModel | None = None -class BaseUrlsForSources(CamelBaseModel): +class BaseUrlsForSourcesModel(CamelBaseModel): @model_serializer def serialize(self) -> dict: # hack to override not serializing keys with corresponding value none: @@ -46,20 +43,20 @@ def serialize(self) -> dict: model_config = ConfigDict(extra="allow") -class FrequentLicense(CamelBaseModel): +class FrequentLicenseModel(CamelBaseModel): full_name: str short_name: str default_text: str -class SourceInfo(CamelBaseModel): +class SourceInfoModel(CamelBaseModel): name: str document_confidence: int | float | None = 0 additional_name: str | None = None -class OpossumPackage(CamelBaseModel): - source: SourceInfo +class OpossumPackageModel(CamelBaseModel): + source: SourceInfoModel attribution_confidence: int | None = None comment: str | None = None package_name: str | None = None @@ -81,7 +78,7 @@ class OpossumPackage(CamelBaseModel): was_preferred: bool | None = None -class Metadata(CamelBaseModel): +class MetadataModel(CamelBaseModel): model_config = ConfigDict(extra="allow") project_id: str file_creation_date: str @@ -91,20 +88,20 @@ class Metadata(CamelBaseModel): build_date: str | None = None -class ResourceType(Enum): +class ResourceTypeModel(Enum): FILE = auto() FOLDER = auto() TOP_LEVEL = auto() OTHER = auto() -class Resource(CamelBaseModel): - type: ResourceType - children: dict[str, Resource] = field(default_factory=dict) +class ResourceModel(CamelBaseModel): + type: ResourceTypeModel + children: dict[str, ResourceModel] = field(default_factory=dict) def add_path( - self, path_with_resource_types: list[tuple[str, ResourceType]] - ) -> Resource: + self, path_with_resource_types: list[tuple[str, ResourceTypeModel]] + ) 
-> ResourceModel: resource = deepcopy(self) if len(path_with_resource_types) == 0: return resource @@ -118,21 +115,21 @@ def add_path( " the same path differ." ) if first not in self.children: - resource.children[first] = Resource(type=resource_type) + resource.children[first] = ResourceModel(type=resource_type) resource.children[first] = resource.children[first].add_path(rest) return resource def element_exists_but_resource_type_differs( - self, element: str, resource_type: ResourceType + self, element: str, resource_type: ResourceTypeModel ) -> bool: if element in self.children: return self.children[element].type != resource_type return False def drop_element( - self, path_to_element_to_drop: list[tuple[str, ResourceType]] - ) -> Resource: + self, path_to_element_to_drop: list[tuple[str, ResourceTypeModel]] + ) -> ResourceModel: paths_in_resource = self.get_paths_of_all_leaf_nodes_with_types() if path_to_element_to_drop not in paths_in_resource: raise ValueError( @@ -140,7 +137,7 @@ def drop_element( ) else: - resource = Resource(type=ResourceType.TOP_LEVEL) + resource = ResourceModel(type=ResourceTypeModel.TOP_LEVEL) paths_in_resource.remove(path_to_element_to_drop) paths_in_resource.append(path_to_element_to_drop[:-1]) @@ -149,9 +146,9 @@ def drop_element( return resource - def to_dict(self) -> ResourceInFile: + def to_dict(self) -> ResourceInFileModel: if not self.has_children(): - if self.type == ResourceType.FOLDER: + if self.type == ResourceTypeModel.FOLDER: return {} else: return 1 @@ -162,7 +159,7 @@ def to_dict(self) -> ResourceInFile: def get_paths_of_all_leaf_nodes_with_types( self, - ) -> list[list[tuple[str, ResourceType]]]: + ) -> list[list[tuple[str, ResourceTypeModel]]]: paths = [] for name, resource in self.children.items(): path = [(name, resource.type)] @@ -180,33 +177,12 @@ def get_paths_of_all_leaf_nodes_with_types( def has_children(self) -> bool: return len(self.children) > 0 - def convert_to_file_resource(self) -> ResourceInFile: + def 
convert_to_file_resource(self) -> ResourceInFileModel: return self.to_dict() -class ExternalAttributionSource(CamelBaseModel): +class ExternalAttributionSourceModel(CamelBaseModel): model_config = ConfigDict(frozen=True) name: str priority: int is_relevant_for_preferred: bool | None = None - - -def _build_resource_tree(resource: ResourceInFile) -> Resource: - if isinstance(resource, int): - return Resource(type=ResourceType.FILE) - else: - result = Resource(type=ResourceType.FOLDER) - for name, child_resource in resource.items(): - result.children[name] = _build_resource_tree(child_resource) - return result - - -def convert_resource_in_file_to_resource(resource: ResourceInFile) -> Resource: - root_node = Resource(type=ResourceType.TOP_LEVEL) - - if isinstance(resource, dict): - dict_resource = cast(dict[str, ResourceInFile], resource) - for name, child_resource in dict_resource.items(): - root_node.children[name] = _build_resource_tree(child_resource) - - return root_node diff --git a/src/opossum_lib/opossum/output_model.py b/src/opossum_lib/shared/entities/opossum_output_file_model.py similarity index 94% rename from src/opossum_lib/opossum/output_model.py rename to src/opossum_lib/shared/entities/opossum_output_file_model.py index a00121f..e9f9337 100644 --- a/src/opossum_lib/opossum/output_model.py +++ b/src/opossum_lib/shared/entities/opossum_output_file_model.py @@ -9,14 +9,9 @@ from enum import Enum from typing import Annotated -from pydantic import BaseModel, ConfigDict, Field -from pydantic.alias_generators import to_camel +from pydantic import ConfigDict, Field - -class CamelBaseModel(BaseModel): - model_config = ConfigDict( - alias_generator=to_camel, populate_by_name=True, extra="forbid", frozen=True - ) +from opossum_lib.shared.entities.camel_base_model import CamelBaseModel class Metadata(CamelBaseModel): @@ -129,7 +124,7 @@ class ManualAttributions(CamelBaseModel): ) -class OpossumOutputFile(CamelBaseModel): +class 
OpossumOutputFileModel(CamelBaseModel): model_config = ConfigDict(extra="allow") metadata: Metadata diff --git a/src/opossum_lib/shared/services/__init__.py b/src/opossum_lib/shared/services/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/conftest.py b/tests/conftest.py index 83850a8..2e2ad4c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -5,12 +5,12 @@ import pytest from faker.proxy import Faker -from tests.test_setup.opossum_faker_setup import OpossumFaker, setup_opossum_faker -from tests.test_setup.opossum_file_faker_setup import ( +from tests.setup.opossum_faker_setup import OpossumFaker, setup_opossum_faker +from tests.setup.opossum_file_faker_setup import ( OpossumFileFaker, setup_opossum_file_faker, ) -from tests.test_setup.scancode_faker_setup import ScanCodeFaker, setup_scancode_faker +from tests.setup.scancode_faker_setup import ScanCodeFaker, setup_scancode_faker @pytest.fixture diff --git a/tests/core/__init__.py b/tests/core/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/core/entities/__init__.py b/tests/core/entities/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/core/entities/generators/__init__.py b/tests/core/entities/generators/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/opossum_model_generators/external_attribution_source_provider.py b/tests/core/entities/generators/external_attribution_source_provider.py similarity index 90% rename from tests/opossum_model_generators/external_attribution_source_provider.py rename to tests/core/entities/generators/external_attribution_source_provider.py index 5856e1e..10699df 100644 --- a/tests/opossum_model_generators/external_attribution_source_provider.py +++ b/tests/core/entities/generators/external_attribution_source_provider.py @@ -7,8 +7,10 @@ from faker.providers.misc import Provider as MiscProvider from faker.providers.person import Provider as PersonProvider -from 
opossum_lib.opossum_model import ExternalAttributionSource -from tests.util.generator_helpers import entry_or_none +from opossum_lib.core.entities.external_attribution_source import ( + ExternalAttributionSource, +) +from tests.shared.generator_helpers import entry_or_none class ExternalAttributionSourceProvider(BaseProvider): diff --git a/tests/opossum_model_generators/metadata_provider.py b/tests/core/entities/generators/metadata_provider.py similarity index 94% rename from tests/opossum_model_generators/metadata_provider.py rename to tests/core/entities/generators/metadata_provider.py index 4905079..2be31f7 100644 --- a/tests/opossum_model_generators/metadata_provider.py +++ b/tests/core/entities/generators/metadata_provider.py @@ -8,8 +8,8 @@ from faker.providers.lorem.en_US import Provider as LoremProvider from faker.providers.misc import Provider as MiscProvider -from opossum_lib.opossum_model import Metadata -from tests.util.generator_helpers import entry_or_none +from opossum_lib.core.entities.metadata import Metadata +from tests.shared.generator_helpers import entry_or_none class MetadataProvider(BaseProvider): diff --git a/tests/opossum_model_generators/opossum_provider.py b/tests/core/entities/generators/opossum_provider.py similarity index 67% rename from tests/opossum_model_generators/opossum_provider.py rename to tests/core/entities/generators/opossum_provider.py index 0151cda..8f0ab05 100644 --- a/tests/opossum_model_generators/opossum_provider.py +++ b/tests/core/entities/generators/opossum_provider.py @@ -5,10 +5,11 @@ from faker.providers import BaseProvider -from opossum_lib.opossum.output_model import OpossumOutputFile -from opossum_lib.opossum_model import Opossum, ScanResults -from tests.opossum_model_generators.scan_results_provider import ScanResultsProvider -from tests.test_opossum.generators.generate_outfile_information import ( +from opossum_lib.core.entities.opossum import Opossum +from opossum_lib.core.entities.scan_results import 
ScanResults +from opossum_lib.shared.entities.opossum_output_file_model import OpossumOutputFileModel +from tests.core.entities.generators.scan_results_provider import ScanResultsProvider +from tests.input_formats.opossum.entities.generators.generate_outfile_information import ( # noqa: E501 OpossumOutputFileProvider, ) @@ -25,7 +26,7 @@ def __init__(self, generator: Any): def opossum( self, scan_results: ScanResults | None = None, - review_results: OpossumOutputFile | None = None, + review_results: OpossumOutputFileModel | None = None, ) -> Opossum: return Opossum( scan_results=scan_results or self.scan_results_provider.scan_results(), diff --git a/tests/opossum_model_generators/package_provider.py b/tests/core/entities/generators/package_provider.py similarity index 94% rename from tests/opossum_model_generators/package_provider.py rename to tests/core/entities/generators/package_provider.py index 6e23216..1878085 100644 --- a/tests/opossum_model_generators/package_provider.py +++ b/tests/core/entities/generators/package_provider.py @@ -10,9 +10,10 @@ from faker.providers.misc.en_US import Provider as MiscProvider from faker.providers.person.en_US import Provider as PersonProvider -from opossum_lib.opossum_model import OpossumPackage, SourceInfo -from tests.opossum_model_generators.source_info_provider import SourceInfoProvider -from tests.util.generator_helpers import entry_or_none, random_list +from opossum_lib.core.entities.opossum_package import OpossumPackage +from opossum_lib.core.entities.source_info import SourceInfo +from tests.core.entities.generators.source_info_provider import SourceInfoProvider +from tests.shared.generator_helpers import entry_or_none, random_list class PackageProvider(BaseProvider): diff --git a/tests/opossum_model_generators/resource_provider.py b/tests/core/entities/generators/resource_provider.py similarity index 91% rename from tests/opossum_model_generators/resource_provider.py rename to 
tests/core/entities/generators/resource_provider.py index dffae12..79bf4f9 100644 --- a/tests/opossum_model_generators/resource_provider.py +++ b/tests/core/entities/generators/resource_provider.py @@ -8,9 +8,10 @@ from faker.providers.file import Provider as FileProvider from faker.providers.misc import Provider as MiscProvider -from opossum_lib.opossum_model import OpossumPackage, Resource, ResourceType -from tests.opossum_model_generators.package_provider import PackageProvider -from tests.util.generator_helpers import random_list +from opossum_lib.core.entities.opossum_package import OpossumPackage +from opossum_lib.core.entities.resource import Resource, ResourceType +from tests.core.entities.generators.package_provider import PackageProvider +from tests.shared.generator_helpers import random_list class ResourceProvider(BaseProvider): diff --git a/tests/opossum_model_generators/scan_results_provider.py b/tests/core/entities/generators/scan_results_provider.py similarity index 86% rename from tests/opossum_model_generators/scan_results_provider.py rename to tests/core/entities/generators/scan_results_provider.py index f62781f..00d2a43 100644 --- a/tests/opossum_model_generators/scan_results_provider.py +++ b/tests/core/entities/generators/scan_results_provider.py @@ -8,23 +8,22 @@ from faker.providers.file.en_US import Provider as FileProvider from faker.providers.misc.en_US import Provider as MiscProvider -from opossum_lib.opossum_model import ( - BaseUrlsForSources, +from opossum_lib.core.entities.base_url_for_sources import BaseUrlsForSources +from opossum_lib.core.entities.external_attribution_source import ( ExternalAttributionSource, - FrequentLicense, - Metadata, - OpossumPackage, - Resource, - ResourceType, - ScanResults, ) -from tests.opossum_model_generators.external_attribution_source_provider import ( +from opossum_lib.core.entities.frequent_license import FrequentLicense +from opossum_lib.core.entities.metadata import Metadata +from 
opossum_lib.core.entities.opossum_package import OpossumPackage +from opossum_lib.core.entities.resource import Resource, ResourceType +from opossum_lib.core.entities.scan_results import ScanResults +from tests.core.entities.generators.external_attribution_source_provider import ( ExternalAttributionSourceProvider, ) -from tests.opossum_model_generators.metadata_provider import MetadataProvider -from tests.opossum_model_generators.package_provider import PackageProvider -from tests.opossum_model_generators.resource_provider import ResourceProvider -from tests.util.generator_helpers import entry_or_none, random_list +from tests.core.entities.generators.metadata_provider import MetadataProvider +from tests.core.entities.generators.package_provider import PackageProvider +from tests.core.entities.generators.resource_provider import ResourceProvider +from tests.shared.generator_helpers import entry_or_none, random_list class ScanResultsProvider(BaseProvider): diff --git a/tests/opossum_model_generators/source_info_provider.py b/tests/core/entities/generators/source_info_provider.py similarity index 90% rename from tests/opossum_model_generators/source_info_provider.py rename to tests/core/entities/generators/source_info_provider.py index abd8f17..a61a0ca 100644 --- a/tests/opossum_model_generators/source_info_provider.py +++ b/tests/core/entities/generators/source_info_provider.py @@ -7,8 +7,8 @@ from faker.providers.misc import Provider as MiscProvider from faker.providers.person.en_US import Provider as PersonProvider -from opossum_lib.opossum_model import SourceInfo -from tests.util.generator_helpers import entry_or_none +from opossum_lib.core.entities.source_info import SourceInfo +from tests.shared.generator_helpers import entry_or_none class SourceInfoProvider(BaseProvider): diff --git a/tests/test_opossum_model.py b/tests/core/entities/test_opossum.py similarity index 76% rename from tests/test_opossum_model.py rename to tests/core/entities/test_opossum.py index 
f6928fb..0a26459 100644 --- a/tests/test_opossum_model.py +++ b/tests/core/entities/test_opossum.py @@ -4,17 +4,17 @@ import json from copy import deepcopy -from opossum_lib.opossum.opossum_file_to_opossum_converter import ( - OpossumFileToOpossumConverter, +from opossum_lib.input_formats.opossum.services.convert_to_opossum import ( # noqa: E501 + convert_to_opossum, ) -from tests.test_setup.opossum_faker_setup import OpossumFaker +from tests.setup.opossum_faker_setup import OpossumFaker -class TestOpossumModelToOpossumFileConversion: +class TestOpossumToOpossumModelConversion: def test_moves_outfile(self, opossum_faker: OpossumFaker) -> None: opossum = opossum_faker.opossum() - result = opossum.to_opossum_file_format() + result = opossum.to_opossum_model() assert result.output_file == opossum.review_results @@ -22,9 +22,9 @@ def test_roundtrip(self, opossum_faker: OpossumFaker) -> None: opossum = opossum_faker.opossum() expected_result = deepcopy(opossum) - opossum_file = opossum.to_opossum_file_format() + opossum_file = opossum.to_opossum_model() - result = OpossumFileToOpossumConverter.convert_to_opossum(opossum_file) + result = convert_to_opossum(opossum_file) ## this can change due to the generation of new ids result_json = result.model_dump_json() @@ -51,8 +51,8 @@ def test_roundtrip_with_resource_ids(self, opossum_faker: OpossumFaker) -> None: ) expected_result = deepcopy(opossum) - opossum_file = opossum.to_opossum_file_format() + opossum_file = opossum.to_opossum_model() - result = OpossumFileToOpossumConverter.convert_to_opossum(opossum_file) + result = convert_to_opossum(opossum_file) assert result == expected_result diff --git a/tests/core/services/__init__.py b/tests/core/services/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/core/services/test_write_opossum_file.py b/tests/core/services/test_write_opossum_file.py new file mode 100644 index 0000000..8a8260f --- /dev/null +++ b/tests/core/services/test_write_opossum_file.py 
@@ -0,0 +1,41 @@ +# SPDX-FileCopyrightText: TNG Technology Consulting GmbH +# +# SPDX-License-Identifier: Apache-2.0 + +from pathlib import Path +from zipfile import ZipFile + +from opossum_lib.core.services.write_opossum_file import write_opossum_file +from opossum_lib.shared.constants import ( + INPUT_JSON_NAME, + OUTPUT_JSON_NAME, +) +from opossum_lib.shared.entities.opossum_file_model import OpossumFileModel +from tests.setup.opossum_file_faker_setup import OpossumFileFaker + + +class TestWriteOpossumFile: + def test_only_input_information_available_writes_only_input_information( + self, tmp_path: Path, opossum_file_faker: OpossumFileFaker + ) -> None: + opossum_file_content = OpossumFileModel( + input_file=opossum_file_faker.opossum_file_information() + ) + output_path = tmp_path / "output.opossum" + + write_opossum_file(opossum_file_content, output_path) + + with ZipFile(output_path, "r") as zip_file: + assert zip_file.namelist() == [INPUT_JSON_NAME] + + def test_input_and_output_information_available_writes_both( + self, tmp_path: Path, opossum_file_faker: OpossumFileFaker + ) -> None: + opossum_file_content = opossum_file_faker.opossum_file_content() + output_path = tmp_path / "output.opossum" + + write_opossum_file(opossum_file_content, output_path) + + with ZipFile(output_path, "r") as zip_file: + assert INPUT_JSON_NAME in zip_file.namelist() + assert OUTPUT_JSON_NAME in zip_file.namelist() diff --git a/tests/input_formats/__init__.py b/tests/input_formats/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/input_formats/opossum/__init__.py b/tests/input_formats/opossum/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/input_formats/opossum/entities/__init__.py b/tests/input_formats/opossum/entities/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/input_formats/opossum/entities/generators/__init__.py b/tests/input_formats/opossum/entities/generators/__init__.py new file mode 100644 
index 0000000..e69de29 diff --git a/tests/test_opossum/generators/generate_file_information.py b/tests/input_formats/opossum/entities/generators/generate_file_information.py similarity index 87% rename from tests/test_opossum/generators/generate_file_information.py rename to tests/input_formats/opossum/entities/generators/generate_file_information.py index 1664ea9..0541822 100644 --- a/tests/test_opossum/generators/generate_file_information.py +++ b/tests/input_formats/opossum/entities/generators/generate_file_information.py @@ -13,19 +13,19 @@ from faker.providers.misc import Provider as MiscProvider from faker.providers.person import Provider as PersonProvider -from opossum_lib.opossum.opossum_file import ( - BaseUrlsForSources, - ExternalAttributionSource, - FrequentLicense, - Metadata, - OpossumInformation, - OpossumPackage, - OpossumPackageIdentifier, - ResourceInFile, - ResourcePath, - SourceInfo, +from opossum_lib.shared.entities.opossum_input_file_model import ( + BaseUrlsForSourcesModel, + ExternalAttributionSourceModel, + FrequentLicenseModel, + MetadataModel, + OpossumInputFileModel, + OpossumPackageIdentifierModel, + OpossumPackageModel, + ResourceInFileModel, + ResourcePathModel, + SourceInfoModel, ) -from tests.util.generator_helpers import entry_or_none, random_list +from tests.shared.generator_helpers import entry_or_none, random_list class MetadataProvider(BaseProvider): @@ -48,8 +48,8 @@ def opossum_input_metadata( project_version: str | None = None, expected_release_date: str | None = None, build_date: str | None = None, - ) -> Metadata: - return Metadata( + ) -> MetadataModel: + return MetadataModel( project_id=project_id or "project-id-" + self.lorem_provider.word(), file_creation_date=file_creation_date or self.date_time_provider.date_time().isoformat(), @@ -87,24 +87,26 @@ def __init__(self, generator: Any): def opossum_file_information( self, *, - metadata: Metadata | None = None, - resources: ResourceInFile | None = None, - 
external_attributions: dict[OpossumPackageIdentifier, OpossumPackage] + metadata: MetadataModel | None = None, + resources: ResourceInFileModel | None = None, + external_attributions: dict[OpossumPackageIdentifierModel, OpossumPackageModel] | None = None, - resources_to_attributions: dict[ResourcePath, list[OpossumPackageIdentifier]] + resources_to_attributions: dict[ + ResourcePathModel, list[OpossumPackageIdentifierModel] + ] | None = None, attribution_breakpoints: list[str] | None = None, - external_attribution_sources: dict[str, ExternalAttributionSource] + external_attribution_sources: dict[str, ExternalAttributionSourceModel] | None = None, - frequent_licenses: list[FrequentLicense] | None = None, + frequent_licenses: list[FrequentLicenseModel] | None = None, files_with_children: list[str] | None = None, - base_urls_for_sources: BaseUrlsForSources | None = None, - ) -> OpossumInformation: + base_urls_for_sources: BaseUrlsForSourcesModel | None = None, + ) -> OpossumInputFileModel: generated_resources = resources or self.resource_in_file() attributions = external_attributions or self.external_attributions( min_number_of_attributions=25 ) - return OpossumInformation( + return OpossumInputFileModel( metadata=metadata or self.metadata_provider.opossum_input_metadata(), resources=generated_resources, external_attributions=attributions, @@ -126,7 +128,7 @@ def resource_in_file( depth: int = 3, max_folders_per_level: int = 3, max_files_per_level: int = 3, - ) -> ResourceInFile: + ) -> ResourceInFileModel: if depth == 0: files = self.random_int(0, max_files_per_level) return { @@ -153,8 +155,8 @@ def source_info( name: str | None = None, document_confidence: int | float | None = None, additional_name: str | None = None, - ) -> SourceInfo: - return SourceInfo( + ) -> SourceInfoModel: + return SourceInfoModel( name=name or self.person_provider.name(), document_confidence=document_confidence or entry_or_none(self.misc_provider, self.random_int(0, 100)), @@ -164,7 +166,7 
@@ def source_info( def opossum_package( self, - source: SourceInfo | None = None, + source: SourceInfoModel | None = None, attribution_confidence: int | None = None, comment: str | None = None, package_name: str | None = None, @@ -184,8 +186,8 @@ def opossum_package( origin_ids: list[str] | None = None, criticality: Literal["high"] | Literal["medium"] | None = None, was_preferred: bool | None = None, - ) -> OpossumPackage: - return OpossumPackage( + ) -> OpossumPackageModel: + return OpossumPackageModel( source=source or self.source_info(), attribution_confidence=attribution_confidence or entry_or_none(self.misc_provider, self.random_int()), @@ -242,7 +244,7 @@ def opossum_package( def external_attributions( self, max_number_of_attributions: int = 50, min_number_of_attributions: int = 5 - ) -> dict[OpossumPackageIdentifier, OpossumPackage]: + ) -> dict[OpossumPackageIdentifierModel, OpossumPackageModel]: number_of_attributions = self.random_int( min_number_of_attributions, max_number_of_attributions ) @@ -253,10 +255,12 @@ def external_attributions( def resources_to_attributions( self, - resources: ResourceInFile, - external_attributions: dict[OpossumPackageIdentifier, OpossumPackage], - ) -> dict[ResourcePath, list[OpossumPackageIdentifier]]: - def get_file_paths(resource: ResourceInFile, current_path: str) -> list[str]: + resources: ResourceInFileModel, + external_attributions: dict[OpossumPackageIdentifierModel, OpossumPackageModel], + ) -> dict[ResourcePathModel, list[OpossumPackageIdentifierModel]]: + def get_file_paths( + resource: ResourceInFileModel, current_path: str + ) -> list[str]: if isinstance(resource, int): return [] resulting_file_paths = [] @@ -294,8 +298,8 @@ def external_attribution_source( name: str | None = None, priority: int | None = None, is_relevant_for_preferred: bool | None = None, - ) -> ExternalAttributionSource: - return ExternalAttributionSource( + ) -> ExternalAttributionSourceModel: + return ExternalAttributionSourceModel( 
name=name or self.person_provider.name(), priority=priority or self.random_int(1, 100), is_relevant_for_preferred=is_relevant_for_preferred @@ -304,7 +308,7 @@ def external_attribution_source( def external_attribution_sources( self, max_nb_of_external_attributions: int = 5 - ) -> dict[str, ExternalAttributionSource]: + ) -> dict[str, ExternalAttributionSourceModel]: nb_of_external_attributions = self.random_int( 1, max_nb_of_external_attributions ) diff --git a/tests/test_opossum/generators/generate_opossum_file_content.py b/tests/input_formats/opossum/entities/generators/generate_opossum_file_content.py similarity index 57% rename from tests/test_opossum/generators/generate_opossum_file_content.py rename to tests/input_formats/opossum/entities/generators/generate_opossum_file_content.py index 9ec17e6..3d6a77e 100644 --- a/tests/test_opossum/generators/generate_opossum_file_content.py +++ b/tests/input_formats/opossum/entities/generators/generate_opossum_file_content.py @@ -5,13 +5,13 @@ from faker.providers import BaseProvider -from opossum_lib.opossum.opossum_file import OpossumInformation -from opossum_lib.opossum.opossum_file_content import OpossumFileContent -from opossum_lib.opossum.output_model import OpossumOutputFile -from tests.test_opossum.generators.generate_file_information import ( +from opossum_lib.shared.entities.opossum_file_model import OpossumFileModel +from opossum_lib.shared.entities.opossum_input_file_model import OpossumInputFileModel +from opossum_lib.shared.entities.opossum_output_file_model import OpossumOutputFileModel +from tests.input_formats.opossum.entities.generators.generate_file_information import ( FileInformationProvider, ) -from tests.test_opossum.generators.generate_outfile_information import ( +from tests.input_formats.opossum.entities.generators.generate_outfile_information import ( # noqa: E501 OpossumOutputFileProvider, ) @@ -27,10 +27,10 @@ def __init__(self, generator: Any): def opossum_file_content( self, - in_file: 
OpossumInformation | None = None, - out_file: OpossumOutputFile | None = None, - ) -> OpossumFileContent: - return OpossumFileContent( + in_file: OpossumInputFileModel | None = None, + out_file: OpossumOutputFileModel | None = None, + ) -> OpossumFileModel: + return OpossumFileModel( input_file=in_file or self.infile_provider.opossum_file_information(), output_file=out_file or self.outfile_provider.output_file(), ) diff --git a/tests/test_opossum/generators/generate_outfile_information.py b/tests/input_formats/opossum/entities/generators/generate_outfile_information.py similarity index 89% rename from tests/test_opossum/generators/generate_outfile_information.py rename to tests/input_formats/opossum/entities/generators/generate_outfile_information.py index dc2a578..3a56e00 100644 --- a/tests/test_opossum/generators/generate_outfile_information.py +++ b/tests/input_formats/opossum/entities/generators/generate_outfile_information.py @@ -9,12 +9,12 @@ from faker.providers.lorem.en_US import Provider as LoremProvider from faker.providers.misc import Provider as MiscProvider -from opossum_lib.opossum.output_model import ( +from opossum_lib.shared.entities.opossum_output_file_model import ( ManualAttributions, Metadata, - OpossumOutputFile, + OpossumOutputFileModel, ) -from tests.util.generator_helpers import entry_or_none +from tests.shared.generator_helpers import entry_or_none class OpossumOutputFileProvider(BaseProvider): @@ -34,8 +34,8 @@ def output_file( manual_attributions: dict[str, ManualAttributions] | None = None, resources_to_attributions: dict[str, list[str]] | None = None, resolved_external_attributions: list[str] | None = None, - ) -> OpossumOutputFile: - return OpossumOutputFile( + ) -> OpossumOutputFileModel: + return OpossumOutputFileModel( metadata=metadata or self.outfile_metadata(), manual_attributions=manual_attributions or {}, resources_to_attributions=resources_to_attributions or {}, diff --git a/tests/input_formats/opossum/services/__init__.py 
b/tests/input_formats/opossum/services/__init__.py new file mode 100644 index 0000000..698616e --- /dev/null +++ b/tests/input_formats/opossum/services/__init__.py @@ -0,0 +1,4 @@ +# SPDX-FileCopyrightText: TNG Technology Consulting GmbH +# # +# SPDX-License-Identifier: Apache-2.0 + diff --git a/tests/test_opossum/test_conversion_roundtrip.py b/tests/input_formats/opossum/services/test_conversion_roundtrip.py similarity index 69% rename from tests/test_opossum/test_conversion_roundtrip.py rename to tests/input_formats/opossum/services/test_conversion_roundtrip.py index a96ac68..d0ac02f 100644 --- a/tests/test_opossum/test_conversion_roundtrip.py +++ b/tests/input_formats/opossum/services/test_conversion_roundtrip.py @@ -3,11 +3,11 @@ # SPDX-License-Identifier: Apache-2.0 from copy import deepcopy -from opossum_lib.opossum.opossum_file_content import OpossumFileContent -from opossum_lib.opossum.opossum_file_to_opossum_converter import ( - OpossumFileToOpossumConverter, +from opossum_lib.input_formats.opossum.services.convert_to_opossum import ( # noqa: E501 + convert_to_opossum, ) -from tests.test_setup.opossum_file_faker_setup import OpossumFileFaker +from opossum_lib.shared.entities.opossum_file_model import OpossumFileModel +from tests.setup.opossum_file_faker_setup import OpossumFileFaker class TestConversionRoundtrip: @@ -16,7 +16,7 @@ def test_roundtrip(self, opossum_file_faker: OpossumFileFaker) -> None: TestConversionRoundtrip._check_round_trip(start_file_content) def test_input_file_only(self, opossum_file_faker: OpossumFileFaker) -> None: - start_file_content = OpossumFileContent( + start_file_content = OpossumFileModel( input_file=opossum_file_faker.opossum_file_information() ) TestConversionRoundtrip._check_round_trip(start_file_content) @@ -29,9 +29,7 @@ def test_surplus_attributions(self, opossum_file_faker: OpossumFileFaker) -> Non TestConversionRoundtrip._check_round_trip(start_file_content) @staticmethod - def _check_round_trip(start_file_content: 
OpossumFileContent) -> None: + def _check_round_trip(start_file_content: OpossumFileModel) -> None: expected_file_content = deepcopy(start_file_content) - result = OpossumFileToOpossumConverter.convert_to_opossum( - start_file_content - ).to_opossum_file_format() + result = convert_to_opossum(start_file_content).to_opossum_model() assert result == expected_file_content diff --git a/tests/test_opossum/test_opossum_file_to_opossum_converter.py b/tests/input_formats/opossum/services/test_convert_to_opossum.py similarity index 73% rename from tests/test_opossum/test_opossum_file_to_opossum_converter.py rename to tests/input_formats/opossum/services/test_convert_to_opossum.py index f62de67..d0975c9 100644 --- a/tests/test_opossum/test_opossum_file_to_opossum_converter.py +++ b/tests/input_formats/opossum/services/test_convert_to_opossum.py @@ -3,19 +3,22 @@ # SPDX-License-Identifier: Apache-2.0 import pytest -from opossum_lib.opossum.opossum_file import OpossumPackage, OpossumPackageIdentifier -from opossum_lib.opossum.opossum_file_to_opossum_converter import ( - OpossumFileToOpossumConverter, +from opossum_lib.input_formats.opossum.services.convert_to_opossum import ( # noqa: E501 + convert_to_opossum, ) -from tests.test_setup.opossum_file_faker_setup import OpossumFileFaker +from opossum_lib.shared.entities.opossum_input_file_model import ( + OpossumPackageIdentifierModel, + OpossumPackageModel, +) +from tests.setup.opossum_file_faker_setup import OpossumFileFaker -class TestOpossumFileToOpossumConverter: +class TestConvertToOpossum: def test_output_file_moved(self, opossum_file_faker: OpossumFileFaker) -> None: output_file = opossum_file_faker.output_file() input_file = opossum_file_faker.opossum_file_content(out_file=output_file) - result = OpossumFileToOpossumConverter.convert_to_opossum(input_file) + result = convert_to_opossum(input_file) assert result.review_results == output_file @@ -31,12 +34,12 @@ def test_throws_on_duplicate_attributions( input_file = 
opossum_file_faker.opossum_file_content(in_file=file_information) with pytest.raises(RuntimeError, match=r".*attribution was duplicated.*"): - OpossumFileToOpossumConverter.convert_to_opossum(input_file) + convert_to_opossum(input_file) @staticmethod def _fake_duplicate_external_attributions( opossum_file_faker: OpossumFileFaker, - ) -> dict[OpossumPackageIdentifier, OpossumPackage]: + ) -> dict[OpossumPackageIdentifierModel, OpossumPackageModel]: external_attributions = opossum_file_faker.external_attributions( min_number_of_attributions=2 ) diff --git a/tests/input_formats/opossum/services/test_opossum_file_reader.py b/tests/input_formats/opossum/services/test_opossum_file_reader.py new file mode 100644 index 0000000..a5741c1 --- /dev/null +++ b/tests/input_formats/opossum/services/test_opossum_file_reader.py @@ -0,0 +1,36 @@ +# SPDX-FileCopyrightText: TNG Technology Consulting GmbH +# # +# SPDX-License-Identifier: Apache-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +from pathlib import Path + +import pytest +from _pytest.logging import LogCaptureFixture + +from opossum_lib.input_formats.opossum.services.opossum_file_reader import ( + OpossumFileReader, +) + +TEST_DATA_DIR = Path(__file__).resolve().parent.parent.parent.parent / "data" + + +class TestOpossumFileReader: + def test_read_corrupted_file_exits_1(self, caplog: LogCaptureFixture) -> None: + input_path = TEST_DATA_DIR / "opossum_input_corrupt.opossum" + opossum_format_reader = OpossumFileReader(input_path) + + with pytest.raises(SystemExit) as system_exit: + opossum_format_reader.read() + assert system_exit.value.code == 1 + assert "is corrupt and does not contain 'input.json'" in caplog.messages[0] + + def test_read_with_output_json(self) -> None: + input_path = TEST_DATA_DIR / "opossum_input_with_result.opossum" + opossum_format_reader = OpossumFileReader(input_path) + + result = opossum_format_reader.read() + + assert result is not None + assert result.scan_results is not None + assert 
result.review_results is not None diff --git a/tests/input_formats/scancode/__init__.py b/tests/input_formats/scancode/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/input_formats/scancode/entities/__init__.py b/tests/input_formats/scancode/entities/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/input_formats/scancode/entities/generators/__init__.py b/tests/input_formats/scancode/entities/generators/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_scancode/generators/generate_scancode_file.py b/tests/input_formats/scancode/entities/generators/generate_scancode_file.py similarity index 88% rename from tests/test_scancode/generators/generate_scancode_file.py rename to tests/input_formats/scancode/entities/generators/generate_scancode_file.py index ddcf007..9327293 100644 --- a/tests/test_scancode/generators/generate_scancode_file.py +++ b/tests/input_formats/scancode/entities/generators/generate_scancode_file.py @@ -20,24 +20,24 @@ from faker.providers.lorem.en_US import Provider as LoremProvider from faker.providers.misc import Provider as MiscProvider -from opossum_lib.scancode.model import ( - Copyright, - Email, - ExtraData, - File, - FileBasedLicenseDetection, - FileType, - GlobalLicenseDetection, - Header, - Holder, - Match, - Options, - ReferenceMatch, - ScanCodeData, - SystemEnvironment, - Url, +from opossum_lib.input_formats.scancode.entities.scancode_model import ( + CopyrightModel, + EmailModel, + ExtraDataModel, + FileBasedLicenseDetectionModel, + FileModel, + FileTypeModel, + GlobalLicenseDetectionModel, + HeaderModel, + HolderModel, + MatchModel, + OptionsModel, + ReferenceMatchModel, + ScancodeModel, + SystemEnvironmentModel, + UrlModel, ) -from tests.test_scancode.generators.helpers import entry_or_none, random_list +from tests.shared.generator_helpers import entry_or_none, random_list type TempPathTree = dict[str, TempPathTree | None] @@ -63,12 +63,12 @@ def 
scancode_data( self, *, dependencies: list | None = None, - files: list[File] | None = None, - license_detections: list[GlobalLicenseDetection] | None = None, - headers: list[Header] | None = None, + files: list[FileModel] | None = None, + license_detections: list[GlobalLicenseDetectionModel] | None = None, + headers: list[HeaderModel] | None = None, packages: list | None = None, - options: Options | None = None, - ) -> ScanCodeData: + options: OptionsModel | None = None, + ) -> ScancodeModel: # TODO: #184 depending on which options are passed in additional_options # we need to generate different fields, e.g. --licenses # out of scope for now @@ -76,7 +76,7 @@ def scancode_data( license_detections = license_detections or self.global_license_detections(files) if headers is None: headers = [self.header(options=options)] - return ScanCodeData( + return ScancodeModel( dependencies=dependencies or [], files=files, license_detections=license_detections, @@ -90,17 +90,17 @@ def header( duration: float | None = None, end_timestamp: str | None = None, errors: list | None = None, - extra_data: ExtraData | None = None, + extra_data: ExtraDataModel | None = None, message: Any | None = None, notice: str | None = None, - options: Options | None = None, + options: OptionsModel | None = None, output_format_version: str | None = None, start_timestamp: str | None = None, tool_name: str | None = None, tool_version: str | None = None, warnings: list | None = None, - ) -> Header: - return Header( + ) -> HeaderModel: + return HeaderModel( duration=duration or self.random_int(max=9999999) / 1e3, end_timestamp=end_timestamp or self.date_provider.iso8601(), errors=errors or [], @@ -117,8 +117,8 @@ def header( def options( self, *, input: list[str] | None = None, **additional_options: Any - ) -> Options: - return Options( + ) -> OptionsModel: + return OptionsModel( input=input or [ self.file_provider.file_path( @@ -133,9 +133,9 @@ def extra_data( *, files_count: int | None = None, 
spdx_license_list_version: str | None = None, - system_environment: SystemEnvironment | None = None, - ) -> ExtraData: - return ExtraData( + system_environment: SystemEnvironmentModel | None = None, + ) -> ExtraDataModel: + return ExtraDataModel( files_count=files_count or self.random_int(), spdx_license_list_version=spdx_license_list_version or self.numerify("#.##"), @@ -150,11 +150,11 @@ def system_environment( platform: str | None = None, platform_version: str | None = None, python_version: str | None = None, - ) -> SystemEnvironment: + ) -> SystemEnvironmentModel: operating_system = operating_system or self.random_element( ["linux", "windows", "macos"] ) - return SystemEnvironment( + return SystemEnvironmentModel( cpu_architecture=cpu_architecture or self.random_element(["32", "64"]), operating_system=operating_system, platform=platform @@ -164,10 +164,10 @@ def system_environment( ) def global_license_detections( - self, files: list[File] - ) -> list[GlobalLicenseDetection]: + self, files: list[FileModel] + ) -> list[GlobalLicenseDetectionModel]: license_counter: dict[str, int] = defaultdict(int) - id_to_license_detection: dict[str, FileBasedLicenseDetection] = {} + id_to_license_detection: dict[str, FileBasedLicenseDetectionModel] = {} for file in files: for ld in file.license_detections: license_counter[ld.identifier] += 1 @@ -176,13 +176,13 @@ def global_license_detections( global_license_detections = [] for id, count in license_counter.items(): ld = id_to_license_detection[id] - gld = GlobalLicenseDetection( + gld = GlobalLicenseDetectionModel( detection_count=count, license_expression=ld.license_expression, license_expression_spdx=ld.license_expression_spdx, identifier=ld.identifier, reference_matches=[ - ReferenceMatch( + ReferenceMatchModel( end_line=match.end_line, from_file=match.from_file, license_expression=match.license_expression, @@ -228,11 +228,11 @@ def generate_path_structure( folders[folder_name] = children return {**files, **folders} - def 
files(self, path_tree: TempPathTree | None = None) -> list[File]: + def files(self, path_tree: TempPathTree | None = None) -> list[FileModel]: path_tree = path_tree or self.generate_path_structure() - def process_path(current_path: str, path_tree: TempPathTree) -> list[File]: - files: list[File] = [] + def process_path(current_path: str, path_tree: TempPathTree) -> list[FileModel]: + files: list[FileModel] = [] for name, data in path_tree.items(): path = current_path + name if data: @@ -240,8 +240,8 @@ def process_path(current_path: str, path_tree: TempPathTree) -> list[File]: child_types = [c.type for c in child_files] folder = self.single_folder( path=path, - dirs_count=child_types.count(FileType.DIRECTORY), - files_count=child_types.count(FileType.FILE), + dirs_count=child_types.count(FileTypeModel.DIRECTORY), + files_count=child_types.count(FileTypeModel.FILE), size_count=sum(c.size for c in child_files), ) files.append(folder) @@ -261,17 +261,17 @@ def single_folder( path: str, authors: list | None = None, base_name: str | None = None, - copyrights: list[Copyright] | None = None, + copyrights: list[CopyrightModel] | None = None, date: str | None = None, detected_license_expression: str | None = None, detected_license_expression_spdx: str | None = None, dirs_count: int = 0, - emails: list[Email] | None = None, + emails: list[EmailModel] | None = None, extension: str = "", files_count: int = 0, file_type: str | None = None, for_packages: list | None = None, - holders: list[Holder] | None = None, + holders: list[HolderModel] | None = None, is_archive: bool = False, is_binary: bool = False, is_media: bool = False, @@ -279,7 +279,7 @@ def single_folder( is_source: bool = False, is_text: bool = False, license_clues: list | None = None, - license_detections: list[FileBasedLicenseDetection] | None = None, + license_detections: list[FileBasedLicenseDetectionModel] | None = None, md5: str | None = None, mime_type: str | None = None, name: str | None = None, @@ -291,9 
+291,9 @@ def single_folder( sha256: str | None = None, size: int = 0, size_count: int = 0, - urls: list[Url] | None = None, - ) -> File: - return File( + urls: list[UrlModel] | None = None, + ) -> FileModel: + return FileModel( authors=authors or [], base_name=base_name or PurePath(PurePath(path).name).stem, copyrights=copyrights or [], @@ -327,7 +327,7 @@ def single_folder( sha256=sha256, size=size, size_count=size_count, - type=FileType.DIRECTORY, + type=FileTypeModel.DIRECTORY, urls=urls or [], ) @@ -337,17 +337,17 @@ def single_file( path: str, authors: list | None = None, base_name: str | None = None, - copyrights: list[Copyright] | None = None, + copyrights: list[CopyrightModel] | None = None, date: str | None = None, detected_license_expression: str | None = None, detected_license_expression_spdx: str | None = None, dirs_count: int = 0, - emails: list[Email] | None = None, + emails: list[EmailModel] | None = None, extension: str | None = None, files_count: int = 0, file_type: str | None = None, for_packages: list | None = None, - holders: list[Holder] | None = None, + holders: list[HolderModel] | None = None, is_archive: bool | None = None, is_binary: bool | None = None, is_media: bool | None = None, @@ -355,7 +355,7 @@ def single_file( is_source: bool | None = None, is_text: bool | None = None, license_clues: list | None = None, - license_detections: list[FileBasedLicenseDetection] | None = None, + license_detections: list[FileBasedLicenseDetectionModel] | None = None, md5: str | None = None, mime_type: str | None = None, name: str | None = None, @@ -367,14 +367,14 @@ def single_file( sha256: str | None = None, size: int | None = None, size_count: int = 0, - urls: list[Url] | None = None, - ) -> File: + urls: list[UrlModel] | None = None, + ) -> FileModel: if copyrights is None and holders is None: holders = [] for _ in range(self.random_int(max=3)): start_line = self.random_int() end_line = start_line + self.random_int(max=2) - holder = Holder( + holder = 
HolderModel( holder=self.company_provider.company(), start_line=start_line, end_line=end_line, @@ -383,7 +383,7 @@ def single_file( if copyrights is None: assert holders is not None # can never trigger but makes mypy happy copyrights = [ - Copyright( + CopyrightModel( copyright="Copyright " + h.holder, start_line=h.start_line, end_line=h.end_line, @@ -392,7 +392,7 @@ def single_file( ] if holders is None: holders = [ - Holder( + HolderModel( holder=cr.copyright, start_line=cr.start_line, end_line=cr.end_line, @@ -435,7 +435,7 @@ def single_file( self.misc_provider, self.random_element(["Java", "Typescript", "HTML", "Python"]), ) - return File( + return FileModel( authors=authors or [], base_name=base_name or PurePath(PurePath(path).name).stem, copyrights=copyrights, @@ -469,7 +469,7 @@ def single_file( sha256=sha256 if sha256 is not None else self.misc_provider.sha256(), size=size if size is not None else self.random_int(max=10**9), size_count=size_count, - type=FileType.FILE, + type=FileTypeModel.FILE, urls=urls if urls is not None else random_list(self, self.url), ) @@ -478,10 +478,10 @@ def copyright( copyright: str | None = None, end_line: int | None = None, start_line: int | None = None, - ) -> Copyright: + ) -> CopyrightModel: start_line = start_line or self.random_int() end_line = start_line + self.random_int(max=50) - return Copyright( + return CopyrightModel( copyright=copyright or "Copyright " + self.company_provider.company(), end_line=end_line, start_line=start_line, @@ -492,10 +492,10 @@ def email( email: str | None = None, end_line: int | None = None, start_line: int | None = None, - ) -> Email: + ) -> EmailModel: start_line = start_line or self.random_int() end_line = start_line + self.random_int(max=2) - return Email( + return EmailModel( email=email or self.internet_provider.email(), end_line=end_line, start_line=start_line, @@ -506,10 +506,10 @@ def url( url: str | None = None, end_line: int | None = None, start_line: int | None = None, - ) -> 
Url: + ) -> UrlModel: start_line = start_line or self.random_int() end_line = start_line + self.random_int(max=2) - return Url( + return UrlModel( url=url or self.internet_provider.url(), end_line=end_line, start_line=start_line, @@ -519,10 +519,10 @@ def license_detection( self, license_expression: str | None = None, license_expression_spdx: str | None = None, - matches: list[Match] | None = None, + matches: list[MatchModel] | None = None, identifier: str | None = None, path: str | None = None, - ) -> FileBasedLicenseDetection: + ) -> FileBasedLicenseDetectionModel: if path is None and matches is None: raise RuntimeError( "Neither path nor matches given which is likely a user error. " @@ -546,7 +546,7 @@ def license_detection( ), min_number_of_entries=1, ) - return FileBasedLicenseDetection( + return FileBasedLicenseDetectionModel( license_expression=license_expression, license_expression_spdx=license_expression_spdx, matches=matches, @@ -568,12 +568,12 @@ def match( rule_url: Any | None = None, score: float | None = None, start_line: int | None = None, - ) -> Match: + ) -> MatchModel: start_line = start_line or self.random_int() end_line = start_line + self.random_int() if license_expression_spdx is None: license_expression_spdx = self.lexify("???? 
License") - return Match( + return MatchModel( end_line=end_line, from_file=from_file, license_expression=license_expression or "", diff --git a/tests/input_formats/scancode/services/__init__.py b/tests/input_formats/scancode/services/__init__.py new file mode 100644 index 0000000..698616e --- /dev/null +++ b/tests/input_formats/scancode/services/__init__.py @@ -0,0 +1,4 @@ +# SPDX-FileCopyrightText: TNG Technology Consulting GmbH +# # +# SPDX-License-Identifier: Apache-2.0 + diff --git a/tests/input_formats/scancode/services/test_convert_to_opossum.py b/tests/input_formats/scancode/services/test_convert_to_opossum.py new file mode 100644 index 0000000..71ac4be --- /dev/null +++ b/tests/input_formats/scancode/services/test_convert_to_opossum.py @@ -0,0 +1,88 @@ +# SPDX-FileCopyrightText: TNG Technology Consulting GmbH +# +# SPDX-License-Identifier: Apache-2.0 + + +import pytest +from _pytest.logging import LogCaptureFixture + +from opossum_lib.core.entities.resource import Resource +from opossum_lib.input_formats.scancode.services.convert_to_opossum import ( # noqa: E501 + convert_to_opossum, +) +from tests.setup.scancode_faker_setup import ScanCodeFaker + + +class TestExtractScancodeHeader: + def test_produces_expected_result( + self, + scancode_faker: ScanCodeFaker, + ) -> None: + scancode_data = scancode_faker.scancode_data() + opossum = convert_to_opossum( + scancode_data, + ) + metadata = opossum.scan_results.metadata + header = scancode_data.headers[0] + assert metadata.file_creation_date == header.end_timestamp + assert metadata.project_title == "ScanCode file" + + def test_errors_with_missing_header( + self, caplog: LogCaptureFixture, scancode_faker: ScanCodeFaker + ) -> None: + scancode_data = scancode_faker.scancode_data(headers=[]) + + with pytest.raises(SystemExit): + convert_to_opossum(scancode_data) + + assert "header" in caplog.messages[0].lower() + + def test_error_with_multiple_headers( + self, caplog: LogCaptureFixture, scancode_faker: 
ScanCodeFaker + ) -> None: + header1 = scancode_faker.header() + header2 = scancode_faker.header() + scancode_data = scancode_faker.scancode_data(headers=[header1, header2]) + + with pytest.raises(SystemExit): + convert_to_opossum(scancode_data) + + assert "header" in caplog.messages[0].lower() + + +class TestConvertToOpossumFull: + @staticmethod + def _count_resources(resource: Resource) -> int: + return 1 + sum( + TestConvertToOpossumFull._count_resources(child) + for child in resource.children.values() + ) + + @staticmethod + def _count_attributions(resource: Resource) -> int: + return len(resource.attributions) + sum( + TestConvertToOpossumFull._count_attributions(child) + for child in resource.children.values() + ) + + def test_convert( + self, + scancode_faker: ScanCodeFaker, + ) -> None: + scancode_data = scancode_faker.scancode_data() + opossum_data = convert_to_opossum(scancode_data) + + assert opossum_data.review_results is None + scan_results = opossum_data.scan_results + assert sum( + TestConvertToOpossumFull._count_resources(res) + for res in scan_results.resources + ) == len(scancode_data.files) + num_attributions = sum( + TestConvertToOpossumFull._count_attributions(res) + for res in scan_results.resources + ) + num_license_detections = sum( + len(f.license_detections) for f in scancode_data.files + ) + assert num_attributions == num_license_detections diff --git a/tests/input_formats/scancode/services/test_get_attribution_info.py b/tests/input_formats/scancode/services/test_get_attribution_info.py new file mode 100644 index 0000000..7a30c39 --- /dev/null +++ b/tests/input_formats/scancode/services/test_get_attribution_info.py @@ -0,0 +1,96 @@ +# SPDX-FileCopyrightText: TNG Technology Consulting GmbH +# # +# SPDX-License-Identifier: Apache-2.0 +# +# SPDX-License-Identifier: Apache-2.0 + + +from opossum_lib.input_formats.scancode.constants import SCANCODE_SOURCE_NAME +from opossum_lib.input_formats.scancode.services.convert_to_opossum import ( # noqa: 
E501 + convert_to_opossum, +) +from opossum_lib.shared.entities.opossum_input_file_model import ( + OpossumPackageModel, + SourceInfoModel, +) +from tests.setup.scancode_faker_setup import ScanCodeFaker + + +class TestGetAttributionInfo: + def test_get_attribution_info_directory( + self, scancode_faker: ScanCodeFaker + ) -> None: + folder = scancode_faker.single_folder(path="some/single/folder") + scancode_data = scancode_faker.scancode_data(files=[folder]) + opossum = convert_to_opossum(scancode_data) + assert len(opossum.scan_results.resources) == 1 + assert opossum.scan_results.resources[0].attributions == [] + + def test_get_attribution_info_from_file_without_detections( + self, + scancode_faker: ScanCodeFaker, + ) -> None: + file = scancode_faker.single_file( + path="some/single/file", license_detections=[] + ) + scancode_data = scancode_faker.scancode_data(files=[file]) + opossum = convert_to_opossum(scancode_data) + assert len(opossum.scan_results.resources) == 1 + assert opossum.scan_results.resources[0].attributions == [] + + def test_get_attribution_info_file_multiple( + self, scancode_faker: ScanCodeFaker + ) -> None: + match1 = scancode_faker.match( + license_expression_spdx="Apache-2.0", + from_file="A", + score=75, + rule_relevance=50, + ) + match2 = scancode_faker.match( + license_expression_spdx="Apache-2.0", + from_file="A", + score=95, + rule_relevance=50, + ) + match3 = scancode_faker.match( + license_expression_spdx="MIT", + from_file="A", + score=50, + rule_relevance=50, + ) + license1 = scancode_faker.license_detection( + license_expression_spdx="Apache-2.0", + matches=[match1, match2], + ) + license2 = scancode_faker.license_detection( + license_expression_spdx="MIT", + matches=[match3], + ) + copyright1 = scancode_faker.copyright(copyright="Me") + copyright2 = scancode_faker.copyright(copyright="Myself") + copyright3 = scancode_faker.copyright(copyright="I") + file = scancode_faker.single_file( + path="A", + license_detections=[license1, 
license2], + copyrights=[copyright1, copyright2, copyright3], + ) + scancode_data = scancode_faker.scancode_data(files=[file]) + opossum = convert_to_opossum(scancode_data) + attributions = ( + opossum.to_opossum_model().input_file.external_attributions.values() + ) + + expected1 = OpossumPackageModel( + source=SourceInfoModel(name=SCANCODE_SOURCE_NAME), + license_name="Apache-2.0", + copyright="Me\nMyself\nI", + attribution_confidence=95, + ) + expected2 = OpossumPackageModel( + source=SourceInfoModel(name=SCANCODE_SOURCE_NAME), + license_name="MIT", + copyright="Me\nMyself\nI", + attribution_confidence=50, + ) + assert set(attributions) == {expected1, expected2} diff --git a/tests/setup/__init__.py b/tests/setup/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_setup/opossum_faker_setup.py b/tests/setup/opossum_faker_setup.py similarity index 91% rename from tests/test_setup/opossum_faker_setup.py rename to tests/setup/opossum_faker_setup.py index 72ae37c..3dbabf0 100644 --- a/tests/test_setup/opossum_faker_setup.py +++ b/tests/setup/opossum_faker_setup.py @@ -7,8 +7,8 @@ from faker import Faker, Generator -from tests.opossum_model_generators.opossum_provider import OpossumProvider -from tests.opossum_model_generators.scan_results_provider import ScanResultsProvider +from tests.core.entities.generators.opossum_provider import OpossumProvider +from tests.core.entities.generators.scan_results_provider import ScanResultsProvider class OpossumFaker(Faker): diff --git a/tests/test_setup/opossum_file_faker_setup.py b/tests/setup/opossum_file_faker_setup.py similarity index 90% rename from tests/test_setup/opossum_file_faker_setup.py rename to tests/setup/opossum_file_faker_setup.py index f1a1ea6..b471c2d 100644 --- a/tests/test_setup/opossum_file_faker_setup.py +++ b/tests/setup/opossum_file_faker_setup.py @@ -8,14 +8,14 @@ from faker import Faker, Generator -from tests.test_opossum.generators.generate_file_information import ( +from 
tests.input_formats.opossum.entities.generators.generate_file_information import ( FileInformationProvider, MetadataProvider, ) -from tests.test_opossum.generators.generate_opossum_file_content import ( +from tests.input_formats.opossum.entities.generators.generate_opossum_file_content import ( # noqa: E501 OpossumFileContentProvider, ) -from tests.test_opossum.generators.generate_outfile_information import ( +from tests.input_formats.opossum.entities.generators.generate_outfile_information import ( # noqa: E501 OpossumOutputFileProvider, ) diff --git a/tests/test_setup/scancode_faker_setup.py b/tests/setup/scancode_faker_setup.py similarity index 94% rename from tests/test_setup/scancode_faker_setup.py rename to tests/setup/scancode_faker_setup.py index 80fc39d..07da5f9 100644 --- a/tests/test_setup/scancode_faker_setup.py +++ b/tests/setup/scancode_faker_setup.py @@ -8,7 +8,9 @@ from faker import Faker, Generator -from tests.test_scancode.generators.generate_scancode_file import ScanCodeDataProvider +from tests.input_formats.scancode.entities.generators.generate_scancode_file import ( + ScanCodeDataProvider, +) class ScanCodeFaker(Faker): diff --git a/tests/shared/__init__.py b/tests/shared/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/util/generator_helpers.py b/tests/shared/generator_helpers.py similarity index 84% rename from tests/util/generator_helpers.py rename to tests/shared/generator_helpers.py index 4e9dddd..903163b 100644 --- a/tests/util/generator_helpers.py +++ b/tests/shared/generator_helpers.py @@ -20,6 +20,7 @@ def random_list[T]( faker: BaseProvider, entry_generator: Callable[[], T], max_number_of_entries: int = 3, + min_number_of_entries: int = 1, ) -> list[T]: - number_of_entries = faker.random_int(1, max_number_of_entries) + number_of_entries = faker.random_int(min_number_of_entries, max_number_of_entries) return [entry_generator() for _ in range(number_of_entries)] diff --git a/tests/test_cli.py 
b/tests/test_cli.py index d6870d2..a513091 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,6 +1,8 @@ # SPDX-FileCopyrightText: TNG Technology Consulting GmbH # # SPDX-License-Identifier: Apache-2.0 +from __future__ import annotations + import json from pathlib import Path from typing import Any @@ -11,141 +13,141 @@ from click.testing import CliRunner, Result from opossum_lib.cli import generate -from opossum_lib.opossum.constants import INPUT_JSON_NAME, OUTPUT_JSON_NAME -from opossum_lib.opossum.file_generation import OpossumFileWriter -from opossum_lib.opossum.opossum_file import OpossumPackage -from tests.test_setup.opossum_file_faker_setup import OpossumFileFaker +from opossum_lib.core.services.write_opossum_file import write_opossum_file +from opossum_lib.shared.constants import ( + INPUT_JSON_NAME, + OUTPUT_JSON_NAME, +) +from opossum_lib.shared.entities.opossum_input_file_model import OpossumPackageModel +from tests.setup.opossum_file_faker_setup import OpossumFileFaker test_data_path = Path(__file__).resolve().parent / "data" -def generate_valid_scan_code_argument( - filename: str = "scancode_input.json", -) -> list[str]: - return ["--scan-code-json", str(test_data_path / filename)] - - -def generate_valid_opossum_argument( - filename: str = "opossum_input.opossum", -) -> list[str]: - return ["--opossum", str(test_data_path / filename)] - - def run_with_command_line_arguments(cmd_line_arguments: list[str]) -> Result: runner = CliRunner() result = runner.invoke(generate, cmd_line_arguments) return result -def test_successful_conversion_of_input_only_opossum_file(tmp_path: Path) -> None: - output_file = str(tmp_path / "output_opossum.opossum") - result = run_with_command_line_arguments( - [ - "--opossum", - str(test_data_path / "opossum_input.opossum"), - "-o", - output_file, - ], - ) - - assert result.exit_code == 0 - expected_opossum_dict = read_json_from_file("opossum_input.json") - opossum_dict = read_input_json_from_opossum(output_file) - - 
# Doing individual asserts as otherwise the diff viewer does no longer work - # in case of errors - assert_expected_file_equals_generated_file(expected_opossum_dict, opossum_dict) - - -def test_successful_conversion_of_scancode_file(tmp_path: Path) -> None: - output_file = str(tmp_path / "output_scancode.opossum") - result = run_with_command_line_arguments( - [ - "--scan-code-json", - str(test_data_path / "scancode_input.json"), - "-o", - output_file, - ], - ) - - assert result.exit_code == 0 - expected_opossum_dict = read_json_from_file("expected_scancode.json") - opossum_dict = read_input_json_from_opossum(output_file) - - md = opossum_dict.pop("metadata") - expected_md = expected_opossum_dict.pop("metadata") - md["projectId"] = expected_md["projectId"] - assert md == expected_md - - # Python has hash salting, which means the hashes changes between sessions. - # This means that the IDs of the attributions change as they are based on hashes - # Thus we need to compare externalAttributions and resourcesToAttributions - # structurally - resources_inlined = inline_attributions_into_resources( - resources_with_ids=opossum_dict.pop("resourcesToAttributions"), - all_attributions=opossum_dict.pop("externalAttributions"), - ) - expected_resources_inlined = inline_attributions_into_resources( - resources_with_ids=expected_opossum_dict.pop("resourcesToAttributions"), - all_attributions=expected_opossum_dict.pop("externalAttributions"), - ) - assert resources_inlined == expected_resources_inlined - assert_expected_file_equals_generated_file(expected_opossum_dict, opossum_dict) - - -def test_successful_conversion_of_input_and_output_opossum_file(tmp_path: Path) -> None: - output_file = str(tmp_path / "output_opossum.opossum") - result = run_with_command_line_arguments( - [ - "--opossum", - str(test_data_path / "opossum_input_with_result.opossum"), - "-o", - output_file, - ], - ) - - assert result.exit_code == 0 - - # Doing individual asserts as otherwise the diff viewer does 
no longer work - # in case of errors - assert_input_json_matches_expectations(output_file) - assert_output_json_matches_expectations(output_file) - +class TestConvertOpossumFiles: + def test_successful_conversion_of_input_only_opossum_file( + self, tmp_path: Path + ) -> None: + output_file = str(tmp_path / "output_opossum.opossum") + result = run_with_command_line_arguments( + [ + "--opossum", + str(test_data_path / "opossum_input.opossum"), + "-o", + output_file, + ], + ) -def assert_input_json_matches_expectations(output_file: str) -> None: - expected_opossum_dict = read_json_from_file("opossum_input.json") - opossum_dict = read_input_json_from_opossum(output_file) - assert_expected_file_equals_generated_file(expected_opossum_dict, opossum_dict) + assert result.exit_code == 0 + expected_opossum_dict = _read_json_from_file("opossum_input.json") + opossum_dict = _read_input_json_from_opossum(output_file) + + # Doing individual asserts as otherwise the diff viewer does no longer work + # in case of errors + _assert_expected_file_equals_generated_file(expected_opossum_dict, opossum_dict) + + def test_successful_conversion_of_input_and_output_opossum_file( + self, tmp_path: Path + ) -> None: + output_file = str(tmp_path / "output_opossum.opossum") + result = run_with_command_line_arguments( + [ + "--opossum", + str(test_data_path / "opossum_input_with_result.opossum"), + "-o", + output_file, + ], + ) + assert result.exit_code == 0 -def assert_output_json_matches_expectations(output_file: str) -> None: - expected_opossum_dict = read_json_from_file("opossum_output.json") - opossum_dict = read_output_json_from_opossum(output_file) - assert_expected_file_equals_generated_file(expected_opossum_dict, opossum_dict) + # Doing individual asserts as otherwise the diff viewer does no longer work + # in case of errors + TestConvertOpossumFiles._assert_input_json_matches_expectations(output_file) + TestConvertOpossumFiles._assert_output_json_matches_expectations(output_file) + + 
@staticmethod + def _assert_input_json_matches_expectations(output_file: str) -> None: + expected_opossum_dict = _read_json_from_file("opossum_input.json") + opossum_dict = _read_input_json_from_opossum(output_file) + _assert_expected_file_equals_generated_file(expected_opossum_dict, opossum_dict) + + @staticmethod + def _assert_output_json_matches_expectations(output_file: str) -> None: + expected_opossum_dict = _read_json_from_file("opossum_output.json") + opossum_dict = _read_output_json_from_opossum(output_file) + _assert_expected_file_equals_generated_file(expected_opossum_dict, opossum_dict) + + +class TestConvertScancodeFiles: + def test_successful_conversion_of_scancode_file(self, tmp_path: Path) -> None: + output_file = str(tmp_path / "output_scancode.opossum") + result = run_with_command_line_arguments( + [ + "--scan-code-json", + str(test_data_path / "scancode_input.json"), + "-o", + output_file, + ], + ) + assert result.exit_code == 0 + expected_opossum_dict = _read_json_from_file("expected_scancode.json") + opossum_dict = _read_input_json_from_opossum(output_file) + + md = opossum_dict.pop("metadata") + expected_md = expected_opossum_dict.pop("metadata") + md["projectId"] = expected_md["projectId"] + assert md == expected_md + + # Python has hash salting, which means the hashes changes between sessions. 
+ # This means that the IDs of the attributions change as they are based on hashes + # Thus we need to compare externalAttributions and resourcesToAttributions + # structurally + resources_inlined = ( + TestConvertScancodeFiles._inline_attributions_into_resources( + resources_with_ids=opossum_dict.pop("resourcesToAttributions"), + all_attributions=opossum_dict.pop("externalAttributions"), + ) + ) + expected_resources_inlined = ( + TestConvertScancodeFiles._inline_attributions_into_resources( + resources_with_ids=expected_opossum_dict.pop("resourcesToAttributions"), + all_attributions=expected_opossum_dict.pop("externalAttributions"), + ) + ) + assert resources_inlined == expected_resources_inlined + _assert_expected_file_equals_generated_file(expected_opossum_dict, opossum_dict) -def inline_attributions_into_resources( - *, resources_with_ids: dict[str, list[str]], all_attributions: dict[str, Any] -) -> dict[str, set[OpossumPackage]]: - resource_with_inlined_attributions = {} - for path, ids in resources_with_ids.items(): - attributions = [] - for id in ids: - attribution = OpossumPackage(**all_attributions[id]) - attributions.append(attribution) - resource_with_inlined_attributions[path] = set(attributions) - return resource_with_inlined_attributions + @staticmethod + def _inline_attributions_into_resources( + *, resources_with_ids: dict[str, list[str]], all_attributions: dict[str, Any] + ) -> dict[str, set[OpossumPackageModel]]: + resource_with_inlined_attributions = {} + for path, ids in resources_with_ids.items(): + attributions = [] + for id in ids: + attribution = OpossumPackageModel(**all_attributions[id]) + attributions.append(attribution) + resource_with_inlined_attributions[path] = set(attributions) + return resource_with_inlined_attributions -def read_input_json_from_opossum(output_file_path: str) -> Any: - return read_json_from_zip_file(output_file_path, INPUT_JSON_NAME) +def _read_input_json_from_opossum(output_file_path: str) -> Any: + return 
_read_json_from_zip_file(output_file_path, INPUT_JSON_NAME) -def read_output_json_from_opossum(output_file_path: str) -> Any: - return read_json_from_zip_file(output_file_path, OUTPUT_JSON_NAME) +def _read_output_json_from_opossum(output_file_path: str) -> Any: + return _read_json_from_zip_file(output_file_path, OUTPUT_JSON_NAME) -def read_json_from_zip_file(output_file_path: str, file_name: str) -> Any: +def _read_json_from_zip_file(output_file_path: str, file_name: str) -> Any: with ( ZipFile(output_file_path, "r") as z, z.open(file_name) as file, @@ -154,13 +156,13 @@ def read_json_from_zip_file(output_file_path: str, file_name: str) -> Any: return opossum_dict -def read_json_from_file(filename: str) -> Any: +def _read_json_from_file(filename: str) -> Any: with open(test_data_path / filename, encoding="utf-8") as file: expected_opossum_dict = json.load(file) return expected_opossum_dict -def assert_expected_file_equals_generated_file( +def _assert_expected_file_equals_generated_file( expected_opossum_dict: Any, opossum_dict: Any ) -> None: assert expected_opossum_dict.keys() == opossum_dict.keys() @@ -171,69 +173,59 @@ def assert_expected_file_equals_generated_file( assert opossum_dict.get(field, None) == expected_opossum_dict.get(field, None) -def test_cli_no_output_file_provided(opossum_file_faker: OpossumFileFaker) -> None: - runner = CliRunner() - - with runner.isolated_filesystem(): - file_path = "input.opossum" - opossum_file = opossum_file_faker.opossum_file_content() - OpossumFileWriter.write_opossum_information_to_file( - opossum_file, Path(file_path) - ) - result = runner.invoke( - generate, - "--opossum " + file_path, - ) - - assert result.exit_code == 0 - assert Path.is_file(Path("output.opossum")) - - -def test_cli_warning_if_outfile_already_exists( - caplog: LogCaptureFixture, opossum_file_faker: OpossumFileFaker -) -> None: - runner = CliRunner() - - with runner.isolated_filesystem(): - file_path = "input.opossum" - opossum_file = 
opossum_file_faker.opossum_file_content() - OpossumFileWriter.write_opossum_information_to_file( - opossum_file, Path(file_path) - ) - with open("output.opossum", "w") as f: - f.write("") - result = runner.invoke( - generate, - "--opossum " + file_path + " -o output.opossum", - ) - - assert result.exit_code == 0 - - assert caplog.messages == ["output.opossum already exists and will be overwritten."] - - -@pytest.mark.parametrize( - "options", - [ - generate_valid_opossum_argument() + generate_valid_opossum_argument(), - generate_valid_opossum_argument() + generate_valid_scan_code_argument(), - generate_valid_scan_code_argument() + generate_valid_scan_code_argument(), - ], -) -def test_cli_with_multiple_files(caplog: LogCaptureFixture, options: list[str]) -> None: - result = run_with_command_line_arguments(options) - assert result.exit_code == 1 - - assert caplog.messages == ["Merging of multiple files not yet supported!"] - - -def test_cli_without_inputs(caplog: LogCaptureFixture) -> None: - result = run_with_command_line_arguments( +class TestCliValidations: + @staticmethod + def generate_valid_scan_code_argument( + filename: str = "scancode_input.json", + ) -> list[str]: + return ["--scan-code-json", str(test_data_path / filename)] + + @staticmethod + def generate_valid_opossum_argument( + filename: str = "opossum_input.opossum", + ) -> list[str]: + return ["--opossum", str(test_data_path / filename)] + + def test_cli_no_output_file_provided( + self, opossum_file_faker: OpossumFileFaker + ) -> None: + runner = CliRunner() + + with runner.isolated_filesystem(): + file_path = "input.opossum" + opossum_file = opossum_file_faker.opossum_file_content() + write_opossum_file(opossum_file, Path(file_path)) + result = runner.invoke( + generate, + "--opossum " + file_path, + ) + + assert result.exit_code == 0 + assert Path.is_file(Path("output.opossum")) + + @pytest.mark.parametrize( + "options", [ - "-o", - "output.opossum", + generate_valid_opossum_argument() + 
generate_valid_opossum_argument(), + generate_valid_opossum_argument() + generate_valid_scan_code_argument(), + generate_valid_scan_code_argument() + generate_valid_scan_code_argument(), ], ) - assert result.exit_code == 1 + def test_cli_with_multiple_files( + self, caplog: LogCaptureFixture, options: list[str] + ) -> None: + result = run_with_command_line_arguments(options) + assert result.exit_code == 1 + + assert caplog.messages == ["Merging of multiple files not yet supported!"] + + def test_cli_without_inputs(self, caplog: LogCaptureFixture) -> None: + result = run_with_command_line_arguments( + [ + "-o", + "output.opossum", + ], + ) + assert result.exit_code == 1 - assert caplog.messages == ["No input provided. Exiting."] + assert caplog.messages == ["No input provided. Exiting."] diff --git a/tests/test_opossum/test_file_generation.py b/tests/test_opossum/test_file_generation.py deleted file mode 100644 index 8ae81dd..0000000 --- a/tests/test_opossum/test_file_generation.py +++ /dev/null @@ -1,42 +0,0 @@ -# SPDX-FileCopyrightText: TNG Technology Consulting GmbH -# -# SPDX-License-Identifier: Apache-2.0 - -from pathlib import Path -from zipfile import ZipFile - -from opossum_lib.opossum.constants import INPUT_JSON_NAME, OUTPUT_JSON_NAME -from opossum_lib.opossum.file_generation import OpossumFileWriter -from opossum_lib.opossum.opossum_file_content import OpossumFileContent -from tests.test_setup.opossum_file_faker_setup import OpossumFileFaker - - -def test_only_input_information_available_writes_only_input_information( - tmp_path: Path, opossum_file_faker: OpossumFileFaker -) -> None: - opossum_file_content = OpossumFileContent( - input_file=opossum_file_faker.opossum_file_information() - ) - output_path = tmp_path / "output.opossum" - - OpossumFileWriter.write_opossum_information_to_file( - opossum_file_content, output_path - ) - - with ZipFile(output_path, "r") as zip_file: - assert zip_file.namelist() == [INPUT_JSON_NAME] - - -def 
test_input_and_output_information_available_writes_both( - tmp_path: Path, opossum_file_faker: OpossumFileFaker -) -> None: - opossum_file_content = opossum_file_faker.opossum_file_content() - output_path = tmp_path / "output.opossum" - - OpossumFileWriter.write_opossum_information_to_file( - opossum_file_content, output_path - ) - - with ZipFile(output_path, "r") as zip_file: - assert INPUT_JSON_NAME in zip_file.namelist() - assert OUTPUT_JSON_NAME in zip_file.namelist() diff --git a/tests/test_opossum/test_merge.py b/tests/test_opossum/test_merge.py deleted file mode 100644 index 941b16c..0000000 --- a/tests/test_opossum/test_merge.py +++ /dev/null @@ -1,210 +0,0 @@ -# SPDX-FileCopyrightText: TNG Technology Consulting GmbH -# -# SPDX-License-Identifier: Apache-2.0 -from unittest import mock - -import pytest - -from opossum_lib.opossum.merger import ( - _merge_dicts_without_duplicates, - _merge_resources, - _merge_resources_to_attributions, - expand_opossum_package_identifier, - merge_opossum_information, -) -from opossum_lib.opossum.opossum_file import ( - Metadata, - OpossumInformation, - OpossumPackage, - OpossumPackageIdentifier, - Resource, - ResourcePath, - ResourceType, - SourceInfo, -) - - -def test_merge_opossum_information() -> None: - opossum_package = OpossumPackage(source=SourceInfo(name="source")) - opossum_information = OpossumInformation( - metadata=Metadata( - project_id="project-id", - file_creation_date="30-05-2023", - project_title="test data", - ), - resources={"A": {"B": {}}}, - external_attributions={"SPDXRef-Package": opossum_package}, - resources_to_attributions={"/A/B/": ["SPDXRef-Package"]}, - ) - - opossum_information_2 = OpossumInformation( - metadata=Metadata( - project_id="test-data-id", - file_creation_date="29-05-2023", - project_title="second test data", - ), - resources={"A": {"D": {"C": 1}}}, - external_attributions={"SPDXRef-File": opossum_package}, - resources_to_attributions={"/A/D/C": ["SPDXRef-File"]}, - ) - - 
merged_information = merge_opossum_information( - [opossum_information, opossum_information_2] - ) - - assert merged_information.metadata == opossum_information.metadata - assert merged_information.resources == { - "A": { - "B": {}, - "D": {"C": 1}, - } - } - assert merged_information.external_attributions == { - "project-id-SPDXRef-Package": opossum_package, - "test-data-id-SPDXRef-File": opossum_package, - } - assert merged_information.resources_to_attributions == { - "/A/B/": ["project-id-SPDXRef-Package"], - "/A/D/C": ["test-data-id-SPDXRef-File"], - } - - -def test_merge_resources() -> None: - list_of_paths_with_resource_types = [ - [("A", ResourceType.FOLDER)], - [ - ("A", ResourceType.FOLDER), - ("B", ResourceType.FOLDER), - ("C", ResourceType.FILE), - ], - [("A", ResourceType.FOLDER), ("D", ResourceType.FILE)], - ] - - resource = Resource(type=ResourceType.TOP_LEVEL) - for path in list_of_paths_with_resource_types: - resource = resource.add_path(path) - - list_of_paths_with_resource_type = [ - [("A", ResourceType.FOLDER)], - [ - ("A", ResourceType.FOLDER), - ("B", ResourceType.FOLDER), - ("C", ResourceType.FILE), - ], - [("A", ResourceType.FOLDER), ("D", ResourceType.FILE)], - [ - ("C", ResourceType.FOLDER), - ("D", ResourceType.FOLDER), - ("E", ResourceType.FOLDER), - ], - ] - resource2 = Resource(type=ResourceType.TOP_LEVEL) - for path in list_of_paths_with_resource_type: - resource2 = resource2.add_path(path) - - resources = [resource, resource2] - merged_resource = _merge_resources(resources) - - assert merged_resource == Resource( - type=ResourceType.TOP_LEVEL, - children={ - "A": Resource( - type=ResourceType.FOLDER, - children={ - "B": Resource( - type=ResourceType.FOLDER, - children={"C": Resource(type=ResourceType.FILE)}, - ), - "D": Resource(type=ResourceType.FILE), - }, - ), - "C": Resource( - type=ResourceType.FOLDER, - children={ - "D": Resource( - type=ResourceType.FOLDER, - children={"E": Resource(type=ResourceType.FOLDER)}, - ) - }, - ), - 
}, - ) - - -@pytest.mark.parametrize( - "resources_to_attributions, expected_resources_to_attributions", - [ - ( - [ - {"resources/Path": ["identifier-A", "identifier-B"]}, - {"resources/Path": ["identifier-C"]}, - {"resources/Path/different": ["identifier-A"]}, - ], - { - "resources/Path": ["identifier-A", "identifier-B", "identifier-C"], - "resources/Path/different": ["identifier-A"], - }, - ), - ( - [{"resources/Path": ["uuid_1"]}, {"resources/Path": ["uuid_1", "uuid_2"]}], - { - "resources/Path": ["uuid_1", "uuid_2"], - }, - ), - ], -) -def test_merge_resources_to_attributions( - resources_to_attributions: list[dict[ResourcePath, list[OpossumPackageIdentifier]]], - expected_resources_to_attributions: dict[ - ResourcePath, list[OpossumPackageIdentifier] - ], -) -> None: - merged_resources_to_attributions = _merge_resources_to_attributions( - resources_to_attributions - ) - assert merged_resources_to_attributions == expected_resources_to_attributions - - -@mock.patch("opossum_lib.opossum.opossum_file.OpossumPackage", autospec=True) -def test_merge_dicts_without_duplicates(opossum_package: OpossumPackage) -> None: - dicts = [{"A": opossum_package}, {"B": opossum_package}] - merged_dict = _merge_dicts_without_duplicates(dicts) - - assert merged_dict == {"A": opossum_package, "B": opossum_package} - - -@mock.patch("opossum_lib.opossum.opossum_file.SourceInfo", autospec=True) -def test_merge_dicts_without_duplicates_type_error( - source_info: SourceInfo, -) -> None: - dicts = [ - {"A": OpossumPackage(source=source_info, comment="test package 1")}, - {"A": OpossumPackage(source=source_info, comment="test package 2")}, - ] - with pytest.raises(TypeError): - _merge_dicts_without_duplicates(dicts) - - -def test_expand_opossum_package_identifier() -> None: - opossum_package = OpossumPackage(source=SourceInfo(name="source-info")) - opossum_information_expanded = expand_opossum_package_identifier( - OpossumInformation( - metadata=Metadata( - project_id="project-id", - 
file_creation_date="2022-03-02", - project_title="project title", - ), - resources={}, - external_attributions={"SPDXRef-Package": opossum_package}, - resources_to_attributions={"/path/to/resource": ["SPDXRef-Package"]}, - attribution_breakpoints=[], - external_attribution_sources={}, - ) - ) - - assert opossum_information_expanded.resources_to_attributions == { - "/path/to/resource": ["project-id-SPDXRef-Package"] - } - assert opossum_information_expanded.external_attributions == { - "project-id-SPDXRef-Package": opossum_package - } diff --git a/tests/test_opossum/test_read_opossum_file.py b/tests/test_opossum/test_read_opossum_file.py deleted file mode 100644 index 2586f06..0000000 --- a/tests/test_opossum/test_read_opossum_file.py +++ /dev/null @@ -1,30 +0,0 @@ -# SPDX-FileCopyrightText: TNG Technology Consulting GmbH -# -# SPDX-License-Identifier: Apache-2.0 -from pathlib import Path - -import pytest -from _pytest.logging import LogCaptureFixture - -from opossum_lib.opossum.read_opossum_file import read_opossum_file - -TEST_DATA_DIRECTORY = Path(__file__).resolve().parent.parent / "data" - - -def test_read_opossum_file_corrupted_file_exits_1(caplog: LogCaptureFixture) -> None: - input_path = TEST_DATA_DIRECTORY / "opossum_input_corrupt.opossum" - - with pytest.raises(SystemExit) as system_exit: - read_opossum_file(str(input_path)) - assert system_exit.value.code == 1 - assert "is corrupt and does not contain 'input.json'" in caplog.messages[0] - - -def test_read_opossum_file_containing_output_json() -> None: - input_path = TEST_DATA_DIRECTORY / "opossum_input_with_result.opossum" - - result = read_opossum_file(str(input_path)) - - assert result is not None - assert result.input_file is not None - assert result.output_file is not None diff --git a/tests/test_scancode/generators/helpers.py b/tests/test_scancode/generators/helpers.py deleted file mode 100644 index 6161cf5..0000000 --- a/tests/test_scancode/generators/helpers.py +++ /dev/null @@ -1,26 +0,0 @@ -# 
SPDX-FileCopyrightText: TNG Technology Consulting GmbH -# -# SPDX-License-Identifier: Apache-2.0 -from collections.abc import Callable - -from faker.providers import BaseProvider -from faker.providers.misc import Provider as MiscProvider - - -def entry_or_none[T]( - faker: MiscProvider, entry: T, chance_of_getting_entry: int = 50 -) -> T | None: - if faker.boolean(chance_of_getting_entry): - return entry - else: - return None - - -def random_list[T]( - faker: BaseProvider, - entry_generator: Callable[[], T], - min_number_of_entries: int = 0, - max_number_of_entries: int = 3, -) -> list[T]: - number_of_entries = faker.random_int(min_number_of_entries, max_number_of_entries) - return [entry_generator() for _ in range(number_of_entries)] diff --git a/tests/test_scancode/model_helpers.py b/tests/test_scancode/model_helpers.py deleted file mode 100644 index 40a3a1b..0000000 --- a/tests/test_scancode/model_helpers.py +++ /dev/null @@ -1,128 +0,0 @@ -# SPDX-FileCopyrightText: TNG Technology Consulting GmbH -# -# SPDX-License-Identifier: Apache-2.0 - - -from pathlib import PurePath - -from opossum_lib.scancode.model import ( - Copyright, - File, - FileBasedLicenseDetection, - FileType, - Holder, - Url, -) - - -def _create_reference_scancode_files() -> list[File]: - return [ - _create_file("A", FileType.DIRECTORY), - _create_file("A/B", FileType.DIRECTORY), - _create_file("A/file1", FileType.FILE), - _create_file("A/file2.txt", FileType.FILE), - _create_file("A/B/file3", FileType.FILE), - ] - - -def _create_file( - path: str, - type: FileType, - *, - name: str | None = None, - base_name: str | None = None, - extension: str | None = None, - size: int = 0, - date: str | None = None, - sha1: str | None = None, - md5: str | None = None, - sha256: str | None = None, - mime_type: str | None = None, - file_type: str | None = None, - programming_language: str | None = None, - is_binary: bool = False, - is_text: bool = False, - is_archive: bool = False, - is_media: bool = False, - 
is_source: bool = False, - is_script: bool = False, - package_data: list | None = None, - for_packages: list | None = None, - detected_license_expression: str | None = None, - detected_license_expression_spdx: str | None = None, - license_detections: list[FileBasedLicenseDetection] | None = None, - license_clues: list | None = None, - percentage_of_license_text: float = 0.0, - copyrights: list[Copyright] | None = None, - holders: list[Holder] | None = None, - authors: list | None = None, - emails: list | None = None, - urls: list[Url] | None = None, - files_count: int = 0, - dirs_count: int = 0, - size_count: int = 0, - scan_errors: list | None = None, -) -> File: - if package_data is None: - package_data = [] - if for_packages is None: - for_packages = [] - if license_clues is None: - license_clues = [] - if license_detections is None: - license_detections = [] - if scan_errors is None: - scan_errors = [] - if urls is None: - urls = [] - if emails is None: - emails = [] - if authors is None: - authors = [] - if holders is None: - holders = [] - if copyrights is None: - copyrights = [] - if name is None: - name = PurePath(path).name - if base_name is None: - base_name = PurePath(PurePath(path).name).stem - if extension is None: - extension = PurePath(path).suffix - return File( - authors=authors, - base_name=base_name, - copyrights=copyrights, - date=date, - detected_license_expression=detected_license_expression, - detected_license_expression_spdx=detected_license_expression_spdx, - dirs_count=dirs_count, - emails=emails, - extension=extension, - files_count=files_count, - file_type=file_type, - for_packages=for_packages, - holders=holders, - is_archive=is_archive, - is_binary=is_binary, - is_media=is_media, - is_script=is_script, - is_source=is_source, - is_text=is_text, - license_clues=license_clues, - license_detections=license_detections, - md5=md5, - mime_type=mime_type, - name=name, - package_data=package_data, - path=path, - 
percentage_of_license_text=percentage_of_license_text, - programming_language=programming_language, - scan_errors=scan_errors, - sha1=sha1, - sha256=sha256, - size=size, - size_count=size_count, - type=type, - urls=urls, - ) diff --git a/tests/test_scancode/test_convert_scancode_to_opossum.py b/tests/test_scancode/test_convert_scancode_to_opossum.py deleted file mode 100644 index dcfe731..0000000 --- a/tests/test_scancode/test_convert_scancode_to_opossum.py +++ /dev/null @@ -1,76 +0,0 @@ -# SPDX-FileCopyrightText: TNG Technology Consulting GmbH -# -# SPDX-License-Identifier: Apache-2.0 - - -import pytest -from _pytest.logging import LogCaptureFixture - -from opossum_lib.opossum_model import Resource -from opossum_lib.scancode.convert_scancode_to_opossum import ( - convert_scancode_to_opossum, -) -from tests.test_setup.scancode_faker_setup import ScanCodeFaker - - -def test_extract_scancode_header_produces_expected_result( - scancode_faker: ScanCodeFaker, -) -> None: - scancode_data = scancode_faker.scancode_data() - opossum = convert_scancode_to_opossum( - scancode_data, - ) - metadata = opossum.scan_results.metadata - header = scancode_data.headers[0] - assert metadata.file_creation_date == header.end_timestamp - assert metadata.project_title == "ScanCode file" - - -def test_extract_scancode_header_errors_with_missing_header( - caplog: LogCaptureFixture, scancode_faker: ScanCodeFaker -) -> None: - scancode_data = scancode_faker.scancode_data(headers=[]) - - with pytest.raises(SystemExit): - convert_scancode_to_opossum(scancode_data) - - assert "header" in caplog.messages[0].lower() - - -def test_extract_scancode_header_error_with_multiple_headers( - caplog: LogCaptureFixture, scancode_faker: ScanCodeFaker -) -> None: - header1 = scancode_faker.header() - header2 = scancode_faker.header() - scancode_data = scancode_faker.scancode_data(headers=[header1, header2]) - - with pytest.raises(SystemExit): - convert_scancode_to_opossum(scancode_data) - - assert "header" in 
caplog.messages[0].lower() - - -def count_resources(resource: Resource) -> int: - return 1 + sum(count_resources(child) for child in resource.children.values()) - - -def count_attributions(resource: Resource) -> int: - return len(resource.attributions) + sum( - count_attributions(child) for child in resource.children.values() - ) - - -def test_convert_scancode_produces_expected_result( - scancode_faker: ScanCodeFaker, -) -> None: - scancode_data = scancode_faker.scancode_data() - opossum_data = convert_scancode_to_opossum(scancode_data) - - assert opossum_data.review_results is None - scan_results = opossum_data.scan_results - assert sum(count_resources(res) for res in scan_results.resources) == len( - scancode_data.files - ) - num_attributions = sum(count_attributions(res) for res in scan_results.resources) - num_license_detections = sum(len(f.license_detections) for f in scancode_data.files) - assert num_attributions == num_license_detections diff --git a/tests/test_scancode/test_get_attribution_info.py b/tests/test_scancode/test_get_attribution_info.py deleted file mode 100644 index cf1f847..0000000 --- a/tests/test_scancode/test_get_attribution_info.py +++ /dev/null @@ -1,83 +0,0 @@ -# SPDX-FileCopyrightText: TNG Technology Consulting GmbH -# -# SPDX-License-Identifier: Apache-2.0 - - -from opossum_lib.opossum.opossum_file import OpossumPackage, SourceInfo -from opossum_lib.scancode.constants import SCANCODE_SOURCE_NAME -from opossum_lib.scancode.convert_scancode_to_opossum import convert_scancode_to_opossum -from tests.test_setup.scancode_faker_setup import ScanCodeFaker - - -def test_get_attribution_info_directory(scancode_faker: ScanCodeFaker) -> None: - folder = scancode_faker.single_folder(path="some/single/folder") - scancode_data = scancode_faker.scancode_data(files=[folder]) - opossum = convert_scancode_to_opossum(scancode_data) - assert len(opossum.scan_results.resources) == 1 - assert opossum.scan_results.resources[0].attributions == [] - - -def 
test_get_attribution_info_from_file_without_detections( - scancode_faker: ScanCodeFaker, -) -> None: - file = scancode_faker.single_file(path="some/single/file", license_detections=[]) - scancode_data = scancode_faker.scancode_data(files=[file]) - opossum = convert_scancode_to_opossum(scancode_data) - assert len(opossum.scan_results.resources) == 1 - assert opossum.scan_results.resources[0].attributions == [] - - -def test_get_attribution_info_file_multiple(scancode_faker: ScanCodeFaker) -> None: - match1 = scancode_faker.match( - license_expression_spdx="Apache-2.0", - from_file="A", - score=75, - rule_relevance=50, - ) - match2 = scancode_faker.match( - license_expression_spdx="Apache-2.0", - from_file="A", - score=95, - rule_relevance=50, - ) - match3 = scancode_faker.match( - license_expression_spdx="MIT", - from_file="A", - score=50, - rule_relevance=50, - ) - license1 = scancode_faker.license_detection( - license_expression_spdx="Apache-2.0", - matches=[match1, match2], - ) - license2 = scancode_faker.license_detection( - license_expression_spdx="MIT", - matches=[match3], - ) - copyright1 = scancode_faker.copyright(copyright="Me") - copyright2 = scancode_faker.copyright(copyright="Myself") - copyright3 = scancode_faker.copyright(copyright="I") - file = scancode_faker.single_file( - path="A", - license_detections=[license1, license2], - copyrights=[copyright1, copyright2, copyright3], - ) - scancode_data = scancode_faker.scancode_data(files=[file]) - opossum = convert_scancode_to_opossum(scancode_data) - attributions = ( - opossum.to_opossum_file_format().input_file.external_attributions.values() - ) - - expected1 = OpossumPackage( - source=SourceInfo(name=SCANCODE_SOURCE_NAME), - license_name="Apache-2.0", - copyright="Me\nMyself\nI", - attribution_confidence=95, - ) - expected2 = OpossumPackage( - source=SourceInfo(name=SCANCODE_SOURCE_NAME), - license_name="MIT", - copyright="Me\nMyself\nI", - attribution_confidence=50, - ) - assert set(attributions) == 
{expected1, expected2} diff --git a/uv.lock b/uv.lock index 002d57a..975ca4b 100644 --- a/uv.lock +++ b/uv.lock @@ -80,6 +80,31 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b9/f8/feced7779d755758a52d1f6635d990b8d98dc0a29fa568bbe0625f18fdf3/filelock-3.16.1-py3-none-any.whl", hash = "sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0", size = 16163 }, ] +[[package]] +name = "grimp" +version = "3.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/df/bd/4f919930f4b9c89a6d6905e4e8e4f11aa2489e385064501ff191c7b7d9cb/grimp-3.5.tar.gz", hash = "sha256:dc9560aed1d82222b361fe5f312a775b15a96b9237670f3a9fb20f44d30d5762", size = 833830 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/31/02/33f4b4bc6d22b5ff7672ce5c67464f4dcee6e86cf3a072cdc9b1d46e5178/grimp-3.5-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:448dba63f938d0e13e6121559749816e3b2644202c912cc308e7608c6034737a", size = 351545 }, + { url = "https://files.pythonhosted.org/packages/78/e0/44bd64767763160ea0862ad756b88dfd375060b5636aaeb79c10861f09ae/grimp-3.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:17113aba41269d0ee91512c96eeb850c7c668440c6a8e0bfc94d17762184b293", size = 342780 }, + { url = "https://files.pythonhosted.org/packages/55/e6/b0fc52ab1ee04c80f1709eaa2e99468a8981d0a23dcbc7c2c6e781da22cc/grimp-3.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e6a55614945c319d1dc692c3e43f3a02b80c116a1298e593f5f887b97e6c983a", size = 399300 }, + { url = "https://files.pythonhosted.org/packages/dc/b9/aa6365859c96881d02f6ad6ab8aa38eaced5887f3cc431165ed9ee7b35ea/grimp-3.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:aba7ce7839b72efb4c6d06404d2b2a3026e28dd89816f4e546b3cd6626cbeeb1", size = 405288 }, + { url = 
"https://files.pythonhosted.org/packages/7d/f8/32446714ed0f23a1b8067f9f627c55f7c186d3477baa5c6e6312c95060ab/grimp-3.5-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eaedfba320a70d87b14acb25a685c8629586b943129c71ffd02b47d9531c11ce", size = 416565 }, + { url = "https://files.pythonhosted.org/packages/93/3c/185a0292111350823fdd7d61481157295d90260e248888ab274a00f84456/grimp-3.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:60a9afd3dd00ad1d301a07f97e26bc9ecdc3d2db39ab6ac46c315a7dea0a96cb", size = 437249 }, + { url = "https://files.pythonhosted.org/packages/1a/21/55ede2511dc111e9ca90d23e479e263e0845dc7e389515587f9575c5b139/grimp-3.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:11c66039c0475e5c9fc6a086264f11870181ae79f603caa5dffa1411ddad636b", size = 443560 }, + { url = "https://files.pythonhosted.org/packages/12/e1/79f6a4212d46ae6db6281c5b179990d2dd53c72eed7587592c0968f0d3f0/grimp-3.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bff39a0061790f074f86a7841cd8e6064aa7b2208cb1ee5c3f2e685dead2b66e", size = 393185 }, + { url = "https://files.pythonhosted.org/packages/46/62/1a7f8b54d73f3ef6595dd8bfa10686321c04b562fbb997e9d46f95fe7b96/grimp-3.5-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cf7f5367c4a87b8e9f08c09e7401d2d73f21cb65d6142445819f9df0d6ab3f6b", size = 573292 }, + { url = "https://files.pythonhosted.org/packages/24/1d/b90853a0b7eb7aa08073dafd896551d26a416e922a68eb1cab35319593c7/grimp-3.5-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:edee4b087f007dab8b65461caf6a1b67b2f9480cceb5f6aceea87008d8f283c4", size = 664226 }, + { url = "https://files.pythonhosted.org/packages/6b/7b/ce6cf6ba2da6305725692ea2a7a4080da60ac075d23353fc07cae7adb5b7/grimp-3.5-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:6af125013ad2a56c18f2f53a3fcabbfbe96c70374abecd6f14b82dc444726ebe", size = 589077 }, + { url = 
"https://files.pythonhosted.org/packages/ad/29/03156310a1bf278e8dd46e2e02311bad1d1551b1555d245e28008c318086/grimp-3.5-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:24aabae0183ca5fd5a710257ff37120b55d8e6d6d4cbb2c08481552832e5c901", size = 560463 }, + { url = "https://files.pythonhosted.org/packages/1e/29/300daf9b6a7f3f979a379cda69e04374912031fd85f3b6f0e03d9b637263/grimp-3.5-cp313-none-win32.whl", hash = "sha256:506091bfd600dd7ad427586998ef5e54a2098485148a1499bd9af5943d2fb0b7", size = 222673 }, + { url = "https://files.pythonhosted.org/packages/e5/8c/15834d919407c1ab11590367e03ac9d1c162d518a0f54f2970e315e0d623/grimp-3.5-cp313-none-win_amd64.whl", hash = "sha256:099388df82d922ddc589f362f1a523ab053c8dee5d29a6b622b2cddf481c6a2f", size = 236037 }, +] + [[package]] name = "identify" version = "2.6.5" @@ -89,6 +114,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ec/fa/dce098f4cdf7621aa8f7b4f919ce545891f489482f0bfa5102f3eca8608b/identify-2.6.5-py2.py3-none-any.whl", hash = "sha256:14181a47091eb75b337af4c23078c9d09225cd4c48929f521f3bf16b09d02566", size = 99078 }, ] +[[package]] +name = "import-linter" +version = "2.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "grimp" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5e/d3/546d27e9390061cbc692541612bbaa3f69a3ad23f592cfbfefe0517ff5b0/import_linter-2.1.tar.gz", hash = "sha256:393fadb2e91304c22c5ceab575213ed2b6a794abc7bd05ab345f2430930b2eae", size = 29160 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d8/38/137d5310c52c1b79d1588afa2ce5038cb3a10dbfd1a642962f3bc3ccbe61/import_linter-2.1-py3-none-any.whl", hash = "sha256:324d65035f0252a8e432f60256b0d0d32d8d5d6e4f8fd29716688b09d7a2217a", size = 41402 }, +] + [[package]] name = "iniconfig" version = "2.0.0" @@ -153,36 +192,40 @@ version = "0.1" source = { editable = "." 
} dependencies = [ { name = "click" }, - { name = "faker" }, { name = "pydantic" }, { name = "pyinstaller" }, ] [package.dev-dependencies] dev = [ + { name = "import-linter" }, { name = "mypy" }, { name = "pre-commit" }, { name = "ruff" }, ] test = [ + { name = "faker" }, { name = "pytest" }, ] [package.metadata] requires-dist = [ { name = "click", specifier = ">=8.1.8,<9" }, - { name = "faker", specifier = ">=35.0.0" }, { name = "pydantic", specifier = ">=2.10.6" }, { name = "pyinstaller", specifier = ">=6.11.1" }, ] [package.metadata.requires-dev] dev = [ + { name = "import-linter", specifier = ">=2.1" }, { name = "mypy", specifier = ">=1.14.1,<2" }, { name = "pre-commit", specifier = ">=4.1.0,<5" }, { name = "ruff", specifier = ">=0.9.3" }, ] -test = [{ name = "pytest", specifier = ">=8.3.4,<9" }] +test = [ + { name = "faker", specifier = ">=35.0.0" }, + { name = "pytest", specifier = ">=8.3.4,<9" }, +] [[package]] name = "packaging"