diff --git a/src/analysis/plugin/__init__.py b/src/analysis/plugin/__init__.py
index 639bb86d7..a7e18bcc5 100644
--- a/src/analysis/plugin/__init__.py
+++ b/src/analysis/plugin/__init__.py
@@ -1 +1 @@
-from .plugin import AnalysisPluginV0, Tag  # noqa: F401
+from .plugin import AnalysisFailedError, AnalysisPluginV0, Tag  # noqa: F401
diff --git a/src/analysis/plugin/plugin.py b/src/analysis/plugin/plugin.py
index 80819f041..798e1c6ff 100644
--- a/src/analysis/plugin/plugin.py
+++ b/src/analysis/plugin/plugin.py
@@ -13,7 +13,11 @@
 
 
 class AnalysisFailedError(Exception):
-    ...
+    """
+    This exception cancels an analysis in a controlled way while still providing context information; it does not
+    log an error with a traceback to the terminal. It is an "expected exception" during analysis: some requirement
+    is missing or the analysis input is incompatible, so the analysis cannot be performed.
+    """
 
 
 class Tag(BaseModel):
@@ -112,7 +116,7 @@ def analyze(
         file_handle: io.FileIO,
         virtual_file_path: dict,
         analyses: dict[str, pydantic.BaseModel],
-    ) -> typing.Optional[Schema]:
+    ) -> Schema:
         """Analyze a file.
 
-        May return None if nothing was found.
+        Raise :py:class:`AnalysisFailedError` if the analysis cannot be performed.
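The new exception gives plugin authors a controlled way to abort instead of returning None. A minimal sketch of how a V0 plugin might use it (the plugin name and the ELF check are illustrative, not part of this changeset):

import io

import pydantic
from semver import Version

from analysis.plugin import AnalysisFailedError, AnalysisPluginV0


class AnalysisPlugin(AnalysisPluginV0):
    class Schema(pydantic.BaseModel):
        is_elf: bool

    def __init__(self):
        super().__init__(
            metadata=self.MetaData(
                name='illustrative_plugin',  # hypothetical plugin name
                description='sketch of the expected-failure contract',
                version=Version(0, 1, 0),
                Schema=self.Schema,
            )
        )

    def analyze(self, file_handle: io.FileIO, virtual_file_path: dict, analyses: dict) -> Schema:
        del virtual_file_path, analyses
        if file_handle.read(4) != b'\x7fELF':  # illustrative precondition
            # expected failure: reported as context information, no traceback is logged
            raise AnalysisFailedError('not an ELF file')
        return self.Schema(is_elf=True)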
diff --git a/src/plugins/analysis/binwalk/code/binwalk.py b/src/plugins/analysis/binwalk/code/binwalk.py
index 10ba5e71d..7153ee481 100644
--- a/src/plugins/analysis/binwalk/code/binwalk.py
+++ b/src/plugins/analysis/binwalk/code/binwalk.py
@@ -7,6 +7,7 @@
 import binwalk
 from pydantic import BaseModel, Field
+from semver import Version
 
 import config
 from analysis.plugin import AnalysisPluginV0
 
@@ -37,10 +38,10 @@ class Schema(BaseModel):
 
     def __init__(self):
         super().__init__(
-            metadata=AnalysisPluginV0.MetaData(
+            metadata=self.MetaData(
                 name='binwalk',
                 description='binwalk signature and entropy analysis',
-                version='1.0.0',
+                version=Version(1, 0, 0),
                 Schema=self.Schema,
                 mime_blacklist=['audio/', 'image/', 'video/', 'text/', *MIME_BLACKLIST_COMPRESSED],
             ),
diff --git a/src/plugins/analysis/cpu_architecture/test/test_plugin_cpu_architecture.py b/src/plugins/analysis/cpu_architecture/test/test_plugin_cpu_architecture.py
index 0b2b81507..99318f836 100644
--- a/src/plugins/analysis/cpu_architecture/test/test_plugin_cpu_architecture.py
+++ b/src/plugins/analysis/cpu_architecture/test/test_plugin_cpu_architecture.py
@@ -212,7 +212,7 @@ def test_metadatadetector_get_device_architecture(architecture, bitness, endiann
 
 
 @pytest.mark.AnalysisPluginTestConfig(plugin_class=AnalysisPlugin)
-class TestAnalysisPluginsSoftwareComponents:
+class TestAnalysisPluginCpuArchitecture:
     def test_analyze(self, analysis_plugin):
         dependencies = {
             'kernel_config': _mock_kernel_config_analysis_arm,
diff --git a/src/plugins/analysis/crypto_hints/code/crypto_hints.py b/src/plugins/analysis/crypto_hints/code/crypto_hints.py
index 044803e41..4d98dbd2a 100644
--- a/src/plugins/analysis/crypto_hints/code/crypto_hints.py
+++ b/src/plugins/analysis/crypto_hints/code/crypto_hints.py
@@ -3,6 +3,7 @@
 from typing import TYPE_CHECKING, List
 
 import pydantic
+from semver import Version
 
 from analysis.plugin import AnalysisPluginV0, addons, compat
 
@@ -15,10 +16,10 @@ class Schema(pydantic.BaseModel):
         matches: List[dict]
 
     def __init__(self):
-        metadata = AnalysisPluginV0.MetaData(
+        metadata = self.MetaData(
             name='crypto_hints',
             description='find indicators of specific crypto algorithms',
-            version='0.2.1',
+            version=Version(0, 2, 1),
             Schema=AnalysisPlugin.Schema,
         )
         super().__init__(metadata=metadata)
diff --git a/src/plugins/analysis/crypto_material/code/crypto_material.py b/src/plugins/analysis/crypto_material/code/crypto_material.py
index 2a8186639..9b01a3978 100644
--- a/src/plugins/analysis/crypto_material/code/crypto_material.py
+++ b/src/plugins/analysis/crypto_material/code/crypto_material.py
@@ -4,6 +4,7 @@
 from typing import TYPE_CHECKING, List, NamedTuple
 
 from pydantic import BaseModel, Field
+from semver import Version
 
 from analysis.plugin import AnalysisPluginV0, Tag, addons, compat
 from helperFunctions.hash import get_md5
@@ -61,7 +62,7 @@ def __init__(self):
         metadata = self.MetaData(
             name='crypto_material',
             description='detects crypto material like SSH keys and SSL certificates',
-            version='1.0.0',
+            version=Version(1, 0, 0),
             mime_blacklist=['filesystem', *MIME_BLACKLIST_COMPRESSED],
             Schema=self.Schema,
         )
diff --git a/src/plugins/analysis/cwe_checker/code/cwe_checker.py b/src/plugins/analysis/cwe_checker/code/cwe_checker.py
index d7a45c765..554efa354 100644
--- a/src/plugins/analysis/cwe_checker/code/cwe_checker.py
+++ b/src/plugins/analysis/cwe_checker/code/cwe_checker.py
@@ -24,7 +24,7 @@
 from semver import Version
 
 import config
-from analysis.plugin import AnalysisPluginV0
+from analysis.plugin import AnalysisFailedError, AnalysisPluginV0
 from helperFunctions.docker import run_docker_container
 
 if TYPE_CHECKING:
@@ -137,14 +137,16 @@ def _do_full_analysis(self, file_path: str) -> dict:
         except json.JSONDecodeError as error:
             raise Exception(f'cwe_checker execution failed\nUID: {file_path}') from error
 
-    def analyze(self, file_handle: FileIO, virtual_file_path: dict, analyses: dict[str, BaseModel]) -> Schema | None:
+    def analyze(self, file_handle: FileIO, virtual_file_path: dict, analyses: dict[str, BaseModel]) -> Schema:
         """
-        This function handles only ELF executables. Otherwise, it returns an empty dictionary.
+        This function handles only ELF executables with a supported architecture; otherwise, an AnalysisFailedError is raised.
 
         It calls the cwe_checker docker container.
         """
         del virtual_file_path
         if not self._is_supported_arch(analyses['file_type']):
-            return None
+            full_type = analyses['file_type'].full
+            arch = full_type.split(',')[1].strip() if full_type.startswith('ELF') else 'Unknown'
+            raise AnalysisFailedError(f'Unsupported architecture: {arch}')
         result = self._do_full_analysis(file_handle.name)
         return self.Schema(
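For context on the architecture extraction above: libmagic type strings for ELF binaries carry the machine name in the second comma-separated field, which is what ends up in the error message. A small illustration (the sample string is typical `file` output, not taken from this changeset):

full_type = 'ELF 64-bit LSB executable, x86-64, version 1 (SYSV), dynamically linked'
arch = full_type.split(',')[1].strip() if full_type.startswith('ELF') else 'Unknown'
assert arch == 'x86-64'
# the plugin then raises: AnalysisFailedError(f'Unsupported architecture: {arch}')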
""" del virtual_file_path if not self._is_supported_arch(analyses['file_type']): - return None + full_type = analyses['file_type'].full + arch = full_type.split(',')[1].strip() if full_type.startswith('ELF') else 'Unknown' + raise AnalysisFailedError(f'Unsupported architecture: {arch}') result = self._do_full_analysis(file_handle.name) return self.Schema( diff --git a/src/plugins/analysis/device_tree/code/device_tree.py b/src/plugins/analysis/device_tree/code/device_tree.py index fbeec2368..0d997901c 100644 --- a/src/plugins/analysis/device_tree/code/device_tree.py +++ b/src/plugins/analysis/device_tree/code/device_tree.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Dict, Optional +from typing import TYPE_CHECKING, Dict from semver import Version @@ -16,7 +16,7 @@ class AnalysisPlugin(AnalysisPluginV0): def __init__(self): - metadata = AnalysisPluginV0.MetaData( + metadata = self.MetaData( name='device_tree', description='get the device tree in text from the device tree blob', version=Version(2, 0, 1), @@ -40,7 +40,7 @@ def analyze( file_handle: io.FileIO, virtual_file_path: dict, analyses: Dict[str, dict], - ) -> Optional[Schema]: + ) -> Schema: del virtual_file_path, analyses binary = file_handle.readall() diff --git a/src/plugins/analysis/example_plugin/code/example_plugin.py b/src/plugins/analysis/example_plugin/code/example_plugin.py index dce6adfce..705458177 100644 --- a/src/plugins/analysis/example_plugin/code/example_plugin.py +++ b/src/plugins/analysis/example_plugin/code/example_plugin.py @@ -1,9 +1,11 @@ import io +from pathlib import Path import pydantic from pydantic import Field +from semver import Version -from analysis.plugin import AnalysisPluginV0 +from analysis.plugin.plugin import AnalysisFailedError, AnalysisPluginV0 class AnalysisPlugin(AnalysisPluginV0): @@ -24,10 +26,10 @@ class Schema(pydantic.BaseModel): dependant_analysis: dict def __init__(self): - metadata = AnalysisPluginV0.MetaData( + metadata = self.MetaData( name='ExamplePlugin', description='An example description', - version='0.0.0', + version=Version(0, 0, 0), Schema=AnalysisPlugin.Schema, # Note that you don't have to set these fields, # they are just here to show that you can. 
@@ -43,11 +45,15 @@ def summarize(self, result):
         del result
         return ['big-file', 'binary']
 
-    def analyze(self, file_handle: io.FileIO, virtual_file_path: str, analyses: dict) -> Schema:
+    def analyze(self, file_handle: io.FileIO, virtual_file_path: dict, analyses: dict) -> Schema:
         first_byte = file_handle.read(1)
+        if first_byte == b'\xff':
+            raise AnalysisFailedError('reason for fail')
+        if first_byte == b'\xee':
+            raise Exception('Unexpected exception occurred.')
         return AnalysisPlugin.Schema(
             number=42,
-            name=file_handle.name,
+            name=Path(file_handle.name).name,
             first_byte=first_byte.hex(),
             virtual_file_path=virtual_file_path,
             dependant_analysis=analyses['file_type'].model_dump(),
diff --git a/src/plugins/analysis/file_system_metadata/code/file_system_metadata.py b/src/plugins/analysis/file_system_metadata/code/file_system_metadata.py
index c6f64b27f..58baf83e6 100644
--- a/src/plugins/analysis/file_system_metadata/code/file_system_metadata.py
+++ b/src/plugins/analysis/file_system_metadata/code/file_system_metadata.py
@@ -14,9 +14,10 @@
 
 from docker.types import Mount
 from pydantic import BaseModel, Field
+from semver import Version
 
 import config
-from analysis.plugin import AnalysisPluginV0, Tag
+from analysis.plugin import AnalysisFailedError, AnalysisPluginV0, Tag
 from helperFunctions.docker import run_docker_container
 from helperFunctions.tag import TagColor
 
@@ -130,7 +131,7 @@
             description=(
                 'extract file system metadata (e.g. owner, group, etc.) from file system images contained in firmware'
             ),
-            version='1.2.0',
+            version=Version(1, 2, 0),
             Schema=self.Schema,
             timeout=30,
         )
@@ -196,7 +197,7 @@ def _extract_metadata_from_file_system(self, file_handle: FileIO) -> list[FileMe
             return _analyze_metadata_of_mounted_dir(json.loads(output_file.read_bytes()))
         message = 'Mounting the file system failed'
         logging.warning(f'{message} for {file_handle.name}:\n{output}')
-        raise RuntimeError(message)
+        raise AnalysisFailedError(message)
 
     def _mount_in_docker(self, input_dir: str) -> str:
         result = run_docker_container(
diff --git a/src/plugins/analysis/file_system_metadata/test/test_plugin_file_system_metadata.py b/src/plugins/analysis/file_system_metadata/test/test_plugin_file_system_metadata.py
index 4d4bbc439..962a3dd18 100644
--- a/src/plugins/analysis/file_system_metadata/test/test_plugin_file_system_metadata.py
+++ b/src/plugins/analysis/file_system_metadata/test/test_plugin_file_system_metadata.py
@@ -6,6 +6,8 @@
 
 import pytest
 
+from analysis.plugin import AnalysisFailedError
+
 from ..code.file_system_metadata import (
     SGID_BIT,
     STICKY_BIT,
@@ -100,7 +102,7 @@
         assert result[testfile_sticky_key].modification_time == 1518167842.0
 
     def test_extract_metadata_from_file_system__unmountable(self, analysis_plugin):
-        with pytest.raises(RuntimeError, match='Mounting the file system failed'):
+        with pytest.raises(AnalysisFailedError, match='Mounting the file system failed'):
             analysis_plugin._extract_metadata_from_file_system(FileIO(self.test_file_tar))
 
     def test_extract_metadata_from_tar(self):
diff --git a/src/plugins/analysis/file_type/code/file_type.py b/src/plugins/analysis/file_type/code/file_type.py
index 6e8a83e09..789c721ae 100644
--- a/src/plugins/analysis/file_type/code/file_type.py
+++ b/src/plugins/analysis/file_type/code/file_type.py
@@ -5,6 +5,7 @@
 import pydantic
 from pydantic import Field
+from semver import Version
 
 from analysis.plugin import AnalysisPluginV0
 from helperFunctions import magic
 
@@ -24,10 +25,10 @@ class Schema(pydantic.BaseModel):
 
     def __init__(self):
         super().__init__(
-            metadata=AnalysisPluginV0.MetaData(
+            metadata=self.MetaData(
                 name='file_type',
                 description='identify the file type',
-                version='1.0.0',
+                version=Version(1, 0, 0),
                 Schema=AnalysisPlugin.Schema,
             ),
         )
diff --git a/src/plugins/analysis/hashlookup/code/hashlookup.py b/src/plugins/analysis/hashlookup/code/hashlookup.py
index 50123aff3..c197773a2 100644
--- a/src/plugins/analysis/hashlookup/code/hashlookup.py
+++ b/src/plugins/analysis/hashlookup/code/hashlookup.py
@@ -7,7 +7,7 @@
 from pydantic import BaseModel, Field, model_validator
 from semver import Version
 
-from analysis.plugin import AnalysisPluginV0
+from analysis.plugin import AnalysisFailedError, AnalysisPluginV0
 from plugins.mime_blacklists import MIME_BLACKLIST_COMPRESSED, MIME_BLACKLIST_NON_EXECUTABLE
 
 if TYPE_CHECKING:
@@ -109,19 +109,19 @@
             )
         )
 
-    def analyze(self, file_handle: FileIO, virtual_file_path: dict, analyses: dict[str, BaseModel]) -> Schema | None:
+    def analyze(self, file_handle: FileIO, virtual_file_path: dict, analyses: dict[str, BaseModel]) -> Schema:
         del file_handle, virtual_file_path
         try:
             sha2_hash = analyses['file_hashes'].sha256
         except (KeyError, AttributeError) as error:
-            raise HashLookupError('sha256 hash is missing in dependency results') from error
+            raise AnalysisFailedError('sha256 hash is missing in dependency results') from error
 
         result = _look_up_hash(sha2_hash.upper())
         if 'FileName' not in result:
             if 'message' in result and result['message'] == 'Non existing SHA-256':
-                # sha256 hash unknown to hashlookup at time of analysis'
-                return None
+                # sha256 hash unknown to hashlookup at time of analysis
+                raise AnalysisFailedError('No record found in circl.lu for this file.')
             raise HashLookupError('Unknown error connecting to hashlookup API')
 
         return self.Schema.model_validate(result)
@@ -134,4 +134,4 @@ def _look_up_hash(sha2_hash: str) -> dict:
         url = f'https://hashlookup.circl.lu/lookup/sha256/{sha2_hash}'
         return requests.get(url, headers={'accept': 'application/json'}).json()
     except (requests.ConnectionError, json.JSONDecodeError) as error:
-        raise HashLookupError('Failed to connect to circl.lu hashlookup API') from error
+        raise AnalysisFailedError('Failed to connect to circl.lu hashlookup API') from error
diff --git a/src/plugins/analysis/hashlookup/test/test_hashlookup.py b/src/plugins/analysis/hashlookup/test/test_hashlookup.py
index e1fed320b..55e673415 100644
--- a/src/plugins/analysis/hashlookup/test/test_hashlookup.py
+++ b/src/plugins/analysis/hashlookup/test/test_hashlookup.py
@@ -1,5 +1,6 @@
 import pytest
 
+from analysis.plugin import AnalysisFailedError
 from plugins.analysis.hash.code.hash import AnalysisPlugin as HashPlugin
 from plugins.analysis.hashlookup.code.hashlookup import AnalysisPlugin, HashLookupError
 
@@ -71,8 +72,8 @@
 
     def test_process_object_unknown_hash(self, analysis_plugin):
         dependencies = {'file_hashes': HashPlugin.Schema(md5='', sha256='unknown_hash')}
-        result = analysis_plugin.analyze(None, {}, dependencies)
-        assert result is None
+        with pytest.raises(AnalysisFailedError, match='No record found'):
+            analysis_plugin.analyze(None, {}, dependencies)
 
     def test_process_object_error(self, analysis_plugin):
         dependencies = {'file_hashes': HashPlugin.Schema(md5='', sha256='connection_error')}
diff --git a/src/plugins/analysis/input_vectors/code/input_vectors.py b/src/plugins/analysis/input_vectors/code/input_vectors.py
index 38d6a53ef..eeee07e04 100644
--- a/src/plugins/analysis/input_vectors/code/input_vectors.py
+++ b/src/plugins/analysis/input_vectors/code/input_vectors.py
@@ -6,7 +6,6 @@
 from docker.errors import DockerException
 from docker.types import Mount
 from pydantic import BaseModel
-from requests.exceptions import ReadTimeout
 from semver import Version
 
 from analysis.plugin import AnalysisPluginV0
@@ -63,6 +62,7 @@ def __init__(self):
                     'application/x-sharedlib',
                     'application/x-pie-executable',
                 ],
+                timeout=TIMEOUT_IN_SECONDS,
             ),
         )
 
@@ -83,15 +83,13 @@ def _run_docker(self, file_handle: FileIO) -> dict:
                 # We explicitly don't want stderr to ignore "Cannot analyse at [...]"
                 combine_stderr_stdout=False,
                 logging_label=self.metadata.name,
-                timeout=TIMEOUT_IN_SECONDS,
+                timeout=TIMEOUT_IN_SECONDS - 10,
                 command=CONTAINER_TARGET_PATH,
                 mounts=[
                     Mount(CONTAINER_TARGET_PATH, file_handle.name, type='bind', read_only=True),
                 ],
             )
             analysis_data = loads(result.stdout)['full']
-        except ReadTimeout as err:
-            raise InputVectorsAnalysisError('Analysis timed out. It might not be complete.') from err
         except (DockerException, OSError, KeyError) as err:
             raise InputVectorsAnalysisError('Analysis issues. It might not be complete.') from err
         except JSONDecodeError as err:
diff --git a/src/plugins/analysis/ip_and_uri_finder/code/__init__.py b/src/plugins/analysis/ip_and_uri_finder/code/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/plugins/analysis/ip_and_uri_finder/internal/__init__.py b/src/plugins/analysis/ip_and_uri_finder/internal/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/plugins/analysis/known_vulnerabilities/code/known_vulnerabilities.py b/src/plugins/analysis/known_vulnerabilities/code/known_vulnerabilities.py
index a52e8e052..1cb613c3a 100644
--- a/src/plugins/analysis/known_vulnerabilities/code/known_vulnerabilities.py
+++ b/src/plugins/analysis/known_vulnerabilities/code/known_vulnerabilities.py
@@ -38,7 +38,7 @@ class Schema(BaseModel):
         vulnerabilities: List[Vulnerability]
 
     def __init__(self):
-        metadata = AnalysisPluginV0.MetaData(
+        metadata = self.MetaData(
             name='known_vulnerabilities',
             description='Rule based detection of known vulnerabilities like Heartbleed',
             dependencies=['file_hashes', 'software_components'],
diff --git a/src/plugins/analysis/linter/code/source_code_analysis.py b/src/plugins/analysis/linter/code/source_code_analysis.py
index cb9b002ee..2469c3f5a 100644
--- a/src/plugins/analysis/linter/code/source_code_analysis.py
+++ b/src/plugins/analysis/linter/code/source_code_analysis.py
@@ -63,7 +63,7 @@ class Issue(pydantic.BaseModel):
 
     def __init__(self):
         super().__init__(
-            metadata=AnalysisPluginV0.MetaData(
+            metadata=self.MetaData(
                 name='source_code_analysis',
                 description='This plugin implements static code analysis for multiple scripting languages',
                 version=Version(0, 7, 3),
diff --git a/src/plugins/analysis/software_components/test/test_plugin_software_components.py b/src/plugins/analysis/software_components/test/test_plugin_software_components.py
index beb5d8a2d..e79ad87f5 100644
--- a/src/plugins/analysis/software_components/test/test_plugin_software_components.py
+++ b/src/plugins/analysis/software_components/test/test_plugin_software_components.py
@@ -8,7 +8,7 @@
 
 
 @pytest.mark.AnalysisPluginTestConfig(plugin_class=AnalysisPlugin)
-class TestAnalysisPluginsSoftwareComponents:
+class TestAnalysisPluginSoftwareComponents:
     def test_process_object(self, analysis_plugin):
         with YARA_TEST_FILE.open('rb') as fp:
             results = analysis_plugin.analyze(fp, {}, {})
diff --git a/src/plugins/analysis/strings/code/strings.py b/src/plugins/analysis/strings/code/strings.py
index 4dceaecaa..355e876f6 100644
--- a/src/plugins/analysis/strings/code/strings.py
+++ b/src/plugins/analysis/strings/code/strings.py
@@ -55,7 +55,7 @@ def _compile_regexes(self) -> list[tuple[Pattern[bytes], str]]:
             for regex, encoding in STRING_REGEXES
         ]
 
-    def analyze(self, file_handle: FileIO, virtual_file_path: dict, analyses: dict[str, BaseModel]):
+    def analyze(self, file_handle: FileIO, virtual_file_path: dict, analyses: dict[str, BaseModel]) -> Schema:
         del virtual_file_path, analyses
         return self.Schema(
             strings=[
diff --git a/src/plugins/analysis/users_and_passwords/code/password_file_analyzer.py b/src/plugins/analysis/users_and_passwords/code/password_file_analyzer.py
index 999d8c624..2c40e8ce6 100644
--- a/src/plugins/analysis/users_and_passwords/code/password_file_analyzer.py
+++ b/src/plugins/analysis/users_and_passwords/code/password_file_analyzer.py
@@ -30,7 +30,7 @@ class Schema(pydantic.BaseModel):
 
     def __init__(self):
         super().__init__(
-            metadata=AnalysisPluginV0.MetaData(
+            metadata=self.MetaData(
                 name='users_and_passwords',
                 description=(
                     'search for UNIX, httpd, and mosquitto password files, parse them and try to crack the passwords'
diff --git a/src/plugins/analysis/users_and_passwords/test/test_plugin_password_file_analyzer.py b/src/plugins/analysis/users_and_passwords/test/test_plugin_password_file_analyzer.py
index 6817c78f1..85e8a4ecb 100644
--- a/src/plugins/analysis/users_and_passwords/test/test_plugin_password_file_analyzer.py
+++ b/src/plugins/analysis/users_and_passwords/test/test_plugin_password_file_analyzer.py
@@ -10,7 +10,7 @@
 
 
 @pytest.mark.AnalysisPluginTestConfig(plugin_class=AnalysisPlugin)
-class TestAnalysisPluginPasswordFileAnalyzer:
+class TestAnalysisPluginUsersAndPasswords:
     def test_process_object_shadow_file(self, analysis_plugin):
         test_file = TEST_DATA_DIR / 'passwd_test'
         with test_file.open() as fp:
diff --git a/src/scheduler/analysis/plugin.py b/src/scheduler/analysis/plugin.py
index f14a5857b..aa3bd2769 100644
--- a/src/scheduler/analysis/plugin.py
+++ b/src/scheduler/analysis/plugin.py
@@ -9,18 +9,19 @@
 import signal
 import time
 import traceback
-import typing
+from typing import TYPE_CHECKING, Dict, Type
 
 import psutil
 import pydantic
 from pydantic import BaseModel, ConfigDict
 
 import config
-from objects.file import FileObject  # noqa: TCH001 # needed by pydantic
+from analysis.plugin.plugin import AnalysisFailedError
+from objects.file import FileObject  # noqa: TCH001 # needed by pydantic
 from statistic.analysis_stats import ANALYSIS_STATS_LIMIT
 from storage.fsorganizer import FSOrganizer
 
-if typing.TYPE_CHECKING:
+if TYPE_CHECKING:
     from analysis.plugin import AnalysisPluginV0
 
@@ -37,11 +38,11 @@ class Task(BaseModel):
 
     #: The virtual file path of the file object
     #: See :py:class:`FileObject`.
-    virtual_file_path: typing.Dict
+    virtual_file_path: Dict
     #: The path of the file on the disk
     path: str
     #: A dictionary containing plugin names as keys and their analysis as value.
-    dependencies: typing.Dict
+    dependencies: Dict
-    #: The schedulers state associated with the file that is analyzed.
+    #: The scheduler's state associated with the file that is analyzed.
     #: Here it is just the whole FileObject
     # We need this because the scheduler is using multiple processes which
@@ -57,7 +58,7 @@ def __init__(
         self,
         plugin: AnalysisPluginV0,
         config: Config,
-        schemata: typing.Dict[str, pydantic.BaseModel],
+        schemata: Dict[str, Type[pydantic.BaseModel]],
     ):
         self._plugin = plugin
         self._config = config
@@ -186,7 +187,7 @@ def run(self):  # noqa: C901, PLR0912, PLR0915
 
         def _handle_sigterm(signum, frame):
             del signum, frame
-            logging.info(f'{self} received SIGTERM. Shutting down.')
+            logging.debug(f'{self} received SIGTERM. Shutting down.')
             nonlocal run
             nonlocal result
             run = False
@@ -197,8 +198,8 @@ def _handle_sigterm(signum, frame):
             if not child_process.is_alive():
                 return
 
-            if not recv_conn.poll(Worker.SIGTERM_TIMEOUT):
-                raise Worker.TimeoutError(Worker.SIGTERM_TIMEOUT)
+            if not recv_conn.poll(self.SIGTERM_TIMEOUT):
+                raise self.TimeoutError(self.SIGTERM_TIMEOUT)
 
             result = recv_conn.recv()
@@ -226,11 +227,13 @@
                 child_process.start()
                 # If process crashes without an exception (e.g. SEGFAULT) we will report a timeout
                 if not recv_conn.poll(self._worker_config.timeout):
-                    raise Worker.TimeoutError(self._worker_config.timeout)
+                    raise self.TimeoutError(self._worker_config.timeout)
 
                 result = recv_conn.recv()
 
                 if isinstance(result, str):
+                    if result.startswith('Analysis failed'):
+                        raise AnalysisFailedError(result)
                     raise AnalysisExceptionError(result)
 
                 duration = time.time() - start_time
@@ -240,12 +243,14 @@
                 if duration > 120:  # noqa: PLR2004
                     logging.info(f'{analysis_description} is slow: took {duration:.1f} seconds')
                 self._update_duration_stats(duration)
-            except Worker.TimeoutError as err:
+            except self.TimeoutError as err:
                 logging.warning(f'{analysis_description} timed out after {err.timeout} seconds.')
                 entry['timeout'] = (self._plugin.metadata.name, 'Analysis timed out')
-            except Worker.CrashedError:
+            except self.CrashedError:
                 logging.warning(f'{analysis_description} crashed.')
                 entry['exception'] = (self._plugin.metadata.name, 'Analysis crashed')
+            except AnalysisFailedError as exc:
+                entry['exception'] = (self._plugin.metadata.name, str(exc))
             except AnalysisExceptionError as exc:
                 logging.error(f'{self} got an exception during {analysis_description}: {exc}')
                 entry['exception'] = (self._plugin.metadata.name, 'Exception occurred during analysis')
@@ -267,7 +272,7 @@ def _handle_sigterm(signum, frame):
 
     def _write_result_in_file_object(self, entry: dict, file_object: FileObject):
         """Takes a file_object and an entry as it is returned by :py:func:`Worker.run`
-        and returns a FileObject with the corresponding fileds set.
+        and returns a FileObject with the corresponding fields set.
         """
         if 'analysis' in entry:
             file_object.processed_analysis[self._plugin.metadata.name] = entry['analysis']
@@ -284,6 +289,8 @@ def _child_entrypoint(plugin: AnalysisPluginV0, task: PluginRunner.Task, conn: m
     """
     try:
         result = plugin.get_analysis(io.FileIO(task.path), task.virtual_file_path, task.dependencies)
+    except AnalysisFailedError as exc:
+        result = f'Analysis failed: {exc}'
     except Exception as exc:
         result = f'{exc}: {traceback.format_exc()}'
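Worker results cross the process boundary through a pipe, so an expected failure is serialized as a plain string and re-raised on the parent side based on the 'Analysis failed' prefix. A toy, self-contained reconstruction of that round trip (with a stand-in exception class; the real code lives in `_child_entrypoint` and `Worker.run` above):

import multiprocessing


class AnalysisFailedError(Exception):  # stand-in for analysis.plugin.AnalysisFailedError
    ...


def _child(conn):
    try:
        raise AnalysisFailedError('unsupported architecture')  # what a plugin's analyze() might raise
    except AnalysisFailedError as exc:
        conn.send(f'Analysis failed: {exc}')  # expected failure: serialized with the prefix
    except Exception as exc:
        conn.send(f'{exc}')  # unexpected exception: serialized without the prefix


if __name__ == '__main__':
    recv_conn, send_conn = multiprocessing.Pipe(duplex=False)
    process = multiprocessing.Process(target=_child, args=(send_conn,))
    process.start()
    result = recv_conn.recv()
    process.join()
    if isinstance(result, str) and result.startswith('Analysis failed'):
        print(f'expected failure: {result}')  # Worker.run() raises AnalysisFailedError at this point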
""" if 'analysis' in entry: file_object.processed_analysis[self._plugin.metadata.name] = entry['analysis'] @@ -284,6 +289,8 @@ def _child_entrypoint(plugin: AnalysisPluginV0, task: PluginRunner.Task, conn: m """ try: result = plugin.get_analysis(io.FileIO(task.path), task.virtual_file_path, task.dependencies) + except AnalysisFailedError as exc: + result = f'Analysis failed: {exc}' except Exception as exc: result = f'{exc}: {traceback.format_exc()}' diff --git a/src/scheduler/analysis/scheduler.py b/src/scheduler/analysis/scheduler.py index ce6140688..9703d4c0e 100644 --- a/src/scheduler/analysis/scheduler.py +++ b/src/scheduler/analysis/scheduler.py @@ -530,9 +530,9 @@ def _result_collector(self, index: int = 0): logging.debug(f'Stopped analysis result collector worker {index}') def _handle_collected_result(self, fo: FileObject, plugin_name: str): + if fo.analysis_exception: + self.task_scheduler.reschedule_failed_analysis_task(fo) if plugin_name in fo.processed_analysis: - if fo.analysis_exception: - self.task_scheduler.reschedule_failed_analysis_task(fo) self.status.add_analysis(fo, plugin_name) self.post_analysis(fo.uid, plugin_name, fo.processed_analysis[plugin_name]) self._check_further_process_or_complete(fo) diff --git a/src/test/integration/scheduler/test_analysis_fail.py b/src/test/integration/scheduler/test_analysis_fail.py new file mode 100644 index 000000000..243f373ea --- /dev/null +++ b/src/test/integration/scheduler/test_analysis_fail.py @@ -0,0 +1,45 @@ +from tempfile import NamedTemporaryFile + +import pytest + +from objects.firmware import Firmware + +REGULAR_RESULT = { + 'dependant_analysis': {'full': 'ASCII text, with no line terminators', 'mime': 'text/plain'}, + 'first_byte': '6e', + 'name': 'b55f174c36fd4f56ecc04931099300014d4014377c73af1fa433e258a9b38604_14', + 'number': 42, + 'virtual_file_path': {}, +} + + +@pytest.mark.parametrize( + ('content', 'expected_result'), + [ + (b'normal content', REGULAR_RESULT), + (b'\xeeException', {'failed': 'Exception occurred during analysis'}), + (b'\xffFailed', {'failed': 'Analysis failed: reason for fail'}), + ], +) +def test_analysis_fail(content, expected_result, analysis_scheduler, post_analysis_queue): + with NamedTemporaryFile() as tmp_file: + tmp_file.write(content) + tmp_file.flush() + test_fw = Firmware(file_path=tmp_file.name) + test_fw.release_date = '1970-01-01' + test_fw.scheduled_analysis = ['ExamplePlugin'] + + analysis_scheduler.start_analysis_of_object(test_fw) + + processed_container = {} + for _ in range(3): # container with 3 included files times 2 mandatory plugins run + uid, plugin, analysis_result = post_analysis_queue.get(timeout=3) + processed_container.setdefault(uid, {}).setdefault(plugin, {}) + processed_container[uid][plugin] = analysis_result + + assert len(processed_container) == 1, '1 files should have been analyzed' + assert all( + set(processed_analysis) == {'ExamplePlugin', 'file_hashes', 'file_type'} + for processed_analysis in processed_container.values() + ), 'at least one analysis not done' + assert processed_container[test_fw.uid]['ExamplePlugin']['result'] == expected_result diff --git a/src/test/unit/analysis/test_plugin_worker.py b/src/test/unit/analysis/test_plugin_worker.py new file mode 100644 index 000000000..67deeb905 --- /dev/null +++ b/src/test/unit/analysis/test_plugin_worker.py @@ -0,0 +1,113 @@ +import ctypes +from multiprocessing import Array, Queue, Value +from pathlib import Path +from tempfile import NamedTemporaryFile +from time import sleep + +from pydantic import 
+from semver import Version
+
+from analysis.plugin import AnalysisFailedError, AnalysisPluginV0
+from scheduler.analysis.plugin import PluginRunner, Worker
+from test.common_helper import create_test_file_object
+
+
+class NormalPlugin(AnalysisPluginV0):
+    class Schema(BaseModel):
+        foo: str
+
+    def __init__(self):
+        metadata = self.MetaData(
+            name=self.__class__.__name__,
+            description='',
+            Schema=self.Schema,
+            version=Version(0, 1, 0),
+        )
+        super().__init__(metadata)
+
+    def analyze(self, file_handle, virtual_file_path, analyses):
+        return self.Schema(foo='foo')
+
+
+def _get_worker(plugin, timeout=5):
+    in_queue, out_queue = Queue(), Queue()
+    stats = Array(ctypes.c_float, 10)
+    stats_count = Value('i', 0)
+    stats_index = Value('i', 0)
+    worker_config = Worker.Config(timeout=timeout)
+    worker = Worker(plugin, worker_config, in_queue, out_queue, stats, stats_count, stats_index)
+    worker.SIGTERM_TIMEOUT = 0.1
+    return worker
+
+
+def _run_worker(worker: Worker):
+    with NamedTemporaryFile() as temp_file:
+        Path(temp_file.name).write_text('foo')
+        test_fo = create_test_file_object()
+        task = PluginRunner.Task(virtual_file_path={}, path=temp_file.name, dependencies={}, scheduler_state=test_fo)
+        worker._in_queue.put(task)
+        worker.start()
+        try:
+            return worker._out_queue.get(timeout=10)
+        finally:
+            worker.terminate()
+
+
+def test_worker():
+    plugin = NormalPlugin()
+    worker = _get_worker(plugin)
+    output_fo = _run_worker(worker)
+
+    assert plugin.metadata.name in output_fo.processed_analysis
+    assert output_fo.processed_analysis[plugin.metadata.name]['result'] == {'foo': 'foo'}
+
+
+class TimeoutPlugin(NormalPlugin):
+    def analyze(self, file_handle, virtual_file_path, analyses):
+        sleep(10)
+
+
+def test_worker_timeout():
+    plugin = TimeoutPlugin()
+    worker = _get_worker(plugin, timeout=1)
+    output_fo = _run_worker(worker)
+
+    analysis_exception = getattr(output_fo, 'analysis_exception', None)
+    assert analysis_exception is not None
+    plugin_name, error = analysis_exception
+    assert plugin_name == plugin.metadata.name
+    assert error == 'Analysis timed out'
+
+
+class ExceptionPlugin(NormalPlugin):
+    def analyze(self, file_handle, virtual_file_path, analyses):
+        raise Exception('unknown exception')
+
+
+def test_worker_exception():
+    plugin = ExceptionPlugin()
+    worker = _get_worker(plugin)
+    output_fo = _run_worker(worker)
+
+    analysis_exception = getattr(output_fo, 'analysis_exception', None)
+    assert analysis_exception is not None
+    plugin_name, error = analysis_exception
+    assert plugin_name == plugin.metadata.name
+    assert error == 'Exception occurred during analysis'
+
+
+class FailPlugin(NormalPlugin):
+    def analyze(self, file_handle, virtual_file_path, analyses):
+        raise AnalysisFailedError('reason')
+
+
+def test_worker_failed():
+    plugin = FailPlugin()
+    worker = _get_worker(plugin)
+    output_fo = _run_worker(worker)
+
+    analysis_exception = getattr(output_fo, 'analysis_exception', None)
+    assert analysis_exception is not None
+    plugin_name, error = analysis_exception
+    assert plugin_name == plugin.metadata.name
+    assert error == 'Analysis failed: reason'
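With failures modeled as exceptions, plugin unit tests can assert on them directly, as the updated hashlookup test does. A sketch of the pattern for other plugins (the fixture, dependency dict, and match string are placeholders, not part of this changeset):

import pytest

from analysis.plugin import AnalysisFailedError


def test_analyze_fails_on_missing_dependency(analysis_plugin):  # hypothetical test, mirrors test_hashlookup.py
    with pytest.raises(AnalysisFailedError, match='sha256 hash is missing'):
        analysis_plugin.analyze(None, {}, {'file_hashes': None})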