diff --git a/.python-version b/.python-version
index 424e179..2c07333 100644
--- a/.python-version
+++ b/.python-version
@@ -1 +1 @@
-3.6.8
+3.11
diff --git a/.travis.yml b/.travis.yml
index 1ef825f..da12660 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,9 +1,11 @@
 language: python
+dist: jammy
 python:
-  - "3.6"
+  - "3.11"
 # command to install dependencies
 install:
   - make dev
 # command to run tests
 script:
   - make tests
+  - make coverage
diff --git a/Makefile b/Makefile
index b16471c..4e7b15f 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,9 @@
 
 PYTHON=venv/bin/python3
 PIP=venv/bin/pip
-NOSE=venv/bin/nosetests
+COVERAGE=venv/bin/coverage
+TEST_RUNNER=venv/bin/pytest
+TEST_RUNNER_FLAGS=-s --durations=3 --durations-min=0.005
 FLAKE=venv/bin/flake8
 PYPICLOUD_HOST=pypicloud.getkeepsafe.local
 PIP_ARGS=--extra-index=http://$(PYPICLOUD_HOST)/simple/ --trusted-host $(PYPICLOUD_HOST)
@@ -30,14 +32,16 @@ flake:
 	$(FLAKE) validator tests
 
 test: flake
-	$(NOSE) -s $(FLAGS)
+	$(COVERAGE) run -m pytest $(TEST_RUNNER_FLAGS)
 
 vtest:
-	$(NOSE) -s -v $(FLAGS)
+	$(COVERAGE) run -m pytest -v $(TEST_RUNNER_FLAGS)
+
+testloop:
+	while sleep 1; do $(TEST_RUNNER) -s --lf $(TEST_RUNNER_FLAGS); done
 
 cov cover coverage:
-	$(NOSE) -s --with-cover --cover-html --cover-html-dir ./coverage $(FLAGS)
-	echo "open file://`pwd`/coverage/index.html"
+	$(COVERAGE) report -m
 
 clean:
 	rm -rf `find . -name __pycache__`
diff --git a/setup.cfg b/setup.cfg
index 0945a29..15ae1fa 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -7,3 +7,9 @@ ignore = F403
 
 [pep8]
 max-line-length = 120
+
+[coverage:run]
+branch = True
+
+[coverage:report]
+fail_under = 96
diff --git a/setup.py b/setup.py
index 6f1ddb5..bd913c5 100644
--- a/setup.py
+++ b/setup.py
@@ -1,8 +1,7 @@
 import os
 from setuptools import setup, find_packages
 
-
-version = '0.7.2'
+version = '1.0.0'
 
 
 def read(f):
@@ -10,18 +9,18 @@
 
 
 install_requires = [
-    'sdiff @ git+https://github.com/KeepSafe/html-structure-diff.git@0.4.1#egg=sdiff',
-    'aiohttp >=3, <3.4',
+    'sdiff @ git+https://github.com/KeepSafe/html-structure-diff.git@1.0.0#egg=sdiff',
+    'aiohttp==3.8.5',
     'Markdown',
     'parse <= 1.8.2',
     'beautifulsoup4 >=4, <5',
-    'lxml >=3',
+    'lxml<5',
 ]
 
 tests_require = [
-    'nose',
-    'flake8==3.6.0',
-    'coverage',
+    'pytest >= 8',
+    'coverage==7.6.1',
+    'flake8==7.1.1',
 ]
 
 devtools_require = [
@@ -32,6 +31,7 @@ def read(f):
 setup(
     name='content-validator',
     version=version,
+    python_requires='>=3.11',
     description=('Content validator looks at text content and preforms different validation tasks'),
     classifiers=[
         'License :: OSI Approved :: BSD License', 'Intended Audience :: Developers', 'Programming Language :: Python'
diff --git a/tests/utils.py b/tests/utils.py
index 3948220..e3aa96c 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -1,4 +1,3 @@
-
 def read(path):
     with open(path) as fp:
         return fp.read()
diff --git a/validator/__init__.py b/validator/__init__.py
index c7301e4..42d7083 100644
--- a/validator/__init__.py
+++ b/validator/__init__.py
@@ -3,7 +3,7 @@
 from . import parsers, checks, reports, fs
 
 
-class Validator(object):
+class Validator:
     def __init__(self, contents, parser, reader, check, reporter=None):
         self.contents = contents
         self.parser = parser
@@ -24,7 +24,7 @@ async def async_validate(self):
         return errors
 
 
-class ReportBuilder(object):
+class ReportBuilder:
     def __init__(self, contents, parser, reader, check):
         self.contents = contents
         self.parser = parser
@@ -49,7 +49,7 @@ def validate(self):
         return Validator(self.contents, self.parser, self.reader, self.check, reporter).validate()
 
 
-class CheckBuilder(object):
+class CheckBuilder:
     def __init__(self, contents, content_type, parser, reader):
         self.contents = contents
         self.content_type = content_type
@@ -89,7 +89,7 @@ async def async_validate(self):
         return res
 
 
-class ParserBuilder(object):
+class ParserBuilder:
     def __init__(self, contents, reader=None):
         self.contents = contents
         self.content_type = 'txt'
@@ -120,7 +120,7 @@ def check(self):
         return CheckBuilder(self.contents, self.content_type, parser, self.reader)
 
 
-class ContentBuilder(object):
+class ContentBuilder:
     def files(self, pattern, **kwargs):
         contents = fs.files(pattern, **kwargs)
         return ParserBuilder(contents, parsers.FileReader())
diff --git a/validator/checks/__init__.py b/validator/checks/__init__.py
index 9dc692f..61861f2 100644
--- a/validator/checks/__init__.py
+++ b/validator/checks/__init__.py
@@ -1,5 +1,3 @@
-from typing import Type
-
 from sdiff import MdParser
 
 from .md import MarkdownComparator
@@ -21,7 +19,7 @@ def url_occurences(filetype):
     return UrlOccurenciesValidator()
 
 
-def markdown(filetype, md_parser_cls: Type[MdParser] = MdParser):
+def markdown(filetype, md_parser_cls: type[MdParser] = MdParser):
     if filetype not in ['txt', 'html']:
         raise UndefinedCheckTypeError('got filetype %s' % filetype)
     return MarkdownComparator(md_parser_cls)
@@ -33,7 +31,7 @@ def java_args(filetype):
     return JavaComparator()
 
 
-class ChainCheck(object):
+class ChainCheck:
     def __init__(self, checks):
         self.checks = checks
 
diff --git a/validator/checks/java.py b/validator/checks/java.py
index 319ec2d..70cfd0d 100644
--- a/validator/checks/java.py
+++ b/validator/checks/java.py
@@ -6,7 +6,7 @@
 REF_PATTERN = r'@string/\w+'
 
 
-class JavaComparator(object):
+class JavaComparator:
     def _get_args(self, content):
         return re.findall(ARG_PATTERN, content)
 
diff --git a/validator/checks/md.py b/validator/checks/md.py
index d8542a8..70e6988 100644
--- a/validator/checks/md.py
+++ b/validator/checks/md.py
@@ -1,5 +1,4 @@
 import re
-from typing import Type
 
 from sdiff import diff, renderer, MdParser
 from markdown import markdown
@@ -14,8 +13,8 @@ def save_file(content, filename):
         fp.write(content)
 
 
-class MarkdownComparator(object):
-    def __init__(self, md_parser_cls: Type[MdParser] = MdParser):
+class MarkdownComparator:
+    def __init__(self, md_parser_cls: type[MdParser] = MdParser):
         self._md_parser_cls = md_parser_cls
 
     def check(self, data, parser, reader):
diff --git a/validator/checks/url.py b/validator/checks/url.py
index dabd816..263a532 100644
--- a/validator/checks/url.py
+++ b/validator/checks/url.py
@@ -5,7 +5,6 @@
 import string
 
 from bs4 import BeautifulSoup
 from urllib.parse import urlparse, urljoin
-from typing import List, Optional
 
 from ..errors import UrlDiff, UrlOccurencyDiff
@@ -23,7 +22,7 @@ class MissingUrlExtractorError(Exception):
 # the job of extractors is to find all non-parametrized urls in the given text for later checks via UrlValidator
 # which examines is particular url leads to working webpage (200 status)
 # since we are interested in all urls (including parametrized) we need to sligthly change their API and behaviour
-class TextUrlExtractor(object):
+class TextUrlExtractor:
     def __init__(self, **kwargs):
         pass
 
@@ -60,12 +59,12 @@ def _validate_email(self, email):
         return False
 
     def _extract_from_anchors(self, soup):
-        return set([a.get('href') or a.text for a in soup.find_all('a')])
+        return {a.get('href') or a.text for a in soup.find_all('a')}
 
     def _extract_from_img(self, soup):
         if self.skip_images:
             return set()
-        return set([img.get('src') for img in soup.find_all('img')])
+        return {img.get('src') for img in soup.find_all('img')}
 
     def _fix_url(self, url):
         result = ''
@@ -82,7 +81,7 @@
             if re.match(self.url_pattern, full_url):
                 result = full_url
             else:
-                logging.error('{} not tested'.format(url_parsed.geturl()))
+                logging.error(f'{url_parsed.geturl()} not tested')
         return result
 
     def extract_urls(self, content, keep_placeholders=False):
@@ -96,20 +95,20 @@
         return result
 
 
-class UrlStatusChecker(object):
+class UrlStatusChecker:
     retry_max_count = 3
 
-    def __init__(self, headers=None, exclude_urls_regexs: Optional[List[str]] = None):
+    def __init__(self, headers=None, exclude_urls_regexs: list[str] | None = None):
         self._exclude_urls_regex = exclude_urls_regexs or []
         if self._exclude_urls_regex:
-            logging.warning('Excluded urls regexps: {}'.format(self._exclude_urls_regex))
+            logging.warning(f'Excluded urls regexps: {self._exclude_urls_regex}')
         self._headers = headers or {}
         if 'User-Agent' not in self._headers:
             self._headers['User-Agent'] = DEFAULT_USER_AGENT
 
     async def _make_request(self, url):
         try:
-            logging.info('checking {}'.format(url))
+            logging.info(f'checking {url}')
             async with aiohttp.request('get', url, headers=self._headers, allow_redirects=True) as res:
                 return res.status
         except Exception:
@@ -143,7 +142,7 @@ async def _check_urls_coro(self, urls, future):
             if not is_exluded:
                 urls_without_excluded.append(url)
             else:
-                logging.warning('url {} excluded from status check'.format(url.url))
+                logging.warning(f'url {url.url} excluded from status check')
         tasks = [self._request_status_code(url.url) for url in urls_without_excluded]
         results = await asyncio.gather(*tasks)
         for index, url in enumerate(urls_without_excluded):
@@ -167,10 +166,10 @@ async def async_check(self, urls):
         return future.result()
 
 
-class UrlValidator(object):
+class UrlValidator:
     _extractors = {'txt': TextUrlExtractor, 'html': HtmlUrlExtractor}
 
-    def __init__(self, filetype, headers=None, exclude_status_check_regexs: Optional[List[str]] = None, **kwargs):
+    def __init__(self, filetype, headers=None, exclude_status_check_regexs: list[str] | None = None, **kwargs):
         self.client_headers = headers or {}
         self._excluded_status_check_regexs = exclude_status_check_regexs or []
         extractor_class = self._extractors.get(filetype)
@@ -179,7 +178,7 @@ def __init__(self, filetype, headers=None, exclude_status_check_regexs: Optional
         self.extractor = extractor_class(**kwargs)
 
     def _get_urls(self, data, parser, reader):
-        flat_data = set(p for sublist in data for p in sublist)
+        flat_data = {p for sublist in data for p in sublist}
         # TODO yield instead
         urls = {}
         for element in flat_data:
diff --git a/validator/errors.py b/validator/errors.py
index 082885a..9972f88 100644
--- a/validator/errors.py
+++ b/validator/errors.py
@@ -1,7 +1,7 @@
 from collections import namedtuple
 
 
-class UrlDiff(object):
+class UrlDiff:
 
     def __init__(self, url, files=None, status_code=200, has_disallowed_chars=False):
         self.url = url
@@ -10,7 +10,7 @@ def __init__(self, url, files=None, status_code=200, has_disallowed_chars=False)
         self.has_disallowed_chars = has_disallowed_chars
 
     def __str__(self):
-        return 'Url(%s, %s, %s, %s)' % (self.url, self.files, self.status_code, self.has_disallowed_chars)
+        return 'Url({}, {}, {}, {})'.format(self.url, self.files, self.status_code, self.has_disallowed_chars)
 
     def __repr__(self):
         return 'Url: %s' % self.url
@@ -37,7 +37,7 @@ def is_valid(self):
 ContentData.__new__.__defaults__ = ('', ) * 2
 
 
-class MdDiff(object):
+class MdDiff:
 
     def __init__(self, base, other, error_msgs):
         self.base = base
diff --git a/validator/fs.py b/validator/fs.py
index b87808d..6e12b16 100644
--- a/validator/fs.py
+++ b/validator/fs.py
@@ -91,10 +91,10 @@ def files(pattern, **kwargs):
     [[Path(path/to1/file1.txt), Path(path/to1/file2.txt)], [Path(path/to2/file1.txt), Path(path/to2/file2.txt)]]
     """
     # extract named parameters from the pattern
-    params = set([p for p in map(lambda e: e[1], Formatter().parse(pattern)) if p])
+    params = {p for p in map(lambda e: e[1], Formatter().parse(pattern)) if p}
     if params:
         if len(params - kwargs.keys()) > 0:
-            raise ValueError('missing parameters {} for pattern {}'.format(params - kwargs.keys(), pattern))
+            raise ValueError(f'missing parameters {params - kwargs.keys()} for pattern {pattern}')
         return _params_pattern(pattern, params, **kwargs)
     else:
         return _no_params_pattern(pattern)
diff --git a/validator/parsers.py b/validator/parsers.py
index 379902b..3c87b89 100644
--- a/validator/parsers.py
+++ b/validator/parsers.py
@@ -9,22 +9,22 @@ def __init__(self, msg):
         super().__init__(msg)
 
 
-class FileReader(object):
+class FileReader:
     def read(self, path):
         return read_content(path)
 
 
-class TxtReader(object):
+class TxtReader:
     def read(self, content):
         return content
 
 
-class MarkdownParser(object):
+class MarkdownParser:
     def parse(self, content):
         return markdown.markdown(content)
 
 
-class XmlParser(object):
+class XmlParser:
     def __init__(self, query='*'):
         self.query = query
 
@@ -38,12 +38,12 @@ def parse(self, content):
         return '\n\n'.join(texts)
 
 
-class CsvParser(object):
+class CsvParser:
     def parse(self, content):
         return '\n'.join(content.split(','))
 
 
-class ChainParser(object):
+class ChainParser:
     def __init__(self, parsers):
         self.parsers = parsers
 
diff --git a/validator/reports.py b/validator/reports.py
index c052e62..8100963 100644
--- a/validator/reports.py
+++ b/validator/reports.py
@@ -6,7 +6,7 @@
 from .errors import UrlDiff, MdDiff, UrlOccurencyDiff
 
 
-class HtmlReporter(object):
+class HtmlReporter:
     report_template = """
     <html>
     <body>
@@ -82,12 +82,12 @@ def __init__(self, output_directory='errors'):
         self.output_directory = output_directory
 
     def _add_content(self, soup, tag_id, content):
-        tags = soup.select('#{}'.format(tag_id))
+        tags = soup.select(f'#{tag_id}')
         if tags and content:
             tags[0].append(content)
         else:
-            print('missing tag: %s, content %s' % (tag_id, content))
+            print('missing tag: {}, content {}'.format(tag_id, content))
         return soup
 
     # TODO just rewrite !!!
 
@@ -99,7 +99,7 @@ def report(self, errors):
             # TODO use mustache for templates
             report_soup = BeautifulSoup(self.report_template, 'lxml')
             if isinstance(error, UrlDiff):
-                messages = ['{} returned with code {}'.format(error.url, error.status_code)]
+                messages = [f'{error.url} returned with code {error.status_code}']
                 self._add_content(report_soup, 'urls', '\n'.join(messages))
             if isinstance(error, MdDiff):
                 error_msgs = '<br/>'.join(map(lambda i: str(i), error.error_msgs))
@@ -113,20 +113,20 @@
             save_report(self.output_directory, error.other.original, report_soup.prettify())
 
 
-class ConsoleReporter(object):
+class ConsoleReporter:
 
     def report(self, errors):
         for error in errors:
             if isinstance(error, UrlDiff):
-                print('{} returned with code {}'.format(error.url, error.status_code))
+                print(f'{error.url} returned with code {error.status_code}')
                 for path in error.files:
-                    print('\t{}'.format(str(path)))
+                    print(f'\t{str(path)}')
                 print()
             if isinstance(error, MdDiff):
-                print('Files are different:\n\t{}\n\t{}\n\n'.format(str(error.base), str(error.other)))
+                print(f'Files are different:\n\t{str(error.base)}\n\t{str(error.other)}\n\n')
 
 
-class StoreReporter(object):
+class StoreReporter:
 
     def __init__(self):
         self.log = []
@@ -134,16 +134,16 @@
     def report(self, errors):
         for error in errors:
             if isinstance(error, UrlDiff):
-                self.log.append('%s returned with code %s for files' % (error.url, error.status_code))
+                self.log.append('{} returned with code {} for files'.format(error.url, error.status_code))
                 for path in error.files:
                     self.log.append('\t%s' % str(path))
             if isinstance(error, MdDiff):
-                self.log.append('Files are different:\n\t%s\n\t%s\n\n' % (str(error.base), str(error.other)))
+                self.log.append('Files are different:\n\t{}\n\t{}\n\n'.format(str(error.base), str(error.other)))
             if isinstance(error, UrlOccurencyDiff):
-                self.log.append('Count of URLS in %s and %s are different' % (error.base_path, error.translation_path))
+                self.log.append(f'Count of URLS in {error.base_path} and {error.translation_path} are different')
 
 
-class ChainReporter(object):
+class ChainReporter:
 
     def __init__(self, reporters):
         self.reporters = reporters