diff --git a/.python-version b/.python-version
index 424e179..2c07333 100644
--- a/.python-version
+++ b/.python-version
@@ -1 +1 @@
-3.6.8
+3.11
diff --git a/.travis.yml b/.travis.yml
index 1ef825f..da12660 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,9 +1,11 @@
language: python
+dist: jammy
python:
- - "3.6"
+ - "3.11"
# command to install dependencies
install:
- make dev
# command to run tests
script:
- make tests
+ - make coverage
diff --git a/Makefile b/Makefile
index b16471c..4e7b15f 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,9 @@
PYTHON=venv/bin/python3
PIP=venv/bin/pip
-NOSE=venv/bin/nosetests
+COVERAGE=venv/bin/coverage
+TEST_RUNNER=venv/bin/pytest
+TEST_RUNNER_FLAGS=-s --durations=3 --durations-min=0.005
FLAKE=venv/bin/flake8
PYPICLOUD_HOST=pypicloud.getkeepsafe.local
PIP_ARGS=--extra-index=http://$(PYPICLOUD_HOST)/simple/ --trusted-host $(PYPICLOUD_HOST)
@@ -30,14 +32,16 @@ flake:
$(FLAKE) validator tests
test: flake
- $(NOSE) -s $(FLAGS)
+ $(COVERAGE) run -m pytest $(TEST_RUNNER_FLAGS)
vtest:
- $(NOSE) -s -v $(FLAGS)
+ $(COVERAGE) run -m pytest -v $(TEST_RUNNER_FLAGS)
+
+testloop:
+	while sleep 1; do $(TEST_RUNNER) --lf $(TEST_RUNNER_FLAGS); done
cov cover coverage:
- $(NOSE) -s --with-cover --cover-html --cover-html-dir ./coverage $(FLAGS)
- echo "open file://`pwd`/coverage/index.html"
+ $(COVERAGE) report -m
clean:
rm -rf `find . -name __pycache__`
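For context, a minimal sketch of what the new `test`/`coverage` targets amount to, expressed through coverage.py's Python API rather than the CLI the Makefile actually invokes; treating the two as equivalent is my reading of the diff, and the flags are illustrative:

```python
# API-level analogue of `coverage run -m pytest ...` followed by `coverage report -m`.
import coverage
import pytest

cov = coverage.Coverage()        # reads [coverage:run] branch = True from setup.cfg
cov.start()
exit_code = pytest.main(['-s'])  # what `coverage run -m pytest -s ...` executes
cov.stop()
cov.save()
cov.report(show_missing=True)    # `coverage report -m`; the CLI also enforces fail_under
```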
diff --git a/setup.cfg b/setup.cfg
index 0945a29..15ae1fa 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -7,3 +7,9 @@ ignore = F403
[pep8]
max-line-length = 120
+
+[coverage:run]
+branch = True
+
+[coverage:report]
+fail_under = 96
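`branch = True` makes coverage count both outcomes of every conditional, and `fail_under = 96` turns the report step (now also run in CI) into a hard gate. A toy illustration of what branch coverage adds, with made-up code:

```python
# Calling only f(1) covers every *line* below, yet branch coverage stays
# incomplete until the False path of the `if` (x <= 0) is exercised too.
def f(x):
    if x > 0:
        return 'positive'
    return 'non-positive'
```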
diff --git a/setup.py b/setup.py
index 6f1ddb5..bd913c5 100644
--- a/setup.py
+++ b/setup.py
@@ -1,8 +1,7 @@
import os
from setuptools import setup, find_packages
-
-version = '0.7.2'
+version = '1.0.0'
def read(f):
@@ -10,18 +9,18 @@ def read(f):
install_requires = [
- 'sdiff @ git+https://github.com/KeepSafe/html-structure-diff.git@0.4.1#egg=sdiff',
- 'aiohttp >=3, <3.4',
+ 'sdiff @ git+https://github.com/KeepSafe/html-structure-diff.git@1.0.0#egg=sdiff',
+ 'aiohttp==3.8.5',
'Markdown',
'parse <= 1.8.2',
'beautifulsoup4 >=4, <5',
- 'lxml >=3',
+ 'lxml<5',
]
tests_require = [
- 'nose',
- 'flake8==3.6.0',
- 'coverage',
+ 'pytest >= 8',
+ 'coverage==7.6.1',
+ 'flake8==7.1.1',
]
devtools_require = [
@@ -32,6 +31,7 @@ def read(f):
setup(
name='content-validator',
version=version,
+ python_requires='>=3.11',
    description=('Content validator looks at text content and performs different validation tasks'),
classifiers=[
'License :: OSI Approved :: BSD License', 'Intended Audience :: Developers', 'Programming Language :: Python'
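The `sdiff` pin is a PEP 508 direct reference, so the tag travels inside the requirement string itself. A quick sketch of how such a string parses, using the third-party `packaging` library purely for illustration (it is not a dependency of this project):

```python
from packaging.requirements import Requirement

req = Requirement('sdiff @ git+https://github.com/KeepSafe/html-structure-diff.git@1.0.0#egg=sdiff')
assert req.name == 'sdiff'
assert req.url.startswith('git+https://')  # pip checks out exactly this tag
```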
diff --git a/tests/utils.py b/tests/utils.py
index 3948220..e3aa96c 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -1,4 +1,3 @@
-
def read(path):
with open(path) as fp:
return fp.read()
diff --git a/validator/__init__.py b/validator/__init__.py
index c7301e4..42d7083 100644
--- a/validator/__init__.py
+++ b/validator/__init__.py
@@ -3,7 +3,7 @@
from . import parsers, checks, reports, fs
-class Validator(object):
+class Validator:
def __init__(self, contents, parser, reader, check, reporter=None):
self.contents = contents
self.parser = parser
@@ -24,7 +24,7 @@ async def async_validate(self):
return errors
-class ReportBuilder(object):
+class ReportBuilder:
def __init__(self, contents, parser, reader, check):
self.contents = contents
self.parser = parser
@@ -49,7 +49,7 @@ def validate(self):
return Validator(self.contents, self.parser, self.reader, self.check, reporter).validate()
-class CheckBuilder(object):
+class CheckBuilder:
def __init__(self, contents, content_type, parser, reader):
self.contents = contents
self.content_type = content_type
@@ -89,7 +89,7 @@ async def async_validate(self):
return res
-class ParserBuilder(object):
+class ParserBuilder:
def __init__(self, contents, reader=None):
self.contents = contents
self.content_type = 'txt'
@@ -120,7 +120,7 @@ def check(self):
return CheckBuilder(self.contents, self.content_type, parser, self.reader)
-class ContentBuilder(object):
+class ContentBuilder:
def files(self, pattern, **kwargs):
contents = fs.files(pattern, **kwargs)
return ParserBuilder(contents, parsers.FileReader())
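Dropping the explicit `object` base, here and in the files below, is purely cosmetic: every Python 3 class is new-style. For the record:

```python
class A(object):   # Python 2 holdover spelling
    pass

class B:           # identical semantics in Python 3
    pass

assert A.__mro__[1:] == B.__mro__[1:] == (object,)
```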
diff --git a/validator/checks/__init__.py b/validator/checks/__init__.py
index 9dc692f..61861f2 100644
--- a/validator/checks/__init__.py
+++ b/validator/checks/__init__.py
@@ -1,5 +1,3 @@
-from typing import Type
-
from sdiff import MdParser
from .md import MarkdownComparator
@@ -21,7 +19,7 @@ def url_occurences(filetype):
return UrlOccurenciesValidator()
-def markdown(filetype, md_parser_cls: Type[MdParser] = MdParser):
+def markdown(filetype, md_parser_cls: type[MdParser] = MdParser):
if filetype not in ['txt', 'html']:
raise UndefinedCheckTypeError('got filetype %s' % filetype)
return MarkdownComparator(md_parser_cls)
@@ -33,7 +31,7 @@ def java_args(filetype):
return JavaComparator()
-class ChainCheck(object):
+class ChainCheck:
def __init__(self, checks):
self.checks = checks
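The annotation swap from `typing.Type[MdParser]` to the PEP 585 builtin generic `type[MdParser]` is safe under the new 3.11 floor (builtin generics work at runtime since 3.9). A self-contained sketch with hypothetical names:

```python
# PEP 585: builtin `type` is subscriptable, so `from typing import Type` goes away.
class Parser:
    pass

def make(parser_cls: type[Parser] = Parser) -> Parser:
    return parser_cls()   # any Parser subclass is accepted, as with md_parser_cls

make()                    # falls back to the default class
```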
diff --git a/validator/checks/java.py b/validator/checks/java.py
index 319ec2d..70cfd0d 100644
--- a/validator/checks/java.py
+++ b/validator/checks/java.py
@@ -6,7 +6,7 @@
REF_PATTERN = r'@string/\w+'
-class JavaComparator(object):
+class JavaComparator:
def _get_args(self, content):
return re.findall(ARG_PATTERN, content)
diff --git a/validator/checks/md.py b/validator/checks/md.py
index d8542a8..70e6988 100644
--- a/validator/checks/md.py
+++ b/validator/checks/md.py
@@ -1,5 +1,4 @@
import re
-from typing import Type
from sdiff import diff, renderer, MdParser
from markdown import markdown
@@ -14,8 +13,8 @@ def save_file(content, filename):
fp.write(content)
-class MarkdownComparator(object):
- def __init__(self, md_parser_cls: Type[MdParser] = MdParser):
+class MarkdownComparator:
+ def __init__(self, md_parser_cls: type[MdParser] = MdParser):
self._md_parser_cls = md_parser_cls
def check(self, data, parser, reader):
diff --git a/validator/checks/url.py b/validator/checks/url.py
index dabd816..263a532 100644
--- a/validator/checks/url.py
+++ b/validator/checks/url.py
@@ -5,7 +5,6 @@
import string
from bs4 import BeautifulSoup
from urllib.parse import urlparse, urljoin
-from typing import List, Optional
from ..errors import UrlDiff, UrlOccurencyDiff
@@ -23,7 +22,7 @@ class MissingUrlExtractorError(Exception):
# the job of extractors is to find all non-parametrized urls in the given text for later checks via UrlValidator
# which examines whether a particular url leads to a working webpage (200 status)
# since we are interested in all urls (including parametrized) we need to slightly change their API and behaviour
-class TextUrlExtractor(object):
+class TextUrlExtractor:
def __init__(self, **kwargs):
pass
@@ -60,12 +59,12 @@ def _validate_email(self, email):
return False
def _extract_from_anchors(self, soup):
- return set([a.get('href') or a.text for a in soup.find_all('a')])
+ return {a.get('href') or a.text for a in soup.find_all('a')}
def _extract_from_img(self, soup):
if self.skip_images:
return set()
- return set([img.get('src') for img in soup.find_all('img')])
+ return {img.get('src') for img in soup.find_all('img')}
def _fix_url(self, url):
result = ''
@@ -82,7 +81,7 @@ def _fix_url(self, url):
if re.match(self.url_pattern, full_url):
result = full_url
else:
- logging.error('{} not tested'.format(url_parsed.geturl()))
+ logging.error(f'{url_parsed.geturl()} not tested')
return result
def extract_urls(self, content, keep_placeholders=False):
@@ -96,20 +95,20 @@ def extract_urls(self, content, keep_placeholders=False):
return result
-class UrlStatusChecker(object):
+class UrlStatusChecker:
retry_max_count = 3
- def __init__(self, headers=None, exclude_urls_regexs: Optional[List[str]] = None):
+ def __init__(self, headers=None, exclude_urls_regexs: list[str] | None = None):
self._exclude_urls_regex = exclude_urls_regexs or []
if self._exclude_urls_regex:
- logging.warning('Excluded urls regexps: {}'.format(self._exclude_urls_regex))
+ logging.warning(f'Excluded urls regexps: {self._exclude_urls_regex}')
self._headers = headers or {}
if 'User-Agent' not in self._headers:
self._headers['User-Agent'] = DEFAULT_USER_AGENT
async def _make_request(self, url):
try:
- logging.info('checking {}'.format(url))
+ logging.info(f'checking {url}')
async with aiohttp.request('get', url, headers=self._headers, allow_redirects=True) as res:
return res.status
except Exception:
@@ -143,7 +142,7 @@ async def _check_urls_coro(self, urls, future):
if not is_exluded:
urls_without_excluded.append(url)
else:
- logging.warning('url {} excluded from status check'.format(url.url))
+ logging.warning(f'url {url.url} excluded from status check')
tasks = [self._request_status_code(url.url) for url in urls_without_excluded]
results = await asyncio.gather(*tasks)
for index, url in enumerate(urls_without_excluded):
@@ -167,10 +166,10 @@ async def async_check(self, urls):
return future.result()
-class UrlValidator(object):
+class UrlValidator:
_extractors = {'txt': TextUrlExtractor, 'html': HtmlUrlExtractor}
- def __init__(self, filetype, headers=None, exclude_status_check_regexs: Optional[List[str]] = None, **kwargs):
+ def __init__(self, filetype, headers=None, exclude_status_check_regexs: list[str] | None = None, **kwargs):
self.client_headers = headers or {}
self._excluded_status_check_regexs = exclude_status_check_regexs or []
extractor_class = self._extractors.get(filetype)
@@ -179,7 +178,7 @@ def __init__(self, filetype, headers=None, exclude_status_check_regexs: Optional
self.extractor = extractor_class(**kwargs)
def _get_urls(self, data, parser, reader):
- flat_data = set(p for sublist in data for p in sublist)
+ flat_data = {p for sublist in data for p in sublist}
# TODO yield instead
urls = {}
for element in flat_data:
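Two idioms recur throughout this file: set comprehensions replacing `set([...])`, and PEP 604 unions replacing `Optional[List[str]]`. A condensed sketch of both, with hypothetical names:

```python
# set([...]) materialises a throwaway list first; {...} feeds the set directly.
tags = [('a', 'http://x'), ('a', ''), ('img', 'http://y')]
urls = {href for _, href in tags if href}   # {'http://x', 'http://y'}

# PEP 604 union replaces Optional[list[str]]; `or []` also sidesteps the
# mutable-default-argument pitfall.
def checker(exclude_regexs: list[str] | None = None) -> list[str]:
    return exclude_regexs or []
```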
diff --git a/validator/errors.py b/validator/errors.py
index 082885a..9972f88 100644
--- a/validator/errors.py
+++ b/validator/errors.py
@@ -1,7 +1,7 @@
from collections import namedtuple
-class UrlDiff(object):
+class UrlDiff:
def __init__(self, url, files=None, status_code=200, has_disallowed_chars=False):
self.url = url
@@ -10,7 +10,7 @@ def __init__(self, url, files=None, status_code=200, has_disallowed_chars=False)
self.has_disallowed_chars = has_disallowed_chars
def __str__(self):
- return 'Url(%s, %s, %s, %s)' % (self.url, self.files, self.status_code, self.has_disallowed_chars)
+        return f'Url({self.url}, {self.files}, {self.status_code}, {self.has_disallowed_chars})'
def __repr__(self):
return 'Url: %s' % self.url
@@ -37,7 +37,7 @@ def is_valid(self):
ContentData.__new__.__defaults__ = ('', ) * 2
-class MdDiff(object):
+class MdDiff:
def __init__(self, base, other, error_msgs):
self.base = base
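Most of the remaining churn is `%`/`.format` to f-strings. PEP 498 f-strings stringify the braced expression automatically (via `__format__`, which defaults to `str()`), which is why the explicit `str(...)` wrappers were dropped above. For instance:

```python
url, code = 'https://example.com', 404
assert f'{url} returned with code {code}' == '%s returned with code %s' % (url, code)
assert f'{code}' == str(code)   # an explicit str() inside the braces adds nothing
```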
diff --git a/validator/fs.py b/validator/fs.py
index b87808d..6e12b16 100644
--- a/validator/fs.py
+++ b/validator/fs.py
@@ -91,10 +91,10 @@ def files(pattern, **kwargs):
[[Path(path/to1/file1.txt), Path(path/to1/file2.txt)], [Path(path/to2/file1.txt), Path(path/to2/file2.txt)]]
"""
# extract named parameters from the pattern
- params = set([p for p in map(lambda e: e[1], Formatter().parse(pattern)) if p])
+ params = {p for p in map(lambda e: e[1], Formatter().parse(pattern)) if p}
if params:
if len(params - kwargs.keys()) > 0:
- raise ValueError('missing parameters {} for pattern {}'.format(params - kwargs.keys(), pattern))
+ raise ValueError(f'missing parameters {params - kwargs.keys()} for pattern {pattern}')
return _params_pattern(pattern, params, **kwargs)
else:
return _no_params_pattern(pattern)
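`string.Formatter().parse` does the parameter extraction here; it yields `(literal_text, field_name, format_spec, conversion)` tuples, and the comprehension keeps the named fields. A standalone check:

```python
from string import Formatter

params = {p for p in map(lambda e: e[1], Formatter().parse('{lang}/emails/{name}.md')) if p}
assert params == {'lang', 'name'}   # fs.files raises ValueError if either kwarg is missing
```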
diff --git a/validator/parsers.py b/validator/parsers.py
index 379902b..3c87b89 100644
--- a/validator/parsers.py
+++ b/validator/parsers.py
@@ -9,22 +9,22 @@ def __init__(self, msg):
super().__init__(msg)
-class FileReader(object):
+class FileReader:
def read(self, path):
return read_content(path)
-class TxtReader(object):
+class TxtReader:
def read(self, content):
return content
-class MarkdownParser(object):
+class MarkdownParser:
def parse(self, content):
return markdown.markdown(content)
-class XmlParser(object):
+class XmlParser:
def __init__(self, query='*'):
self.query = query
@@ -38,12 +38,12 @@ def parse(self, content):
return '\n\n'.join(texts)
-class CsvParser(object):
+class CsvParser:
def parse(self, content):
return '\n'.join(content.split(','))
-class ChainParser(object):
+class ChainParser:
def __init__(self, parsers):
self.parsers = parsers
diff --git a/validator/reports.py b/validator/reports.py
index c052e62..8100963 100644
--- a/validator/reports.py
+++ b/validator/reports.py
@@ -6,7 +6,7 @@
from .errors import UrlDiff, MdDiff, UrlOccurencyDiff
-class HtmlReporter(object):
+class HtmlReporter:
report_template = """
@@ -82,12 +82,12 @@ def __init__(self, output_directory='errors'):
self.output_directory = output_directory
def _add_content(self, soup, tag_id, content):
- tags = soup.select('#{}'.format(tag_id))
+ tags = soup.select(f'#{tag_id}')
if tags and content:
tags[0].append(content)
else:
- print('missing tag: %s, content %s' % (tag_id, content))
+            print(f'missing tag: {tag_id}, content {content}')
return soup
# TODO just rewrite !!!
@@ -99,7 +99,7 @@ def report(self, errors):
# TODO use mustache for templates
report_soup = BeautifulSoup(self.report_template, 'lxml')
if isinstance(error, UrlDiff):
- messages = ['{} returned with code {}'.format(error.url, error.status_code)]
+ messages = [f'{error.url} returned with code {error.status_code}']
self._add_content(report_soup, 'urls', '\n'.join(messages))
if isinstance(error, MdDiff):
+            error_msgs = '<br/>'.join(map(lambda i: str(i), error.error_msgs))
@@ -113,20 +113,20 @@ def report(self, errors):
save_report(self.output_directory, error.other.original, report_soup.prettify())
-class ConsoleReporter(object):
+class ConsoleReporter:
def report(self, errors):
for error in errors:
if isinstance(error, UrlDiff):
- print('{} returned with code {}'.format(error.url, error.status_code))
+ print(f'{error.url} returned with code {error.status_code}')
for path in error.files:
- print('\t{}'.format(str(path)))
+                print(f'\t{path}')
print()
if isinstance(error, MdDiff):
- print('Files are different:\n\t{}\n\t{}\n\n'.format(str(error.base), str(error.other)))
+            print(f'Files are different:\n\t{error.base}\n\t{error.other}\n\n')
-class StoreReporter(object):
+class StoreReporter:
def __init__(self):
self.log = []
@@ -134,16 +134,16 @@ def __init__(self):
def report(self, errors):
for error in errors:
if isinstance(error, UrlDiff):
- self.log.append('%s returned with code %s for files' % (error.url, error.status_code))
+                self.log.append(f'{error.url} returned with code {error.status_code} for files')
for path in error.files:
self.log.append('\t%s' % str(path))
if isinstance(error, MdDiff):
- self.log.append('Files are different:\n\t%s\n\t%s\n\n' % (str(error.base), str(error.other)))
+            self.log.append(f'Files are different:\n\t{error.base}\n\t{error.other}\n\n')
if isinstance(error, UrlOccurencyDiff):
- self.log.append('Count of URLS in %s and %s are different' % (error.base_path, error.translation_path))
+            self.log.append(f'Count of URLs in {error.base_path} and {error.translation_path} are different')
-class ChainReporter(object):
+class ChainReporter:
def __init__(self, reporters):
self.reporters = reporters
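Lastly, `soup.select(f'#{tag_id}')` builds a CSS id selector. A minimal standalone example of what `_add_content` relies on, assuming `lxml` is installed as setup.py requires:

```python
from bs4 import BeautifulSoup

soup = BeautifulSoup('<div id="urls"></div>', 'lxml')
tags = soup.select('#urls')   # CSS id selector, as built by f'#{tag_id}'
tags[0].append('https://example.com returned with code 404')
print(soup.prettify())
```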