2 changes: 1 addition & 1 deletion .python-version
@@ -1 +1 @@
-3.6.8
+3.11
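`.python-version` is the interpreter pin read by pyenv. Moving from the exact `3.6.8` to the `3.11` prefix lets recent pyenv releases resolve to the newest installed 3.11.x instead of one frozen patch release. Assuming pyenv is what consumes this file here, it is normally rewritten with:

```sh
pyenv install 3.11   # recent pyenv resolves the prefix to the latest 3.11.x
pyenv local 3.11     # rewrites .python-version with "3.11"
```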
4 changes: 3 additions & 1 deletion .travis.yml
@@ -1,9 +1,11 @@
 language: python
+dist: jammy
 python:
-  - "3.6"
+  - "3.11"
 # command to install dependencies
 install:
   - make dev
 # command to run tests
 script:
   - make tests
+  - make coverage
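For reference, the Travis config after this hunk (list indentation per the usual Travis style):

```yaml
language: python
dist: jammy
python:
  - "3.11"
# command to install dependencies
install:
  - make dev
# command to run tests
script:
  - make tests
  - make coverage
```

Note that `script` calls `make tests` while the Makefile hunks below only show a `test` target; presumably a `tests` alias exists outside the visible hunks.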
14 changes: 9 additions & 5 deletions Makefile
@@ -2,7 +2,9 @@
 
 PYTHON=venv/bin/python3
 PIP=venv/bin/pip
-NOSE=venv/bin/nosetests
+COVERAGE=venv/bin/coverage
+TEST_RUNNER=venv/bin/pytest
+TEST_RUNNER_FLAGS=-s --durations=3 --durations-min=0.005
 FLAKE=venv/bin/flake8
 PYPICLOUD_HOST=pypicloud.getkeepsafe.local
 PIP_ARGS=--extra-index=http://$(PYPICLOUD_HOST)/simple/ --trusted-host $(PYPICLOUD_HOST)
@@ -30,14 +32,16 @@ flake:
 	$(FLAKE) validator tests
 
 test: flake
-	$(NOSE) -s $(FLAGS)
+	$(COVERAGE) run -m pytest $(TEST_RUNNER_FLAGS)
 
 vtest:
-	$(NOSE) -s -v $(FLAGS)
+	$(COVERAGE) run -m pytest -v $(TEST_RUNNER_FLAGS)
 
+testloop:
+	while sleep 1; do $(TEST_RUNNER) -s --lf $(TEST_RUNNER_FLAGS); done
+
 cov cover coverage:
-	$(NOSE) -s --with-cover --cover-html --cover-html-dir ./coverage $(FLAGS)
-	echo "open file://`pwd`/coverage/index.html"
+	$(COVERAGE) report -m
 
 clean:
 	rm -rf `find . -name __pycache__`
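With nose gone, measurement and reporting are split across targets: `make test` runs pytest under coverage (after flake8), and `make coverage` prints the report from the `.coverage` data file the run leaves behind. Usage sketch, assuming `make dev` has already created `venv/`:

```sh
make test      # flake8, then: coverage run -m pytest -s --durations=3 --durations-min=0.005
make coverage  # coverage report -m, reads the .coverage file written by `make test`
make testloop  # loops pytest --lf (only the tests that failed last time), once per second
```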
6 changes: 6 additions & 0 deletions setup.cfg
@@ -7,3 +7,9 @@ ignore = F403
 
 [pep8]
 max-line-length = 120
+
+[coverage:run]
+branch = True
+
+[coverage:report]
+fail_under = 96
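These sections wire the new Makefile targets to policy: `branch = True` makes `coverage run` track branch (not just line) coverage, and `fail_under = 96` turns `coverage report` into a gate, which is what fails the Travis build. A local check might look like this (a sketch; coverage.py exits with status 2 when the total falls under the threshold):

```sh
venv/bin/coverage run -m pytest
venv/bin/coverage report -m   # fails (exit 2) if total coverage < 96%
```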
16 changes: 8 additions & 8 deletions setup.py
@@ -1,27 +1,26 @@
 import os
 from setuptools import setup, find_packages
 
 
-version = '0.7.1'
+version = '1.0.0'
 
 
 def read(f):
     return open(os.path.join(os.path.dirname(__file__), f)).read().strip()
 
 
 install_requires = [
-    'sdiff @ git+https://github.com/KeepSafe/html-structure-diff.git@0.4.1#egg=sdiff',
-    'aiohttp >=3, <3.4',
+    'sdiff @ git+https://github.com/KeepSafe/html-structure-diff.git@1.0.0#egg=sdiff',
+    'aiohttp==3.8.5',
     'Markdown',
     'parse >=1, <2',
     'beautifulsoup4 >=4, <5',
-    'lxml >=3',
+    'lxml<5',
 ]
 
 tests_require = [
-    'nose',
-    'flake8==3.6.0',
-    'coverage',
+    'pytest >= 8',
+    'coverage==7.6.1',
+    'flake8==7.1.1',
 ]
 
 devtools_require = [
@@ -32,6 +31,7 @@ def read(f):
 setup(
     name='content-validator',
     version=version,
+    python_requires='>=3.11',
     description=('Content validator looks at text content and preforms different validation tasks'),
     classifiers=[
         'License :: OSI Approved :: BSD License', 'Intended Audience :: Developers', 'Programming Language :: Python'
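`python_requires='>=3.11'` makes pip refuse the install on older interpreters up front, rather than failing later on syntax that needs a newer Python (e.g. the `list[str] | None` annotations further down, which need 3.10+). Expected behaviour, sketched:

```sh
python3.8 -m pip install .   # ERROR: Package 'content-validator' requires a different Python: ... not in '>=3.11'
python3.11 -m pip install .  # proceeds, pulling sdiff from the pinned 1.0.0 tag
```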
1 change: 0 additions & 1 deletion tests/utils.py
@@ -1,4 +1,3 @@
-
 def read(path):
     with open(path) as fp:
         return fp.read()
10 changes: 5 additions & 5 deletions validator/__init__.py
@@ -3,7 +3,7 @@
 from . import parsers, checks, reports, fs
 
 
-class Validator(object):
+class Validator:
     def __init__(self, contents, parser, reader, check, reporter=None):
         self.contents = contents
         self.parser = parser
@@ -24,7 +24,7 @@ async def async_validate(self):
         return errors
 
 
-class ReportBuilder(object):
+class ReportBuilder:
     def __init__(self, contents, parser, reader, check):
         self.contents = contents
         self.parser = parser
@@ -49,7 +49,7 @@ def validate(self):
         return Validator(self.contents, self.parser, self.reader, self.check, reporter).validate()
 
 
-class CheckBuilder(object):
+class CheckBuilder:
     def __init__(self, contents, content_type, parser, reader):
         self.contents = contents
         self.content_type = content_type
@@ -89,7 +89,7 @@ async def async_validate(self):
         return res
 
 
-class ParserBuilder(object):
+class ParserBuilder:
     def __init__(self, contents, reader=None):
         self.contents = contents
         self.content_type = 'txt'
@@ -120,7 +120,7 @@ def check(self):
         return CheckBuilder(self.contents, self.content_type, parser, self.reader)
 
 
-class ContentBuilder(object):
+class ContentBuilder:
     def files(self, pattern, **kwargs):
         contents = fs.files(pattern, **kwargs)
         return ParserBuilder(contents, parsers.FileReader())
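All five class changes here (and the matching ones in the files below) drop the explicit `object` base, which is redundant on Python 3: every class is new-style, so behaviour and MRO are identical. A minimal check:

```python
class A(object):  # old spelling
    pass

class B:          # new spelling; object is implicit
    pass

assert A.__mro__[1:] == B.__mro__[1:] == (object,)
```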
6 changes: 2 additions & 4 deletions validator/checks/__init__.py
@@ -1,5 +1,3 @@
-from typing import Type
-
 from sdiff import MdParser
 
 from .md import MarkdownComparator
@@ -21,7 +19,7 @@ def url_occurences(filetype):
     return UrlOccurenciesValidator()
 
 
-def markdown(filetype, md_parser_cls: Type[MdParser] = MdParser):
+def markdown(filetype, md_parser_cls: type[MdParser] = MdParser):
     if filetype not in ['txt', 'html']:
         raise UndefinedCheckTypeError('got filetype %s' % filetype)
     return MarkdownComparator(md_parser_cls)
@@ -33,7 +31,7 @@ def java_args(filetype):
     return JavaComparator()
 
 
-class ChainCheck(object):
+class ChainCheck:
     def __init__(self, checks):
         self.checks = checks
 
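`type[MdParser]` is the PEP 585 spelling of `typing.Type[MdParser]`: since Python 3.9 the builtin `type` can be parametrized directly, so the `typing` import disappears. Callers are unaffected; a sketch of the factory (with a hypothetical `MdParser` subclass for the override case):

```python
from sdiff import MdParser
from validator import checks

class StrictParser(MdParser):  # hypothetical subclass, just to show the override
    pass

checks.markdown('html')                             # MarkdownComparator with the default MdParser
checks.markdown('txt', md_parser_cls=StrictParser)  # same, with the custom parser class
checks.markdown('csv')                              # raises UndefinedCheckTypeError('got filetype csv')
```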
2 changes: 1 addition & 1 deletion validator/checks/java.py
@@ -6,7 +6,7 @@
 REF_PATTERN = r'@string/\w+'
 
 
-class JavaComparator(object):
+class JavaComparator:
     def _get_args(self, content):
         return re.findall(ARG_PATTERN, content)
 
5 changes: 2 additions & 3 deletions validator/checks/md.py
@@ -1,5 +1,4 @@
 import re
-from typing import Type
 
 from sdiff import diff, renderer, MdParser
 from markdown import markdown
@@ -14,8 +13,8 @@ def save_file(content, filename):
         fp.write(content)
 
 
-class MarkdownComparator(object):
-    def __init__(self, md_parser_cls: Type[MdParser] = MdParser):
+class MarkdownComparator:
+    def __init__(self, md_parser_cls: type[MdParser] = MdParser):
         self._md_parser_cls = md_parser_cls
 
     def check(self, data, parser, reader):
25 changes: 12 additions & 13 deletions validator/checks/url.py
@@ -5,7 +5,6 @@
 import string
 from bs4 import BeautifulSoup
 from urllib.parse import urlparse, urljoin
-from typing import List, Optional
 
 from ..errors import UrlDiff, UrlOccurencyDiff
 
@@ -23,7 +22,7 @@ class MissingUrlExtractorError(Exception):
 # the job of extractors is to find all non-parametrized urls in the given text for later checks via UrlValidator
 # which examines is particular url leads to working webpage (200 status)
 # since we are interested in all urls (including parametrized) we need to sligthly change their API and behaviour
-class TextUrlExtractor(object):
+class TextUrlExtractor:
     def __init__(self, **kwargs):
         pass
 
@@ -60,12 +59,12 @@ def _validate_email(self, email):
         return False
 
     def _extract_from_anchors(self, soup):
-        return set([a.get('href') or a.text for a in soup.find_all('a')])
+        return {a.get('href') or a.text for a in soup.find_all('a')}
 
     def _extract_from_img(self, soup):
         if self.skip_images:
             return set()
-        return set([img.get('src') for img in soup.find_all('img')])
+        return {img.get('src') for img in soup.find_all('img')}
 
     def _fix_url(self, url):
         result = ''
@@ -82,7 +81,7 @@ def _fix_url(self, url):
         if re.match(self.url_pattern, full_url):
             result = full_url
         else:
-            logging.error('{} not tested'.format(url_parsed.geturl()))
+            logging.error(f'{url_parsed.geturl()} not tested')
         return result
 
     def extract_urls(self, content, keep_placeholders=False):
@@ -96,20 +95,20 @@
         return result
 
 
-class UrlStatusChecker(object):
+class UrlStatusChecker:
     retry_max_count = 3
 
-    def __init__(self, headers=None, exclude_urls_regexs: Optional[List[str]] = None):
+    def __init__(self, headers=None, exclude_urls_regexs: list[str] | None = None):
         self._exclude_urls_regex = exclude_urls_regexs or []
         if self._exclude_urls_regex:
-            logging.warning('Excluded urls regexps: {}'.format(self._exclude_urls_regex))
+            logging.warning(f'Excluded urls regexps: {self._exclude_urls_regex}')
         self._headers = headers or {}
         if 'User-Agent' not in self._headers:
             self._headers['User-Agent'] = DEFAULT_USER_AGENT
 
     async def _make_request(self, url):
         try:
-            logging.info('checking {}'.format(url))
+            logging.info(f'checking {url}')
             async with aiohttp.request('get', url, headers=self._headers, allow_redirects=True) as res:
                 return res.status
         except Exception:
@@ -143,7 +142,7 @@ async def _check_urls_coro(self, urls, future):
             if not is_exluded:
                 urls_without_excluded.append(url)
             else:
-                logging.warning('url {} excluded from status check'.format(url.url))
+                logging.warning(f'url {url.url} excluded from status check')
         tasks = [self._request_status_code(url.url) for url in urls_without_excluded]
         results = await asyncio.gather(*tasks)
         for index, url in enumerate(urls_without_excluded):
@@ -167,10 +166,10 @@ async def async_check(self, urls):
         return future.result()
 
 
-class UrlValidator(object):
+class UrlValidator:
     _extractors = {'txt': TextUrlExtractor, 'html': HtmlUrlExtractor}
 
-    def __init__(self, filetype, headers=None, exclude_status_check_regexs: Optional[List[str]] = None, **kwargs):
+    def __init__(self, filetype, headers=None, exclude_status_check_regexs: list[str] | None = None, **kwargs):
         self.client_headers = headers or {}
         self._excluded_status_check_regexs = exclude_status_check_regexs or []
         extractor_class = self._extractors.get(filetype)
@@ -179,7 +178,7 @@ def __init__(self, filetype, headers=None, exclude_status_check_regexs: Optional
         self.extractor = extractor_class(**kwargs)
 
     def _get_urls(self, data, parser, reader):
-        flat_data = set(p for sublist in data for p in sublist)
+        flat_data = {p for sublist in data for p in sublist}
         # TODO yield instead
         urls = {}
         for element in flat_data:
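The same modernization pattern runs through this file: set comprehensions instead of `set([...])`, f-strings instead of `.format()`, and PEP 604 `list[str] | None` instead of `Optional[List[str]]`. Putting the checker together, roughly (a sketch; per the hunks above, URLs matching an exclusion regex are only logged, and status codes are written back onto the `UrlDiff` objects):

```python
import asyncio

from validator.checks.url import UrlStatusChecker
from validator.errors import UrlDiff

async def main():
    checker = UrlStatusChecker(exclude_urls_regexs=[r'^https?://localhost'])
    urls = [UrlDiff('https://example.com'), UrlDiff('http://localhost:8000/dev')]
    await checker.async_check(urls)  # localhost URL is skipped, the other is requested
    return urls

asyncio.run(main())
```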
6 changes: 3 additions & 3 deletions validator/errors.py
@@ -1,7 +1,7 @@
 from collections import namedtuple
 
 
-class UrlDiff(object):
+class UrlDiff:
 
     def __init__(self, url, files=None, status_code=200, has_disallowed_chars=False):
         self.url = url
@@ -10,7 +10,7 @@ def __init__(self, url, files=None, status_code=200, has_disallowed_chars=False)
         self.has_disallowed_chars = has_disallowed_chars
 
     def __str__(self):
-        return 'Url(%s, %s, %s, %s)' % (self.url, self.files, self.status_code, self.has_disallowed_chars)
+        return 'Url({}, {}, {}, {})'.format(self.url, self.files, self.status_code, self.has_disallowed_chars)
 
     def __repr__(self):
         return 'Url: %s' % self.url
@@ -37,7 +37,7 @@ def is_valid(self):
 ContentData.__new__.__defaults__ = ('', ) * 2
 
 
-class MdDiff(object):
+class MdDiff:
 
     def __init__(self, base, other, error_msgs):
         self.base = base
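Unlike the f-string conversions elsewhere in the PR, `__str__` moves to `str.format` here (and `__repr__` even keeps its %-formatting). The output is identical either way:

```python
url, files, status, bad = 'http://x', ['a.md'], 404, False
assert 'Url(%s, %s, %s, %s)' % (url, files, status, bad) == \
       'Url({}, {}, {}, {})'.format(url, files, status, bad)
# both render: Url(http://x, ['a.md'], 404, False)
```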
4 changes: 2 additions & 2 deletions validator/fs.py
@@ -91,10 +91,10 @@ def files(pattern, **kwargs):
     [[Path(path/to1/file1.txt), Path(path/to1/file2.txt)], [Path(path/to2/file1.txt), Path(path/to2/file2.txt)]]
     """
     # extract named parameters from the pattern
-    params = set([p for p in map(lambda e: e[1], Formatter().parse(pattern)) if p])
+    params = {p for p in map(lambda e: e[1], Formatter().parse(pattern)) if p}
     if params:
         if len(params - kwargs.keys()) > 0:
-            raise ValueError('missing parameters {} for pattern {}'.format(params - kwargs.keys(), pattern))
+            raise ValueError(f'missing parameters {params - kwargs.keys()} for pattern {pattern}')
         return _params_pattern(pattern, params, **kwargs)
     else:
         return _no_params_pattern(pattern)
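The f-string keeps the contract of `files()` intact: every `{name}` field in the pattern must arrive as a keyword argument, or the call dies early with the `ValueError` above. Sketched (the kwarg value here is an assumption; only the error path is verbatim):

```python
from validator import fs

fs.files('content/{lang}/index.md', lang='en')  # pattern parameter supplied
fs.files('content/{lang}/index.md')             # ValueError: missing parameters {'lang'} for pattern content/{lang}/index.md
```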
12 changes: 6 additions & 6 deletions validator/parsers.py
@@ -9,22 +9,22 @@ def __init__(self, msg):
         super().__init__(msg)
 
 
-class FileReader(object):
+class FileReader:
     def read(self, path):
         return read_content(path)
 
 
-class TxtReader(object):
+class TxtReader:
     def read(self, content):
         return content
 
 
-class MarkdownParser(object):
+class MarkdownParser:
     def parse(self, content):
         return markdown.markdown(content)
 
 
-class XmlParser(object):
+class XmlParser:
     def __init__(self, query='*'):
         self.query = query
 
@@ -38,12 +38,12 @@ def parse(self, content):
         return '\n\n'.join(texts)
 
 
-class CsvParser(object):
+class CsvParser:
     def parse(self, content):
         return '\n'.join(content.split(','))
 
 
-class ChainParser(object):
+class ChainParser:
     def __init__(self, parsers):
         self.parsers = parsers
 
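The parsers behave exactly as before the class-statement cleanup; from the bodies above:

```python
from validator.parsers import CsvParser, MarkdownParser

CsvParser().parse('a,b,c')         # -> 'a\nb\nc'
MarkdownParser().parse('# Title')  # -> '<h1>Title</h1>'
```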