Standardize code to "Best Practices" (hhursev#188)
* run Black on everything

* add pre-commit hooks

* 🎨 🚨

* Add black & flake8 to requirements

* 🚨 flake8 fixes

* 💚 use proper version

* Only Python 3.6+

* ✨ Add *Black* badge

* add black & flake8 pipeline tasks

Prevents "bad" merges when pre-commit hooks can't be run.

* create README FAQ & update instructions
bfcarpio authored Jul 15, 2020
1 parent 1030f35 commit 7834069
Showing 254 changed files with 26,349 additions and 27,706 deletions.
8 changes: 8 additions & 0 deletions .flake8
@@ -0,0 +1,8 @@
+[flake8]
+ignore = E203, E266, E501, W503
+# line length is intentionally set to 80 here because black uses Bugbear
+# See https://github.com/psf/black/blob/master/README.md#line-length for more details
+max-line-length = 80
+max-complexity = 18
+select = B,C,E,F,W,T4,B9
+exclude = tests/test_data/*
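Note: ignoring E203 and W503 is the usual flake8 accommodation for Black, whose output intentionally violates both, and E501 is dropped in favour of Bugbear's B9 line-length checks (selected above), which is why max-line-length stays at 80 even though Black itself wraps at 88. This mirrors the flake8 configuration recommended in Black's README at the time.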
2 changes: 1 addition & 1 deletion .gitignore
@@ -139,4 +139,4 @@ Temporary Items
 
 # Editor configs
 .vscode
-.idea
\ No newline at end of file
+.idea
15 changes: 15 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,15 @@
+repos:
+-   repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v2.3.0
+    hooks:
+    -   id: check-yaml
+    -   id: end-of-file-fixer
+    -   id: trailing-whitespace
+-   repo: https://github.com/psf/black
+    rev: 19.3b0
+    hooks:
+    -   id: black
+-   repo: https://gitlab.com/pycqa/flake8
+    rev: 3.8.3
+    hooks:
+    -   id: flake8
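Once a contributor runs pre-commit install (the updated README below adds this to the setup steps), the check-yaml, end-of-file-fixer, trailing-whitespace, Black, and flake8 hooks run automatically on every git commit; pre-commit run --all-files applies them to the whole tree on demand.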
3 changes: 2 additions & 1 deletion .travis.yml
@@ -2,7 +2,6 @@ dist: xenial
 language: python
 
 python:
-  - "3.5"
   - "3.6"
   - "3.7"
   - "3.8"
@@ -12,6 +11,8 @@ install:
   - pip install coveralls
 
 script:
+  - black --check .
+  - flake8 --count .
   - coverage run -m unittest
 
 after_success:
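Both new script steps are gating: black --check . exits non-zero if any file would be reformatted, and flake8 --count . exits non-zero if violations are found, so a pull request prepared without the local hooks still fails CI. This is the protection against "bad" merges described in the commit message.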
23 changes: 20 additions & 3 deletions README.rst
@@ -13,6 +13,9 @@
 .. image:: https://img.shields.io/github/stars/hhursev/recipe-scrapers?style=social
    :target: https://github.com/hhursev/recipe-scrapers/
    :alt: Github
+.. image:: https://img.shields.io/badge/code%20style-black-000000.svg
+   :target: https://github.com/psf/black
+   :alt: Black formatted
 
 
 ------
@@ -145,13 +148,17 @@ If you want a scraper for a new site added
 
 - Open an `Issue <https://github.com/hhursev/recipe-scraper/issues/new>`_ providing us the site name, as well as a recipe link from it.
 - You are a developer and want to code the scraper on your own:
-    - If Schema is available on the site - `you can do this <https://github.com/hhursev/recipe-scrapers/pull/176>`_
-    - Otherwise, scrape the HTML - `like this <https://github.com/hhursev/recipe-scrapers/commit/ffee963d04>`_
+
+    - If Schema is available on the site - `you can do this <https://github.com/hhursev/recipe-scrapers/pull/176>`_
+
+        - `How do I know if a schema is available on my site? <#faq>`_
+
+    - Otherwise, scrape the HTML - `like this <https://github.com/hhursev/recipe-scrapers/commit/ffee963d04>`_
 
 For Devs / Contribute
 ---------------------
 
-Assuming you have `python3` installed, navigate to the directory where you want this project to live in and drop these lines
+Assuming you have ``python3`` installed, navigate to the directory where you want this project to live in and drop these lines
 
 .. code::
@@ -160,9 +167,19 @@ Assuming you have ``python3`` installed, navigate to the directory where you want
 
     python3 -m venv .venv &&
     source .venv/bin/activate &&
    pip install -r requirements.txt &&
+    pre-commit install &&
     coverage run -m unittest &&
     coverage report
 
+FAQ
+---
+- **How do I know if a website has a Recipe Schema?**
+
+    - Go to a recipe on the website you want to be supported.
+    - Hit ``Ctrl - u`` on your keyboard
+    - Search (``Ctrl -f``) for ``application/ld+json``. It should be inside a ``script`` tag.
+    - If you found it then it's highly likely your website supports recipe schemas. Otherwise, you'll need to parse the HTML.
+
 Spacial thanks to:
 ------------------
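The new FAQ describes a manual check for a Recipe schema; the same test can be scripted. Below is a minimal sketch, assuming requests and beautifulsoup4 are installed (both are already dependencies of this project); the URL is a placeholder:

    import json

    import requests
    from bs4 import BeautifulSoup

    url = "https://www.example.com/some-recipe"  # placeholder recipe URL
    html = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}).text
    soup = BeautifulSoup(html, "html.parser")

    for script in soup.find_all("script", {"type": "application/ld+json"}):
        if not script.string:
            continue
        data = json.loads(script.string)
        # payloads may be a single object, a list, or wrapped in @graph
        items = data if isinstance(data, list) else data.get("@graph", [data])
        if any(i.get("@type") == "Recipe" for i in items if isinstance(i, dict)):
            print("Recipe schema found - a schema.org based scraper should work")
            break
    else:
        print("No Recipe schema - the scraper will need to parse the HTML")

(@type can also be a list, e.g. ["Recipe"]; the sketch ignores that case for brevity.)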
2 changes: 1 addition & 1 deletion recipe_scrapers/__version__.py
@@ -1 +1 @@
-__version__ = '8.2.2'
+__version__ = "8.2.2"
46 changes: 21 additions & 25 deletions recipe_scrapers/_abstract.py
@@ -8,12 +8,11 @@
 
 # some sites close their content for 'bots', so user-agent must be supplied
 HEADERS = {
-    'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7'
+    "User-Agent": "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7"
 }
 
 
 class AbstractScraper(metaclass=ExceptionHandlingMetaclass):
-
     def __init__(self, url, exception_handling=True, meta_http_equiv=False, test=False):
         if test:  # when testing, we load a file
             with url:
@@ -68,29 +67,30 @@ def language(self):
         May be overridden by individual scrapers.
         """
         candidate_languages = set()
-        html = self.soup.find(
-            'html',
-            {'lang': True}
-        )
-        candidate_languages.add(html.get('lang'))
+        html = self.soup.find("html", {"lang": True})
+        candidate_languages.add(html.get("lang"))
 
         # Deprecated: check for a meta http-equiv header
         # See: https://www.w3.org/International/questions/qa-http-and-lang
-        meta_language = self.soup.find(
-            'meta',
-            {
-                'http-equiv': lambda x: x and x.lower() == 'content-language',
-                'content': True
-            }
-        ) if self.meta_http_equiv else None
+        meta_language = (
+            self.soup.find(
+                "meta",
+                {
+                    "http-equiv": lambda x: x and x.lower() == "content-language",
+                    "content": True,
+                },
+            )
+            if self.meta_http_equiv
+            else None
+        )
         if meta_language:
-            for language in meta_language.get('content').split(','):
+            for language in meta_language.get("content").split(","):
                 candidate_languages.add(language)
                 break
 
         # If other langs exist, remove 'en' commonly generated by HTML editors
-        if len(candidate_languages) > 1 and 'en' in candidate_languages:
-            candidate_languages.remove('en')
+        if len(candidate_languages) > 1 and "en" in candidate_languages:
+            candidate_languages.remove("en")
 
         # Return the first candidate language
         for language in candidate_languages:
@@ -112,11 +112,7 @@ def reviews(self):
         raise NotImplementedError("This should be implemented.")
 
     def links(self):
-        invalid_href = ('#', '')
-        links_html = self.soup.findAll('a', href=True)
-
-        return [
-            link.attrs
-            for link in links_html
-            if link['href'] not in invalid_href
-        ]
+        invalid_href = ("#", "")
+        links_html = self.soup.findAll("a", href=True)
+
+        return [link.attrs for link in links_html if link["href"] not in invalid_href]
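To illustrate what the reformatted links() returns, here is a minimal standalone sketch using the same BeautifulSoup calls; the HTML snippet is invented:

    from bs4 import BeautifulSoup

    html = '<a href="https://example.com/cake">cake</a><a href="#">top</a><a href="">x</a>'
    invalid_href = ("#", "")

    soup = BeautifulSoup(html, "html.parser")
    links_html = soup.findAll("a", href=True)

    # keep only anchors with a usable href; each result is the tag's attrs dict
    links = [link.attrs for link in links_html if link["href"] not in invalid_href]
    print(links)  # [{'href': 'https://example.com/cake'}]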
14 changes: 8 additions & 6 deletions recipe_scrapers/_decorators.py
@@ -4,22 +4,22 @@
 from ._schemaorg import SchemaOrgException
 from ._utils import normalize_string
 
-class Decorators:
 
+class Decorators:
     @staticmethod
     def schema_org_priority(decorated):
         """
         Use SchemaOrg parser with priority (if there's data in it)
         On exception raised - continue by default.
         If there's no data (no schema implemented on the site) - continue by default
         """
+
         @functools.wraps(decorated)
         def schema_org_priority_wrapper(self, *args, **kwargs):
             function = getattr(self.schema, decorated.__name__)
             if not function:
                 raise SchemaOrgException(
-                    "Function '{}' not found in schema"
-                    .format(decorated.__name)
+                    "Function '{}' not found in schema".format(decorated.__name)
                 )
 
             if not self.schema.data:
@@ -39,12 +39,12 @@ def og_image_get(decorated):
         def og_image_get_wrapper(self, *args, **kwargs):
             try:
                 image = self.soup.find(
-                    'meta',
-                    {'property': 'og:image', 'content': True}
+                    "meta", {"property": "og:image", "content": True}
                 )
-                return image.get('content')
+                return image.get("content")
             except AttributeError:
                 return decorated(self, *args, **kwargs)
+
         return og_image_get_wrapper
 
     @staticmethod
@@ -53,11 +53,13 @@ def bcp47_validate(decorated):
         def bcp47_validate_wrapper(self, *args, **kwargs):
             tag = tags.tag(decorated(self, *args, **kwargs))
             return str(tag) if tag.valid else None
+
         return bcp47_validate_wrapper
 
     @staticmethod
     def normalize_string_output(decorated):
         @functools.wraps(decorated)
         def normalize_string_output_wrapper(self, *args, **kwargs):
             return normalize_string(decorated(self, *args, **kwargs))
+
         return normalize_string_output_wrapper
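To make the intended use of these decorators concrete, here is a hypothetical scraper sketch; the class name, selectors, and method bodies are invented for illustration and are not part of this commit:

    from recipe_scrapers._abstract import AbstractScraper
    from recipe_scrapers._decorators import Decorators


    class ExampleScraper(AbstractScraper):
        @Decorators.schema_org_priority      # use schema.org data when the site has it
        @Decorators.normalize_string_output  # collapse whitespace in the result
        def title(self):
            return self.soup.find("h1").get_text()

        @Decorators.og_image_get  # try the og:image meta tag before this body
        def image(self):
            return self.soup.find("img", {"class": "hero"}).get("src")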
26 changes: 15 additions & 11 deletions recipe_scrapers/_exception_handling.py
@@ -3,16 +3,16 @@
 
 
 ON_EXCEPTION_RETURN_VALUES = {
-    'title': '',
-    'total_time': 0,
-    'yields': '',
-    'image': '',
-    'ingredients': [],
-    'instructions': '',
-    'ratings': -1,
-    'reviews': None,
-    'links': [],
-    'language': 'en',
+    "title": "",
+    "total_time": 0,
+    "yields": "",
+    "image": "",
+    "ingredients": [],
+    "instructions": "",
+    "ratings": -1,
+    "reviews": None,
+    "links": [],
+    "language": "en",
 }
 
 
@@ -24,7 +24,10 @@ def exception_handling_wrapper(self, *args, **kwargs):
                 return decorated(self, *args, **kwargs)
             except Exception as e:
                 logging.info("exception_handling silencing exception: {}".format(e))
-                logging.debug("exception_handling silencing exception: {}".format(e), exc_info=True)
+                logging.debug(
+                    "exception_handling silencing exception: {}".format(e),
+                    exc_info=True,
+                )
                 return ON_EXCEPTION_RETURN_VALUES.get(decorated.__name__)
         else:
             return decorated(self, *args, **kwargs)
@@ -52,6 +55,7 @@ class ExceptionHandlingMetaclass(type):
         scraper = scrape_me('<recipe_url>', exception_handling=False)
         scraper.total_time()  # and etc.
     """
+
     def __new__(cls, class_name, bases, attributes):
         """
         Go through all class attributes.
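Based on the docstring above and the ON_EXCEPTION_RETURN_VALUES table, a short usage sketch; the URL is a placeholder, and scrape_me only accepts URLs of sites the library supports:

    from recipe_scrapers import scrape_me

    # default: exceptions inside scraper methods are logged and silenced,
    # and each method returns its fallback from ON_EXCEPTION_RETURN_VALUES
    scraper = scrape_me("https://www.example.com/some-recipe")
    scraper.title()       # "" if title parsing raised
    scraper.total_time()  # 0 if total_time parsing raised

    # opt out to surface the original exception while developing a scraper
    strict_scraper = scrape_me("https://www.example.com/some-recipe", exception_handling=False)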