Standardize code to "Best Practices" (hhursev#188)
* run Black on everything

* add pre-commit hooks

* 🎨 🚨

* Add black & flake8 to requirements

* 🚨 flake8 fixes

* 💚 use proper version

* Only Python 3.6+

* ✨ Add *Black* badge

* add black & flake8 pipeline tasks

Prevents "bad" merges when pre-commit hooks can't be run.

* create README FAQ & update instructions
bfcarpio authored Jul 15, 2020
1 parent 1030f35 commit 7834069
Showing 254 changed files with 26,349 additions and 27,706 deletions.
8 changes: 8 additions & 0 deletions .flake8
@@ -0,0 +1,8 @@
+[flake8]
+ignore = E203, E266, E501, W503
+# line length is intentionally set to 80 here because black uses Bugbear
+# See https://github.com/psf/black/blob/master/README.md#line-length for more details
+max-line-length = 80
+max-complexity = 18
+select = B,C,E,F,W,T4,B9
+exclude = tests/test_data/*
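Note: ignoring E203 and W503 is the usual flake8 accommodation for Black, whose output intentionally violates both, and E501 is dropped in favour of Bugbear's B9 line-length checks (selected above), which is why max-line-length stays at 80 even though Black itself wraps at 88. This mirrors the flake8 configuration recommended in Black's README at the time.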
2 changes: 1 addition & 1 deletion .gitignore
@@ -139,4 +139,4 @@ Temporary Items
 
 # Editor configs
 .vscode
-.idea
\ No newline at end of file
+.idea
15 changes: 15 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,15 @@
+repos:
+-   repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v2.3.0
+    hooks:
+    -   id: check-yaml
+    -   id: end-of-file-fixer
+    -   id: trailing-whitespace
+-   repo: https://github.com/psf/black
+    rev: 19.3b0
+    hooks:
+    -   id: black
+-   repo: https://gitlab.com/pycqa/flake8
+    rev: 3.8.3
+    hooks:
+    -   id: flake8
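Once a contributor runs pre-commit install (the updated README below adds this to the setup steps), the check-yaml, end-of-file-fixer, trailing-whitespace, Black, and flake8 hooks run automatically on every git commit; pre-commit run --all-files applies them to the whole tree on demand.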
3 changes: 2 additions & 1 deletion .travis.yml
@@ -2,7 +2,6 @@ dist: xenial
 language: python
 
 python:
-  - "3.5"
   - "3.6"
   - "3.7"
   - "3.8"
@@ -12,6 +11,8 @@ install:
   - pip install coveralls
 
 script:
+  - black --check .
+  - flake8 --count .
   - coverage run -m unittest
 
 after_success:
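Both new script steps are gating: black --check . exits non-zero if any file would be reformatted, and flake8 --count . exits non-zero if violations are found, so a pull request prepared without the local hooks still fails CI. This is the protection against "bad" merges described in the commit message.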
23 changes: 20 additions & 3 deletions README.rst
@@ -13,6 +13,9 @@
 .. image:: https://img.shields.io/github/stars/hhursev/recipe-scrapers?style=social
    :target: https://github.com/hhursev/recipe-scrapers/
    :alt: Github
+.. image:: https://img.shields.io/badge/code%20style-black-000000.svg
+   :target: https://github.com/psf/black
+   :alt: Black formatted
 
 
 ------
@@ -145,13 +148,17 @@ If you want a scraper for a new site added
 
 - Open an `Issue <https://github.com/hhursev/recipe-scraper/issues/new>`_ providing us the site name, as well as a recipe link from it.
 - You are a developer and want to code the scraper on your own:
-    - If Schema is available on the site - `you can do this <https://github.com/hhursev/recipe-scrapers/pull/176>`_
-    - Otherwise, scrape the HTML - `like this <https://github.com/hhursev/recipe-scrapers/commit/ffee963d04>`_
+
+    - If Schema is available on the site - `you can do this <https://github.com/hhursev/recipe-scrapers/pull/176>`_
+
+        - `How do I know if a schema is available on my site? <#faq>`_
+
+    - Otherwise, scrape the HTML - `like this <https://github.com/hhursev/recipe-scrapers/commit/ffee963d04>`_
 
 For Devs / Contribute
 ---------------------
 
-Assuming you have `python3` installed, navigate to the directory where you want this project to live in and drop these lines
+Assuming you have ``python3`` installed, navigate to the directory where you want this project to live in and drop these lines
 
 .. code::
@@ -160,9 +167,19 @@ Assuming you have ``python3`` installed, navigate to the directory where you want
 
     python3 -m venv .venv &&
     source .venv/bin/activate &&
    pip install -r requirements.txt &&
+    pre-commit install &&
     coverage run -m unittest &&
     coverage report
 
+FAQ
+---
+- **How do I know if a website has a Recipe Schema?**
+
+    - Go to a recipe on the website you want to be supported.
+    - Hit ``Ctrl - u`` on your keyboard
+    - Search (``Ctrl -f``) for ``application/ld+json``. It should be inside a ``script`` tag.
+    - If you found it then it's highly likely your website supports recipe schemas. Otherwise, you'll need to parse the HTML.
+
 Spacial thanks to:
 ------------------
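The new FAQ describes a manual check for a Recipe schema; the same test can be scripted. Below is a minimal sketch, assuming requests and beautifulsoup4 are installed (both are already dependencies of this project); the URL is a placeholder:

    import json

    import requests
    from bs4 import BeautifulSoup

    url = "https://www.example.com/some-recipe"  # placeholder recipe URL
    html = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}).text
    soup = BeautifulSoup(html, "html.parser")

    for script in soup.find_all("script", {"type": "application/ld+json"}):
        if not script.string:
            continue
        data = json.loads(script.string)
        # payloads may be a single object, a list, or wrapped in @graph
        items = data if isinstance(data, list) else data.get("@graph", [data])
        if any(i.get("@type") == "Recipe" for i in items if isinstance(i, dict)):
            print("Recipe schema found - a schema.org based scraper should work")
            break
    else:
        print("No Recipe schema - the scraper will need to parse the HTML")

(@type can also be a list, e.g. ["Recipe"]; the sketch ignores that case for brevity.)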
2 changes: 1 addition & 1 deletion recipe_scrapers/__version__.py
@@ -1 +1 @@
-__version__ = '8.2.2'
+__version__ = "8.2.2"
46 changes: 21 additions & 25 deletions recipe_scrapers/_abstract.py
@@ -8,12 +8,11 @@
 
 # some sites close their content for 'bots', so user-agent must be supplied
 HEADERS = {
-    'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7'
+    "User-Agent": "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7"
 }
 
 
 class AbstractScraper(metaclass=ExceptionHandlingMetaclass):
-
     def __init__(self, url, exception_handling=True, meta_http_equiv=False, test=False):
         if test:  # when testing, we load a file
             with url:
@@ -68,29 +67,30 @@ def language(self):
         May be overridden by individual scrapers.
         """
         candidate_languages = set()
-        html = self.soup.find(
-            'html',
-            {'lang': True}
-        )
-        candidate_languages.add(html.get('lang'))
+        html = self.soup.find("html", {"lang": True})
+        candidate_languages.add(html.get("lang"))
 
         # Deprecated: check for a meta http-equiv header
         # See: https://www.w3.org/International/questions/qa-http-and-lang
-        meta_language = self.soup.find(
-            'meta',
-            {
-                'http-equiv': lambda x: x and x.lower() == 'content-language',
-                'content': True
-            }
-        ) if self.meta_http_equiv else None
+        meta_language = (
+            self.soup.find(
+                "meta",
+                {
+                    "http-equiv": lambda x: x and x.lower() == "content-language",
+                    "content": True,
+                },
+            )
+            if self.meta_http_equiv
+            else None
+        )
         if meta_language:
-            for language in meta_language.get('content').split(','):
+            for language in meta_language.get("content").split(","):
                 candidate_languages.add(language)
                 break
 
         # If other langs exist, remove 'en' commonly generated by HTML editors
-        if len(candidate_languages) > 1 and 'en' in candidate_languages:
-            candidate_languages.remove('en')
+        if len(candidate_languages) > 1 and "en" in candidate_languages:
+            candidate_languages.remove("en")
 
         # Return the first candidate language
         for language in candidate_languages:
@@ -112,11 +112,7 @@ def reviews(self):
         raise NotImplementedError("This should be implemented.")
 
     def links(self):
-        invalid_href = ('#', '')
-        links_html = self.soup.findAll('a', href=True)
-
-        return [
-            link.attrs
-            for link in links_html
-            if link['href'] not in invalid_href
-        ]
+        invalid_href = ("#", "")
+        links_html = self.soup.findAll("a", href=True)
+
+        return [link.attrs for link in links_html if link["href"] not in invalid_href]
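To illustrate what the reformatted links() returns, here is a minimal standalone sketch using the same BeautifulSoup calls; the HTML snippet is invented:

    from bs4 import BeautifulSoup

    html = '<a href="https://example.com/cake">cake</a><a href="#">top</a><a href="">x</a>'
    invalid_href = ("#", "")

    soup = BeautifulSoup(html, "html.parser")
    links_html = soup.findAll("a", href=True)

    # keep only anchors with a usable href; each result is the tag's attrs dict
    links = [link.attrs for link in links_html if link["href"] not in invalid_href]
    print(links)  # [{'href': 'https://example.com/cake'}]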
14 changes: 8 additions & 6 deletions recipe_scrapers/_decorators.py
@@ -4,22 +4,22 @@
 from ._schemaorg import SchemaOrgException
 from ._utils import normalize_string
 
-class Decorators:
 
+class Decorators:
     @staticmethod
     def schema_org_priority(decorated):
         """
         Use SchemaOrg parser with priority (if there's data in it)
         On exception raised - continue by default.
         If there's no data (no schema implemented on the site) - continue by default
         """
+
         @functools.wraps(decorated)
         def schema_org_priority_wrapper(self, *args, **kwargs):
             function = getattr(self.schema, decorated.__name__)
             if not function:
                 raise SchemaOrgException(
-                    "Function '{}' not found in schema"
-                    .format(decorated.__name)
+                    "Function '{}' not found in schema".format(decorated.__name)
                 )
 
             if not self.schema.data:
@@ -39,12 +39,12 @@ def og_image_get(decorated):
         def og_image_get_wrapper(self, *args, **kwargs):
             try:
                 image = self.soup.find(
-                    'meta',
-                    {'property': 'og:image', 'content': True}
+                    "meta", {"property": "og:image", "content": True}
                 )
-                return image.get('content')
+                return image.get("content")
             except AttributeError:
                 return decorated(self, *args, **kwargs)
+
         return og_image_get_wrapper
 
     @staticmethod
@@ -53,11 +53,13 @@ def bcp47_validate(decorated):
         def bcp47_validate_wrapper(self, *args, **kwargs):
             tag = tags.tag(decorated(self, *args, **kwargs))
             return str(tag) if tag.valid else None
+
         return bcp47_validate_wrapper
 
     @staticmethod
     def normalize_string_output(decorated):
         @functools.wraps(decorated)
         def normalize_string_output_wrapper(self, *args, **kwargs):
             return normalize_string(decorated(self, *args, **kwargs))
+
         return normalize_string_output_wrapper
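To make the intended use of these decorators concrete, here is a hypothetical scraper sketch; the class name, selectors, and method bodies are invented for illustration and are not part of this commit:

    from recipe_scrapers._abstract import AbstractScraper
    from recipe_scrapers._decorators import Decorators


    class ExampleScraper(AbstractScraper):
        @Decorators.schema_org_priority      # use schema.org data when the site has it
        @Decorators.normalize_string_output  # collapse whitespace in the result
        def title(self):
            return self.soup.find("h1").get_text()

        @Decorators.og_image_get  # try the og:image meta tag before this body
        def image(self):
            return self.soup.find("img", {"class": "hero"}).get("src")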
26 changes: 15 additions & 11 deletions recipe_scrapers/_exception_handling.py
@@ -3,16 +3,16 @@
 
 
 ON_EXCEPTION_RETURN_VALUES = {
-    'title': '',
-    'total_time': 0,
-    'yields': '',
-    'image': '',
-    'ingredients': [],
-    'instructions': '',
-    'ratings': -1,
-    'reviews': None,
-    'links': [],
-    'language': 'en',
+    "title": "",
+    "total_time": 0,
+    "yields": "",
+    "image": "",
+    "ingredients": [],
+    "instructions": "",
+    "ratings": -1,
+    "reviews": None,
+    "links": [],
+    "language": "en",
 }
 
 
@@ -24,7 +24,10 @@ def exception_handling_wrapper(self, *args, **kwargs):
                 return decorated(self, *args, **kwargs)
             except Exception as e:
                 logging.info("exception_handling silencing exception: {}".format(e))
-                logging.debug("exception_handling silencing exception: {}".format(e), exc_info=True)
+                logging.debug(
+                    "exception_handling silencing exception: {}".format(e),
+                    exc_info=True,
+                )
                 return ON_EXCEPTION_RETURN_VALUES.get(decorated.__name__)
         else:
             return decorated(self, *args, **kwargs)
@@ -52,6 +55,7 @@ class ExceptionHandlingMetaclass(type):
         scraper = scrape_me('<recipe_url>', exception_handling=False)
         scraper.total_time()  # and etc.
     """
+
     def __new__(cls, class_name, bases, attributes):
         """
         Go through all class attributes.
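Based on the docstring above and the ON_EXCEPTION_RETURN_VALUES table, a short usage sketch; the URL is a placeholder, and scrape_me only accepts URLs of sites the library supports:

    from recipe_scrapers import scrape_me

    # default: exceptions inside scraper methods are logged and silenced,
    # and each method returns its fallback from ON_EXCEPTION_RETURN_VALUES
    scraper = scrape_me("https://www.example.com/some-recipe")
    scraper.title()       # "" if title parsing raised
    scraper.total_time()  # 0 if total_time parsing raised

    # opt out to surface the original exception while developing a scraper
    strict_scraper = scrape_me("https://www.example.com/some-recipe", exception_handling=False)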