From 58ca8bbf6d1589bd0c8cc1ebda52299346f55e8a Mon Sep 17 00:00:00 2001 From: Ammar Najjar Date: Sat, 22 Aug 2020 22:32:03 +0200 Subject: [PATCH] Use f-strings (#4307) --- docs/conf.py | 2 +- docs/intro/tutorial.rst | 6 +- docs/topics/developer-tools.rst | 138 +++++++++--------- docs/topics/exporters.rst | 6 +- docs/topics/item-pipeline.rst | 6 +- docs/topics/leaks.rst | 2 +- docs/topics/selectors.rst | 7 +- docs/topics/settings.rst | 2 +- docs/topics/spiders.rst | 4 +- extras/qps-bench-server.py | 2 +- extras/qpsclient.py | 4 +- scrapy/cmdline.py | 16 +- scrapy/commands/__init__.py | 2 +- scrapy/commands/bench.py | 2 +- scrapy/commands/check.py | 10 +- scrapy/commands/edit.py | 4 +- scrapy/commands/genspider.py | 27 ++-- scrapy/commands/parse.py | 4 +- scrapy/commands/runspider.py | 8 +- scrapy/commands/startproject.py | 12 +- scrapy/commands/version.py | 4 +- scrapy/contracts/__init__.py | 8 +- scrapy/contracts/default.py | 12 +- scrapy/core/downloader/__init__.py | 16 +- scrapy/core/downloader/handlers/__init__.py | 3 +- scrapy/core/downloader/handlers/http11.py | 17 ++- scrapy/core/downloader/handlers/s3.py | 6 +- scrapy/core/downloader/middleware.py | 15 +- scrapy/core/downloader/webclient.py | 6 +- scrapy/core/engine.py | 12 +- scrapy/core/scraper.py | 4 +- scrapy/core/spidermw.py | 20 +-- scrapy/downloadermiddlewares/cookies.py | 12 +- scrapy/downloadermiddlewares/httpproxy.py | 2 +- scrapy/downloadermiddlewares/retry.py | 2 +- scrapy/downloadermiddlewares/robotstxt.py | 6 +- scrapy/downloadermiddlewares/stats.py | 6 +- scrapy/exporters.py | 2 +- scrapy/extensions/corestats.py | 2 +- scrapy/extensions/debug.py | 2 +- scrapy/extensions/httpcache.py | 10 +- scrapy/extensions/memdebug.py | 2 +- scrapy/extensions/memusage.py | 14 +- scrapy/extensions/statsmailer.py | 10 +- scrapy/http/common.py | 2 +- scrapy/http/headers.py | 2 +- scrapy/http/request/__init__.py | 12 +- scrapy/http/request/form.py | 19 ++- scrapy/http/response/__init__.py | 6 +- scrapy/http/response/text.py | 15 +- scrapy/item.py | 6 +- scrapy/link.py | 6 +- scrapy/logformatter.py | 4 +- scrapy/pipelines/files.py | 23 ++- scrapy/pipelines/images.py | 9 +- scrapy/pipelines/media.py | 7 +- scrapy/pqueues.py | 9 +- scrapy/responsetypes.py | 2 +- scrapy/selector/unified.py | 4 +- scrapy/settings/__init__.py | 2 +- scrapy/settings/default_settings.py | 2 +- scrapy/shell.py | 4 +- scrapy/spiderloader.py | 13 +- scrapy/spidermiddlewares/depth.py | 2 +- scrapy/spidermiddlewares/httperror.py | 2 +- scrapy/spidermiddlewares/offsite.py | 6 +- scrapy/spidermiddlewares/referer.py | 2 +- scrapy/spiders/__init__.py | 11 +- scrapy/spiders/feed.py | 4 +- scrapy/utils/benchserver.py | 6 +- scrapy/utils/conf.py | 22 +-- scrapy/utils/curl.py | 4 +- scrapy/utils/decorators.py | 4 +- scrapy/utils/deprecate.py | 11 +- scrapy/utils/engine.py | 4 +- scrapy/utils/ftp.py | 2 +- scrapy/utils/iterators.py | 13 +- scrapy/utils/log.py | 4 +- scrapy/utils/misc.py | 15 +- scrapy/utils/project.py | 4 +- scrapy/utils/python.py | 12 +- scrapy/utils/reactor.py | 8 +- scrapy/utils/reqser.py | 4 +- scrapy/utils/response.py | 10 +- scrapy/utils/serialize.py | 6 +- scrapy/utils/ssl.py | 4 +- scrapy/utils/test.py | 2 +- scrapy/utils/testproc.py | 6 +- scrapy/utils/testsite.py | 4 +- scrapy/utils/trackref.py | 4 +- scrapy/utils/url.py | 4 +- sep/sep-002.rst | 2 +- sep/sep-004.rst | 4 +- sep/sep-014.rst | 29 ++-- sep/sep-018.rst | 22 +-- tests/CrawlerRunner/ip_address.py | 2 +- tests/mockserver.py | 10 +- tests/py36/_test_crawl.py | 2 +- tests/spiders.py | 8 +- 
tests/test_cmdline/extensions.py | 2 +- tests/test_command_check.py | 10 +- tests/test_command_parse.py | 16 +- tests/test_command_shell.py | 16 +- tests/test_command_version.py | 2 +- tests/test_commands.py | 32 ++-- tests/test_contracts.py | 2 +- tests/test_crawl.py | 6 +- tests/test_downloader_handlers.py | 22 +-- tests/test_downloadermiddleware.py | 2 +- ...test_downloadermiddleware_decompression.py | 2 +- tests/test_downloadermiddleware_httpcache.py | 10 +- tests/test_downloadermiddleware_redirect.py | 10 +- tests/test_downloadermiddleware_retry.py | 2 +- tests/test_engine.py | 23 +-- tests/test_feedexport.py | 13 +- tests/test_loader_deprecated.py | 2 +- tests/test_logformatter.py | 2 +- tests/test_middleware.py | 2 +- tests/test_pipeline_crawl.py | 4 +- tests/test_pipeline_files.py | 4 +- tests/test_pipeline_images.py | 4 +- tests/test_proxy_connect.py | 6 +- tests/test_request_attribute_binding.py | 4 +- tests/test_responsetypes.py | 12 +- tests/test_selector.py | 2 +- tests/test_signals.py | 2 +- tests/test_spidermiddleware_output_chain.py | 24 +-- tests/test_utils_curl.py | 2 +- tests/test_utils_datatypes.py | 2 +- tests/test_utils_defer.py | 8 +- tests/test_utils_iterators.py | 2 +- tests/test_utils_url.py | 6 +- tests/test_webclient.py | 8 +- 133 files changed, 561 insertions(+), 568 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 427c79481b4..27d2b5dff04 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -49,7 +49,7 @@ # General information about the project. project = 'Scrapy' -copyright = '2008–{}, Scrapy developers'.format(datetime.now().year) +copyright = f'2008–{datetime.now().year}, Scrapy developers' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the diff --git a/docs/intro/tutorial.rst b/docs/intro/tutorial.rst index f96c788873f..914b910225a 100644 --- a/docs/intro/tutorial.rst +++ b/docs/intro/tutorial.rst @@ -101,10 +101,10 @@ This is the code for our first Spider. Save it in a file named def parse(self, response): page = response.url.split("/")[-2] - filename = 'quotes-%s.html' % page + filename = f'quotes-{page}.html' with open(filename, 'wb') as f: f.write(response.body) - self.log('Saved file %s' % filename) + self.log(f'Saved file {filename}') As you can see, our Spider subclasses :class:`scrapy.Spider ` @@ -190,7 +190,7 @@ for your spider:: def parse(self, response): page = response.url.split("/")[-2] - filename = 'quotes-%s.html' % page + filename = f'quotes-{page}.html' with open(filename, 'wb') as f: f.write(response.body) diff --git a/docs/topics/developer-tools.rst b/docs/topics/developer-tools.rst index 101aa159c14..c83b1a9d9ee 100644 --- a/docs/topics/developer-tools.rst +++ b/docs/topics/developer-tools.rst @@ -5,9 +5,9 @@ Using your browser's Developer Tools for scraping ================================================= Here is a general guide on how to use your browser's Developer Tools -to ease the scraping process. Today almost all browsers come with +to ease the scraping process. Today almost all browsers come with built in `Developer Tools`_ and although we will use Firefox in this -guide, the concepts are applicable to any other browser. +guide, the concepts are applicable to any other browser. In this guide we'll introduce the basic tools to use from a browser's Developer Tools by scraping `quotes.toscrape.com`_. 
@@ -41,16 +41,16 @@ Therefore, you should keep in mind the following things: Inspecting a website ==================== -By far the most handy feature of the Developer Tools is the `Inspector` -feature, which allows you to inspect the underlying HTML code of -any webpage. To demonstrate the Inspector, let's look at the +By far the most handy feature of the Developer Tools is the `Inspector` +feature, which allows you to inspect the underlying HTML code of +any webpage. To demonstrate the Inspector, let's look at the `quotes.toscrape.com`_-site. On the site we have a total of ten quotes from various authors with specific -tags, as well as the Top Ten Tags. Let's say we want to extract all the quotes -on this page, without any meta-information about authors, tags, etc. +tags, as well as the Top Ten Tags. Let's say we want to extract all the quotes +on this page, without any meta-information about authors, tags, etc. -Instead of viewing the whole source code for the page, we can simply right click +Instead of viewing the whole source code for the page, we can simply right click on a quote and select ``Inspect Element (Q)``, which opens up the `Inspector`. In it you should see something like this: @@ -97,16 +97,16 @@ Then, back to your web browser, right-click on the ``span`` tag, select >>> response.xpath('/html/body/div/div[2]/div[1]/div[1]/span[1]/text()').getall() ['“The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.”'] -Adding ``text()`` at the end we are able to extract the first quote with this +Adding ``text()`` at the end we are able to extract the first quote with this basic selector. But this XPath is not really that clever. All it does is -go down a desired path in the source code starting from ``html``. So let's -see if we can refine our XPath a bit: +go down a desired path in the source code starting from ``html``. So let's +see if we can refine our XPath a bit: -If we check the `Inspector` again we'll see that directly beneath our -expanded ``div`` tag we have nine identical ``div`` tags, each with the -same attributes as our first. If we expand any of them, we'll see the same +If we check the `Inspector` again we'll see that directly beneath our +expanded ``div`` tag we have nine identical ``div`` tags, each with the +same attributes as our first. If we expand any of them, we'll see the same structure as with our first quote: Two ``span`` tags and one ``div`` tag. We can -expand each ``span`` tag with the ``class="text"`` inside our ``div`` tags and +expand each ``span`` tag with the ``class="text"`` inside our ``div`` tags and see each quote: .. code-block:: html @@ -121,7 +121,7 @@ see each quote: With this knowledge we can refine our XPath: Instead of a path to follow, -we'll simply select all ``span`` tags with the ``class="text"`` by using +we'll simply select all ``span`` tags with the ``class="text"`` by using the `has-class-extension`_: >>> response.xpath('//span[has-class("text")]/text()').getall() @@ -130,45 +130,45 @@ the `has-class-extension`_: '“There are only two ways to live your life. One is as though nothing is a miracle. The other is as though everything is a miracle.”', ...] -And with one simple, cleverer XPath we are able to extract all quotes from -the page. We could have constructed a loop over our first XPath to increase -the number of the last ``div``, but this would have been unnecessarily +And with one simple, cleverer XPath we are able to extract all quotes from +the page. 
We could have constructed a loop over our first XPath to increase +the number of the last ``div``, but this would have been unnecessarily complex and by simply constructing an XPath with ``has-class("text")`` -we were able to extract all quotes in one line. +we were able to extract all quotes in one line. -The `Inspector` has a lot of other helpful features, such as searching in the +The `Inspector` has a lot of other helpful features, such as searching in the source code or directly scrolling to an element you selected. Let's demonstrate -a use case: +a use case: -Say you want to find the ``Next`` button on the page. Type ``Next`` into the -search bar on the top right of the `Inspector`. You should get two results. -The first is a ``li`` tag with the ``class="next"``, the second the text +Say you want to find the ``Next`` button on the page. Type ``Next`` into the +search bar on the top right of the `Inspector`. You should get two results. +The first is a ``li`` tag with the ``class="next"``, the second the text of an ``a`` tag. Right click on the ``a`` tag and select ``Scroll into View``. If you hover over the tag, you'll see the button highlighted. From here -we could easily create a :ref:`Link Extractor ` to -follow the pagination. On a simple site such as this, there may not be +we could easily create a :ref:`Link Extractor ` to +follow the pagination. On a simple site such as this, there may not be the need to find an element visually but the ``Scroll into View`` function -can be quite useful on complex sites. +can be quite useful on complex sites. Note that the search bar can also be used to search for and test CSS -selectors. For example, you could search for ``span.text`` to find -all quote texts. Instead of a full text search, this searches for -exactly the ``span`` tag with the ``class="text"`` in the page. +selectors. For example, you could search for ``span.text`` to find +all quote texts. Instead of a full text search, this searches for +exactly the ``span`` tag with the ``class="text"`` in the page. .. _topics-network-tool: The Network-tool ================ While scraping you may come across dynamic webpages where some parts -of the page are loaded dynamically through multiple requests. While -this can be quite tricky, the `Network`-tool in the Developer Tools +of the page are loaded dynamically through multiple requests. While +this can be quite tricky, the `Network`-tool in the Developer Tools greatly facilitates this task. To demonstrate the Network-tool, let's -take a look at the page `quotes.toscrape.com/scroll`_. +take a look at the page `quotes.toscrape.com/scroll`_. -The page is quite similar to the basic `quotes.toscrape.com`_-page, -but instead of the above-mentioned ``Next`` button, the page -automatically loads new quotes when you scroll to the bottom. We -could go ahead and try out different XPaths directly, but instead +The page is quite similar to the basic `quotes.toscrape.com`_-page, +but instead of the above-mentioned ``Next`` button, the page +automatically loads new quotes when you scroll to the bottom. We +could go ahead and try out different XPaths directly, but instead we'll check another quite useful command from the Scrapy shell: .. skip: next @@ -179,9 +179,9 @@ we'll check another quite useful command from the Scrapy shell: (...) >>> view(response) -A browser window should open with the webpage but with one -crucial difference: Instead of the quotes we just see a greenish -bar with the word ``Loading...``. 
+A browser window should open with the webpage but with one +crucial difference: Instead of the quotes we just see a greenish +bar with the word ``Loading...``. .. image:: _images/network_01.png :width: 777 @@ -189,21 +189,21 @@ bar with the word ``Loading...``. :alt: Response from quotes.toscrape.com/scroll The ``view(response)`` command let's us view the response our -shell or later our spider receives from the server. Here we see -that some basic template is loaded which includes the title, +shell or later our spider receives from the server. Here we see +that some basic template is loaded which includes the title, the login-button and the footer, but the quotes are missing. This tells us that the quotes are being loaded from a different request -than ``quotes.toscrape/scroll``. +than ``quotes.toscrape/scroll``. -If you click on the ``Network`` tab, you will probably only see -two entries. The first thing we do is enable persistent logs by -clicking on ``Persist Logs``. If this option is disabled, the +If you click on the ``Network`` tab, you will probably only see +two entries. The first thing we do is enable persistent logs by +clicking on ``Persist Logs``. If this option is disabled, the log is automatically cleared each time you navigate to a different -page. Enabling this option is a good default, since it gives us -control on when to clear the logs. +page. Enabling this option is a good default, since it gives us +control on when to clear the logs. If we reload the page now, you'll see the log get populated with six -new requests. +new requests. .. image:: _images/network_02.png :width: 777 @@ -212,31 +212,31 @@ new requests. Here we see every request that has been made when reloading the page and can inspect each request and its response. So let's find out -where our quotes are coming from: +where our quotes are coming from: -First click on the request with the name ``scroll``. On the right +First click on the request with the name ``scroll``. On the right you can now inspect the request. In ``Headers`` you'll find details about the request headers, such as the URL, the method, the IP-address, and so on. We'll ignore the other tabs and click directly on ``Response``. -What you should see in the ``Preview`` pane is the rendered HTML-code, -that is exactly what we saw when we called ``view(response)`` in the -shell. Accordingly the ``type`` of the request in the log is ``html``. -The other requests have types like ``css`` or ``js``, but what -interests us is the one request called ``quotes?page=1`` with the -type ``json``. +What you should see in the ``Preview`` pane is the rendered HTML-code, +that is exactly what we saw when we called ``view(response)`` in the +shell. Accordingly the ``type`` of the request in the log is ``html``. +The other requests have types like ``css`` or ``js``, but what +interests us is the one request called ``quotes?page=1`` with the +type ``json``. -If we click on this request, we see that the request URL is +If we click on this request, we see that the request URL is ``http://quotes.toscrape.com/api/quotes?page=1`` and the response is a JSON-object that contains our quotes. We can also right-click -on the request and open ``Open in new tab`` to get a better overview. +on the request and open ``Open in new tab`` to get a better overview. .. 
image:: _images/network_03.png :width: 777 :height: 375 :alt: JSON-object returned from the quotes.toscrape API -With this response we can now easily parse the JSON-object and +With this response we can now easily parse the JSON-object and also request each page to get every quote on the site:: import scrapy @@ -255,17 +255,17 @@ also request each page to get every quote on the site:: yield {"quote": quote["text"]} if data["has_next"]: self.page += 1 - url = "http://quotes.toscrape.com/api/quotes?page={}".format(self.page) + url = f"http://quotes.toscrape.com/api/quotes?page={self.page}" yield scrapy.Request(url=url, callback=self.parse) -This spider starts at the first page of the quotes-API. With each -response, we parse the ``response.text`` and assign it to ``data``. -This lets us operate on the JSON-object like on a Python dictionary. +This spider starts at the first page of the quotes-API. With each +response, we parse the ``response.text`` and assign it to ``data``. +This lets us operate on the JSON-object like on a Python dictionary. We iterate through the ``quotes`` and print out the ``quote["text"]``. -If the handy ``has_next`` element is ``true`` (try loading +If the handy ``has_next`` element is ``true`` (try loading `quotes.toscrape.com/api/quotes?page=10`_ in your browser or a -page-number greater than 10), we increment the ``page`` attribute -and ``yield`` a new request, inserting the incremented page-number +page-number greater than 10), we increment the ``page`` attribute +and ``yield`` a new request, inserting the incremented page-number into our ``url``. .. _requests-from-curl: @@ -298,7 +298,7 @@ Note that to translate a cURL command into a Scrapy request, you may use `curl2scrapy `_. As you can see, with a few inspections in the `Network`-tool we -were able to easily replicate the dynamic requests of the scrolling +were able to easily replicate the dynamic requests of the scrolling functionality of the page. Crawling dynamic pages can be quite daunting and pages can be very complex, but it (mostly) boils down to identifying the correct request and replicating it in your spider. diff --git a/docs/topics/exporters.rst b/docs/topics/exporters.rst index 11ef5b2a650..793799a9aad 100644 --- a/docs/topics/exporters.rst +++ b/docs/topics/exporters.rst @@ -57,7 +57,7 @@ value of one of their fields:: adapter = ItemAdapter(item) year = adapter['year'] if year not in self.year_to_exporter: - f = open('{}.xml'.format(year), 'wb') + f = open(f'{year}.xml', 'wb') exporter = XmlItemExporter(f) exporter.start_exporting() self.year_to_exporter[year] = exporter @@ -98,7 +98,7 @@ Example:: import scrapy def serialize_price(value): - return '$ %s' % str(value) + return f'$ {str(value)}' class Product(scrapy.Item): name = scrapy.Field() @@ -122,7 +122,7 @@ Example:: def serialize_field(self, field, name, value): if field == 'price': - return '$ %s' % str(value) + return f'$ {str(value)}' return super(Product, self).serialize_field(field, name, value) .. _topics-exporters-reference: diff --git a/docs/topics/item-pipeline.rst b/docs/topics/item-pipeline.rst index cd6a6d47e75..6287ee0ad07 100644 --- a/docs/topics/item-pipeline.rst +++ b/docs/topics/item-pipeline.rst @@ -96,7 +96,7 @@ contain a price:: adapter['price'] = adapter['price'] * self.vat_factor return item else: - raise DropItem("Missing price in %s" % item) + raise DropItem(f"Missing price in {item}") Write items to a JSON file @@ -211,7 +211,7 @@ item. # Save screenshot to file, filename will be hash of url. 
url = adapter["url"] url_hash = hashlib.md5(url.encode("utf8")).hexdigest() - filename = "{}.png".format(url_hash) + filename = f"{url_hash}.png" with open(filename, "wb") as f: f.write(response.body) @@ -240,7 +240,7 @@ returns multiples items with the same id:: def process_item(self, item, spider): adapter = ItemAdapter(item) if adapter['id'] in self.ids_seen: - raise DropItem("Duplicate item found: %r" % item) + raise DropItem(f"Duplicate item found: {item!r}") else: self.ids_seen.add(adapter['id']) return item diff --git a/docs/topics/leaks.rst b/docs/topics/leaks.rst index d2f7edf0a75..b895b95cbc1 100644 --- a/docs/topics/leaks.rst +++ b/docs/topics/leaks.rst @@ -102,7 +102,7 @@ A real example Let's see a concrete example of a hypothetical case of memory leaks. Suppose we have some spider with a line similar to this one:: - return Request("http://www.somenastyspider.com/product.php?pid=%d" % product_id, + return Request(f"http://www.somenastyspider.com/product.php?pid={product_id}", callback=self.parse, cb_kwargs={'referer': response}) That line is passing a response reference inside a request which effectively diff --git a/docs/topics/selectors.rst b/docs/topics/selectors.rst index 9e2c6ba428f..b576fde91f1 100644 --- a/docs/topics/selectors.rst +++ b/docs/topics/selectors.rst @@ -328,8 +328,9 @@ too. Here's an example: 'Name: My image 5
'] >>> for index, link in enumerate(links): -... args = (index, link.xpath('@href').get(), link.xpath('img/@src').get()) -... print('Link number %d points to url %r and image %r' % args) +... href_xpath = link.xpath('@href').get() +... img_xpath = link.xpath('img/@src').get() +... print(f'Link number {index} points to url {href_xpath!r} and image {img_xpath!r}') Link number 0 points to url 'image1.html' and image 'image1_thumb.jpg' Link number 1 points to url 'image2.html' and image 'image2_thumb.jpg' Link number 2 points to url 'image3.html' and image 'image3_thumb.jpg' @@ -822,7 +823,7 @@ with groups of itemscopes and corresponding itemprops:: ... props = scope.xpath(''' ... set:difference(./descendant::*/@itemprop, ... .//*[@itemscope]/*/@itemprop)''') - ... print(" properties: %s" % (props.getall())) + ... print(f" properties: {props.getall()}") ... print("") current scope: ['http://schema.org/Product'] diff --git a/docs/topics/settings.rst b/docs/topics/settings.rst index 618b9989e26..22d60f87cf3 100644 --- a/docs/topics/settings.rst +++ b/docs/topics/settings.rst @@ -110,7 +110,7 @@ In a spider, the settings are available through ``self.settings``:: start_urls = ['http://example.com'] def parse(self, response): - print("Existing settings: %s" % self.settings.attributes.keys()) + print(f"Existing settings: {self.settings.attributes.keys()}") .. note:: The ``settings`` attribute is set in the base Spider class after the spider diff --git a/docs/topics/spiders.rst b/docs/topics/spiders.rst index e50e4aa0a96..2056664c7ca 100644 --- a/docs/topics/spiders.rst +++ b/docs/topics/spiders.rst @@ -279,7 +279,7 @@ Spiders can access arguments in their `__init__` methods:: def __init__(self, category=None, *args, **kwargs): super(MySpider, self).__init__(*args, **kwargs) - self.start_urls = ['http://www.example.com/categories/%s' % category] + self.start_urls = [f'http://www.example.com/categories/{category}'] # ... The default `__init__` method will take any spider arguments @@ -292,7 +292,7 @@ The above example can also be written as follows:: name = 'myspider' def start_requests(self): - yield scrapy.Request('http://www.example.com/categories/%s' % self.category) + yield scrapy.Request(f'http://www.example.com/categories/{self.category}') Keep in mind that spider arguments are only strings. The spider will not do any parsing on its own. 
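All of the documentation hunks above are the same mechanical rewrite: the value that used to sit in a trailing ``%`` tuple moves inline into an f-string. A minimal sketch of that equivalence, using illustrative values that are not taken from the patch::

    page = "2"                      # illustrative value
    product_id = 17                 # illustrative value

    # before: positional %-interpolation
    assert 'quotes-%s.html' % page == 'quotes-2.html'
    assert ("http://www.somenastyspider.com/product.php?pid=%d" % product_id
            == "http://www.somenastyspider.com/product.php?pid=17")

    # after: the expression sits directly inside the literal
    assert f'quotes-{page}.html' == 'quotes-2.html'
    assert (f"http://www.somenastyspider.com/product.php?pid={product_id}"
            == "http://www.somenastyspider.com/product.php?pid=17")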
diff --git a/extras/qps-bench-server.py b/extras/qps-bench-server.py index da7a0022b65..a6472b1bad7 100755 --- a/extras/qps-bench-server.py +++ b/extras/qps-bench-server.py @@ -37,7 +37,7 @@ def render(self, request): if now - self.lastmark >= 3: self.lastmark = now qps = len(self.tail) / sum(self.tail) - print('samplesize={0} concurrent={1} qps={2:0.2f}'.format(len(self.tail), self.concurrent, qps)) + print(f'samplesize={len(self.tail)} concurrent={self.concurrent} qps={qps:0.2f}') if 'latency' in request.args: latency = float(request.args['latency'][0]) diff --git a/extras/qpsclient.py b/extras/qpsclient.py index fe1f96cbb1c..f9fb703424b 100644 --- a/extras/qpsclient.py +++ b/extras/qpsclient.py @@ -37,11 +37,11 @@ def __init__(self, *a, **kw): def start_requests(self): url = self.benchurl if self.latency is not None: - url += '?latency={0}'.format(self.latency) + url += f'?latency={self.latency}' slots = int(self.slots) if slots > 1: - urls = [url.replace('localhost', '127.0.0.%d' % (x + 1)) for x in range(slots)] + urls = [url.replace('localhost', f'127.0.0.{x + 1}') for x in range(slots)] else: urls = [url] diff --git a/scrapy/cmdline.py b/scrapy/cmdline.py index 3e88536e4ad..91482ce0156 100644 --- a/scrapy/cmdline.py +++ b/scrapy/cmdline.py @@ -44,7 +44,7 @@ def _get_commands_from_entry_points(inproject, group='scrapy.commands'): if inspect.isclass(obj): cmds[entry_point.name] = obj() else: - raise Exception("Invalid entry point %s" % entry_point.name) + raise Exception(f"Invalid entry point {entry_point.name}") return cmds @@ -67,11 +67,11 @@ def _pop_command_name(argv): def _print_header(settings, inproject): + version = scrapy.__version__ if inproject: - print("Scrapy %s - project: %s\n" % (scrapy.__version__, - settings['BOT_NAME'])) + print(f"Scrapy {version} - project: {settings['BOT_NAME']}\n") else: - print("Scrapy %s - no active project\n" % scrapy.__version__) + print(f"Scrapy {version} - no active project\n") def _print_commands(settings, inproject): @@ -81,7 +81,7 @@ def _print_commands(settings, inproject): print("Available commands:") cmds = _get_commands_dict(settings, inproject) for cmdname, cmdclass in sorted(cmds.items()): - print(" %-13s %s" % (cmdname, cmdclass.short_desc())) + print(f" {cmdname:<13} {cmdclass.short_desc()}") if not inproject: print() print(" [ more ] More commands available when run from project directory") @@ -91,7 +91,7 @@ def _print_commands(settings, inproject): def _print_unknown_command(settings, cmdname, inproject): _print_header(settings, inproject) - print("Unknown command: %s\n" % cmdname) + print(f"Unknown command: {cmdname}\n") print('Use "scrapy" to see available commands') @@ -133,7 +133,7 @@ def execute(argv=None, settings=None): sys.exit(2) cmd = cmds[cmdname] - parser.usage = "scrapy %s %s" % (cmdname, cmd.syntax()) + parser.usage = f"scrapy {cmdname} {cmd.syntax()}" parser.description = cmd.long_desc() settings.setdict(cmd.default_settings, priority='command') cmd.settings = settings @@ -155,7 +155,7 @@ def _run_command(cmd, args, opts): def _run_command_profiled(cmd, args, opts): if opts.profile: - sys.stderr.write("scrapy: writing cProfile stats to %r\n" % opts.profile) + sys.stderr.write(f"scrapy: writing cProfile stats to {opts.profile!r}\n") loc = locals() p = cProfile.Profile() p.runctx('cmd.run(args, opts)', globals(), loc) diff --git a/scrapy/commands/__init__.py b/scrapy/commands/__init__.py index cfd940fe7e6..23ccffcd96f 100644 --- a/scrapy/commands/__init__.py +++ b/scrapy/commands/__init__.py @@ -61,7 +61,7 @@ def 
add_options(self, parser): group.add_option("--logfile", metavar="FILE", help="log file. if omitted stderr will be used") group.add_option("-L", "--loglevel", metavar="LEVEL", default=None, - help="log level (default: %s)" % self.settings['LOG_LEVEL']) + help=f"log level (default: {self.settings['LOG_LEVEL']})") group.add_option("--nolog", action="store_true", help="disable logging completely") group.add_option("--profile", metavar="FILE", default=None, diff --git a/scrapy/commands/bench.py b/scrapy/commands/bench.py index c9f3b38e073..999c987eac0 100644 --- a/scrapy/commands/bench.py +++ b/scrapy/commands/bench.py @@ -50,7 +50,7 @@ class _BenchSpider(scrapy.Spider): def start_requests(self): qargs = {'total': self.total, 'show': self.show} - url = '{}?{}'.format(self.baseurl, urlencode(qargs, doseq=1)) + url = f'{self.baseurl}?{urlencode(qargs, doseq=1)}' return [scrapy.Request(url, dont_filter=True)] def parse(self, response): diff --git a/scrapy/commands/check.py b/scrapy/commands/check.py index 09a76ca7afc..7e848dc976a 100644 --- a/scrapy/commands/check.py +++ b/scrapy/commands/check.py @@ -17,7 +17,7 @@ def printSummary(self, start, stop): plural = "s" if run != 1 else "" writeln(self.separator2) - writeln("Ran %d contract%s in %.3fs" % (run, plural, stop - start)) + writeln(f"Ran {run} contract{plural} in {stop - start:.3f}s") writeln() infos = [] @@ -25,14 +25,14 @@ def printSummary(self, start, stop): write("FAILED") failed, errored = map(len, (self.failures, self.errors)) if failed: - infos.append("failures=%d" % failed) + infos.append(f"failures={failed}") if errored: - infos.append("errors=%d" % errored) + infos.append(f"errors={errored}") else: write("OK") if infos: - writeln(" (%s)" % (", ".join(infos),)) + writeln(f" ({', '.join(infos)})") else: write("\n") @@ -85,7 +85,7 @@ def run(self, args, opts): continue print(spider) for method in sorted(methods): - print(' * %s' % method) + print(f' * {method}') else: start = time.time() self.crawler_process.start() diff --git a/scrapy/commands/edit.py b/scrapy/commands/edit.py index 25d843a53ea..177b2014301 100644 --- a/scrapy/commands/edit.py +++ b/scrapy/commands/edit.py @@ -32,8 +32,8 @@ def run(self, args, opts): try: spidercls = self.crawler_process.spider_loader.load(args[0]) except KeyError: - return self._err("Spider not found: %s" % args[0]) + return self._err(f"Spider not found: {args[0]}") sfile = sys.modules[spidercls.__module__].__file__ sfile = sfile.replace('.pyc', '.py') - self.exitcode = os.system('%s "%s"' % (editor, sfile)) + self.exitcode = os.system(f'{editor} "{sfile}"') diff --git a/scrapy/commands/genspider.py b/scrapy/commands/genspider.py index 74a077d1b7b..72248bdede4 100644 --- a/scrapy/commands/genspider.py +++ b/scrapy/commands/genspider.py @@ -73,17 +73,18 @@ def run(self, args, opts): if template_file: self._genspider(module, name, domain, opts.template, template_file) if opts.edit: - self.exitcode = os.system('scrapy edit "%s"' % name) + self.exitcode = os.system(f'scrapy edit "{name}"') def _genspider(self, module, name, domain, template_name, template_file): """Generate the spider module, based on the given template""" + capitalized_module = ''.join(s.capitalize() for s in module.split('_')) tvars = { 'project_name': self.settings.get('BOT_NAME'), 'ProjectName': string_camelcase(self.settings.get('BOT_NAME')), 'module': module, 'name': name, 'domain': domain, - 'classname': '%sSpider' % ''.join(s.capitalize() for s in module.split('_')) + 'classname': f'{capitalized_module}Spider' } if
self.settings.get('NEWSPIDER_MODULE'): spiders_module = import_module(self.settings['NEWSPIDER_MODULE']) @@ -91,32 +92,32 @@ def _genspider(self, module, name, domain, template_name, template_file): else: spiders_module = None spiders_dir = "." - spider_file = "%s.py" % join(spiders_dir, module) + spider_file = f"{join(spiders_dir, module)}.py" shutil.copyfile(template_file, spider_file) render_templatefile(spider_file, **tvars) - print("Created spider %r using template %r " - % (name, template_name), end=('' if spiders_module else '\n')) + print(f"Created spider {name!r} using template {template_name!r} ", + end=('' if spiders_module else '\n')) if spiders_module: - print("in module:\n %s.%s" % (spiders_module.__name__, module)) + print(f"in module:\n {spiders_module.__name__}.{module}") def _find_template(self, template): - template_file = join(self.templates_dir, '%s.tmpl' % template) + template_file = join(self.templates_dir, f'{template}.tmpl') if exists(template_file): return template_file - print("Unable to find template: %s\n" % template) + print(f"Unable to find template: {template}\n") print('Use "scrapy genspider --list" to see all available templates.') def _list_templates(self): print("Available templates:") for filename in sorted(os.listdir(self.templates_dir)): if filename.endswith('.tmpl'): - print(" %s" % splitext(filename)[0]) + print(f" {splitext(filename)[0]}") def _spider_exists(self, name): if not self.settings.get('NEWSPIDER_MODULE'): # if run as a standalone command and file with same filename already exists if exists(name + ".py"): - print("%s already exists" % (abspath(name + ".py"))) + print(f"{abspath(name + '.py')} already exists") return True return False @@ -126,8 +127,8 @@ def _spider_exists(self, name): pass else: # if spider with same name exists - print("Spider %r already exists in module:" % name) - print(" %s" % spidercls.__module__) + print(f"Spider {name!r} already exists in module:") + print(f" {spidercls.__module__}") return True # a file with the same name exists in the target directory @@ -135,7 +136,7 @@ def _spider_exists(self, name): spiders_dir = dirname(spiders_module.__file__) spiders_dir_abs = abspath(spiders_dir) if exists(join(spiders_dir_abs, name + ".py")): - print("%s already exists" % (join(spiders_dir_abs, (name + ".py")))) + print(f"{join(spiders_dir_abs, (name + '.py'))} already exists") return True return False diff --git a/scrapy/commands/parse.py b/scrapy/commands/parse.py index abc8ba9ff7f..83ee074daf4 100644 --- a/scrapy/commands/parse.py +++ b/scrapy/commands/parse.py @@ -96,13 +96,13 @@ def print_results(self, opts): if opts.verbose: for level in range(1, self.max_level + 1): - print('\n>>> DEPTH LEVEL: %s <<<' % level) + print(f'\n>>> DEPTH LEVEL: {level} <<<') if not opts.noitems: self.print_items(level, colour) if not opts.nolinks: self.print_requests(level, colour) else: - print('\n>>> STATUS DEPTH LEVEL %s <<<' % self.max_level) + print(f'\n>>> STATUS DEPTH LEVEL {self.max_level} <<<') if not opts.noitems: self.print_items(colour=colour) if not opts.nolinks: diff --git a/scrapy/commands/runspider.py b/scrapy/commands/runspider.py index befee021b6b..aedd8c2ce01 100644 --- a/scrapy/commands/runspider.py +++ b/scrapy/commands/runspider.py @@ -12,7 +12,7 @@ def _import_file(filepath): dirname, file = os.path.split(abspath) fname, fext = os.path.splitext(file) if fext != '.py': - raise ValueError("Not a Python source file: %s" % abspath) + raise ValueError(f"Not a Python source file: {abspath}") if dirname: sys.path =
[dirname] + sys.path try: @@ -42,14 +42,14 @@ def run(self, args, opts): raise UsageError() filename = args[0] if not os.path.exists(filename): - raise UsageError("File not found: %s\n" % filename) + raise UsageError(f"File not found: {filename}\n") try: module = _import_file(filename) except (ImportError, ValueError) as e: - raise UsageError("Unable to load %r: %s\n" % (filename, e)) + raise UsageError(f"Unable to load {filename!r}: {e}\n") spclasses = list(iter_spider_classes(module)) if not spclasses: - raise UsageError("No spider found in file: %s\n" % filename) + raise UsageError(f"No spider found in file: {filename}\n") spidercls = spclasses.pop() self.crawler_process.crawl(spidercls, **opts.spargs) diff --git a/scrapy/commands/startproject.py b/scrapy/commands/startproject.py index e5158d99384..1d73fa0cb70 100644 --- a/scrapy/commands/startproject.py +++ b/scrapy/commands/startproject.py @@ -52,7 +52,7 @@ def _module_exists(module_name): print('Error: Project names must begin with a letter and contain' ' only\nletters, numbers and underscores') elif _module_exists(project_name): - print('Error: Module %r already exists' % project_name) + print(f'Error: Module {project_name!r} already exists') else: return True return False @@ -100,7 +100,7 @@ def run(self, args, opts): if exists(join(project_dir, 'scrapy.cfg')): self.exitcode = 1 - print('Error: scrapy.cfg already exists in %s' % abspath(project_dir)) + print(f'Error: scrapy.cfg already exists in {abspath(project_dir)}') return if not self._is_valid_name(project_name): @@ -113,11 +113,11 @@ def run(self, args, opts): path = join(*paths) tplfile = join(project_dir, string.Template(path).substitute(project_name=project_name)) render_templatefile(tplfile, project_name=project_name, ProjectName=string_camelcase(project_name)) - print("New Scrapy project '%s', using template directory '%s', " - "created in:" % (project_name, self.templates_dir)) - print(" %s\n" % abspath(project_dir)) + print(f"New Scrapy project '{project_name}', using template directory " + f"'{self.templates_dir}', created in:") + print(f" {abspath(project_dir)}\n") print("You can start your first spider with:") - print(" cd %s" % project_dir) + print(f" cd {project_dir}") print(" scrapy genspider example example.com") @property diff --git a/scrapy/commands/version.py b/scrapy/commands/version.py index d0ea72a6775..dc80870431c 100644 --- a/scrapy/commands/version.py +++ b/scrapy/commands/version.py @@ -23,8 +23,8 @@ def run(self, args, opts): if opts.verbose: versions = scrapy_components_versions() width = max(len(n) for (n, _) in versions) - patt = "%-{}s : %s".format(width) + patt = f"%-{width}s : %s" for name, version in versions: print(patt % (name, version)) else: - print("Scrapy %s" % scrapy.__version__) + print(f"Scrapy {scrapy.__version__}") diff --git a/scrapy/contracts/__init__.py b/scrapy/contracts/__init__.py index 5af3831a29d..db0a56e5642 100644 --- a/scrapy/contracts/__init__.py +++ b/scrapy/contracts/__init__.py @@ -112,8 +112,8 @@ class Contract: request_cls = None def __init__(self, method, *args): - self.testcase_pre = _create_testcase(method, '@%s pre-hook' % self.name) - self.testcase_post = _create_testcase(method, '@%s post-hook' % self.name) + self.testcase_pre = _create_testcase(method, f'@{self.name} pre-hook') + self.testcase_post = _create_testcase(method, f'@{self.name} post-hook') self.args = args def add_pre_hook(self, request, results): @@ -172,8 +172,8 @@ def _create_testcase(method, desc): class ContractTestCase(TestCase): def 
__str__(_self): - return "[%s] %s (%s)" % (spider, method.__name__, desc) + return f"[{spider}] {method.__name__} ({desc})" - name = '%s_%s' % (spider, method.__name__) + name = f'{spider}_{method.__name__}' setattr(ContractTestCase, name, lambda x: x) return ContractTestCase(name) diff --git a/scrapy/contracts/default.py b/scrapy/contracts/default.py index cfdcc7c252b..9704f525303 100644 --- a/scrapy/contracts/default.py +++ b/scrapy/contracts/default.py @@ -60,8 +60,7 @@ def __init__(self, *args, **kwargs): if len(self.args) not in [1, 2, 3]: raise ValueError( - "Incorrect argument quantity: expected 1, 2 or 3, got %i" - % len(self.args) + f"Incorrect argument quantity: expected 1, 2 or 3, got {len(self.args)}" ) self.obj_name = self.args[0] or None self.obj_type_verifier = self.object_type_verifiers[self.obj_name] @@ -88,10 +87,9 @@ def post_process(self, output): if self.min_bound == self.max_bound: expected = self.min_bound else: - expected = '%s..%s' % (self.min_bound, self.max_bound) + expected = f'{self.min_bound}..{self.max_bound}' - raise ContractFail("Returned %s %s, expected %s" % - (occurrences, self.obj_name, expected)) + raise ContractFail(f"Returned {occurrences} {self.obj_name}, expected {expected}") class ScrapesContract(Contract): @@ -106,5 +104,5 @@ def post_process(self, output): if is_item(x): missing = [arg for arg in self.args if arg not in ItemAdapter(x)] if missing: - missing_str = ", ".join(missing) - raise ContractFail("Missing fields: %s" % missing_str) + missing_fields = ", ".join(missing) + raise ContractFail(f"Missing fields: {missing_fields}") diff --git a/scrapy/core/downloader/__init__.py b/scrapy/core/downloader/__init__.py index dc5cf1ab80d..12a9db6dd4a 100644 --- a/scrapy/core/downloader/__init__.py +++ b/scrapy/core/downloader/__init__.py @@ -41,17 +41,17 @@ def close(self): def __repr__(self): cls_name = self.__class__.__name__ - return "%s(concurrency=%r, delay=%0.2f, randomize_delay=%r)" % ( - cls_name, self.concurrency, self.delay, self.randomize_delay) + return (f"{cls_name}(concurrency={self.concurrency!r}, " + f"delay={self.delay:.2f}, " + f"randomize_delay={self.randomize_delay!r})") def __str__(self): return ( - "<downloader.Slot concurrency=%r delay=%0.2f randomize_delay=%r " - "len(active)=%d len(queue)=%d len(transferring)=%d lastseen=%s>" % ( - self.concurrency, self.delay, self.randomize_delay, - len(self.active), len(self.queue), len(self.transferring), - datetime.fromtimestamp(self.lastseen).isoformat() - ) + f"<downloader.Slot concurrency={self.concurrency!r} " + f"delay={self.delay:.2f} randomize_delay={self.randomize_delay!r} " + f"len(active)={len(self.active)} len(queue)={len(self.queue)} " + f"len(transferring)={len(self.transferring)} " + f"lastseen={datetime.fromtimestamp(self.lastseen).isoformat()}>" ) diff --git a/scrapy/core/downloader/handlers/__init__.py b/scrapy/core/downloader/handlers/__init__.py index e8668097884..73aeb23526a 100644 --- a/scrapy/core/downloader/handlers/__init__.py +++ b/scrapy/core/downloader/handlers/__init__.py @@ -71,8 +71,7 @@ def download_request(self, request, spider): scheme = urlparse_cached(request).scheme handler = self._get_handler(scheme) if not handler: - raise NotSupported("Unsupported URL scheme '%s': %s" % - (scheme, self._notconfigured[scheme])) + raise NotSupported(f"Unsupported URL scheme '{scheme}': {self._notconfigured[scheme]}") return handler.download_request(request, spider) @defer.inlineCallbacks diff --git a/scrapy/core/downloader/handlers/http11.py b/scrapy/core/downloader/handlers/http11.py index 25e80098407..1b041c8a833 100644 --- a/scrapy/core/downloader/handlers/http11.py +++ b/scrapy/core/downloader/handlers/http11.py @@ -60,11 +60,11 @@ def __init__(self, settings, crawler=None): settings=settings, crawler=crawler, ) - msg = """ - '%s' does not accept `method` argument (type OpenSSL.SSL method,\ - e.g.
OpenSSL.SSL.SSLv23_METHOD) and/or `tls_verbose_logging` argument and/or `tls_ciphers` argument.\ - Please upgrade your context factory class to handle them or ignore them.""" % ( - settings['DOWNLOADER_CLIENTCONTEXTFACTORY'],) + msg = f""" + '{settings["DOWNLOADER_CLIENTCONTEXTFACTORY"]}' does not accept `method` \ + argument (type OpenSSL.SSL method, e.g. OpenSSL.SSL.SSLv23_METHOD) and/or \ + `tls_verbose_logging` argument and/or `tls_ciphers` argument.\ + Please upgrade your context factory class to handle them or ignore them.""" warnings.warn(msg) self._default_maxsize = settings.getint('DOWNLOAD_MAXSIZE') self._default_warnsize = settings.getint('DOWNLOAD_WARNSIZE') @@ -169,8 +169,9 @@ def processProxyResponse(self, rcvd_bytes): else: extra = rcvd_bytes[:32] self._tunnelReadyDeferred.errback( - TunnelError('Could not open CONNECT tunnel with proxy %s:%s [%r]' % ( - self._host, self._port, extra))) + TunnelError('Could not open CONNECT tunnel with proxy ' + f'{self._host}:{self._port} [{extra!r}]') + ) def connectFailed(self, reason): """Propagates the errback to the appropriate deferred.""" @@ -371,7 +372,7 @@ def _cb_timeout(self, result, request, url, timeout): if self._txresponse: self._txresponse._transport.stopProducing() - raise TimeoutError("Getting %s took longer than %s seconds." % (url, timeout)) + raise TimeoutError(f"Getting {url} took longer than {timeout} seconds.") def _cb_latency(self, result, request, start_time): request.meta['download_latency'] = time() - start_time diff --git a/scrapy/core/downloader/handlers/s3.py b/scrapy/core/downloader/handlers/s3.py index 8f63ad97427..0ef977893db 100644 --- a/scrapy/core/downloader/handlers/s3.py +++ b/scrapy/core/downloader/handlers/s3.py @@ -56,7 +56,7 @@ def __init__(self, settings, *, import botocore.credentials kw.pop('anon', None) if kw: - raise TypeError('Unexpected keyword arguments: %s' % kw) + raise TypeError(f'Unexpected keyword arguments: {kw}') if not self.anon: SignerCls = botocore.auth.AUTH_TYPE_MAPS['s3'] self._signer = SignerCls(botocore.credentials.Credentials( @@ -85,14 +85,14 @@ def download_request(self, request, spider): scheme = 'https' if request.meta.get('is_secure') else 'http' bucket = p.hostname path = p.path + '?' 
+ p.query if p.query else p.path - url = '%s://%s.s3.amazonaws.com%s' % (scheme, bucket, path) + url = f'{scheme}://{bucket}.s3.amazonaws.com{path}' if self.anon: request = request.replace(url=url) elif self._signer is not None: import botocore.awsrequest awsrequest = botocore.awsrequest.AWSRequest( method=request.method, - url='%s://s3.amazonaws.com/%s%s' % (scheme, bucket, path), + url=f'{scheme}://s3.amazonaws.com/{bucket}{path}', headers=request.headers.to_unicode_dict(), data=request.body) self._signer.add_auth(awsrequest) diff --git a/scrapy/core/downloader/middleware.py b/scrapy/core/downloader/middleware.py index 4c2eea5220f..b0e612e43df 100644 --- a/scrapy/core/downloader/middleware.py +++ b/scrapy/core/downloader/middleware.py @@ -36,8 +36,9 @@ def process_request(request): response = yield deferred_from_coro(method(request=request, spider=spider)) if response is not None and not isinstance(response, (Response, Request)): raise _InvalidOutput( - "Middleware %s.process_request must return None, Response or Request, got %s" - % (method.__self__.__class__.__name__, response.__class__.__name__) + f"Middleware {method.__self__.__class__.__name__}" + ".process_request must return None, Response or " + f"Request, got {response.__class__.__name__}" ) if response: return response @@ -54,8 +55,9 @@ def process_response(response): response = yield deferred_from_coro(method(request=request, response=response, spider=spider)) if not isinstance(response, (Response, Request)): raise _InvalidOutput( - "Middleware %s.process_response must return Response or Request, got %s" - % (method.__self__.__class__.__name__, type(response)) + f"Middleware {method.__self__.__class__.__name__}" + ".process_response must return Response or Request, " + f"got {type(response)}" ) if isinstance(response, Request): return response @@ -68,8 +70,9 @@ def process_exception(failure): response = yield deferred_from_coro(method(request=request, exception=exception, spider=spider)) if response is not None and not isinstance(response, (Response, Request)): raise _InvalidOutput( - "Middleware %s.process_exception must return None, Response or Request, got %s" - % (method.__self__.__class__.__name__, type(response)) + f"Middleware {method.__self__.__class__.__name__}" + ".process_exception must return None, Response or " + f"Request, got {type(response)}" ) if response: return response diff --git a/scrapy/core/downloader/webclient.py b/scrapy/core/downloader/webclient.py index b2b96f1eab0..c1368339378 100644 --- a/scrapy/core/downloader/webclient.py +++ b/scrapy/core/downloader/webclient.py @@ -88,8 +88,8 @@ def timeout(self): self.transport.stopProducing() self.factory.noPage( - defer.TimeoutError("Getting %s took longer than %s seconds." 
- % (self.factory.url, self.factory.timeout))) + defer.TimeoutError(f"Getting {self.factory.url} took longer " + f"than {self.factory.timeout} seconds.")) # This class used to inherit from Twisted’s @@ -155,7 +155,7 @@ def __init__(self, request, timeout=180): self.headers['Content-Length'] = 0 def __repr__(self): - return "<%s: %s>" % (self.__class__.__name__, self.url) + return f"<{self.__class__.__name__}: {self.url}>" def _cancelTimeout(self, result, timeoutCall): if timeoutCall.active(): diff --git a/scrapy/core/engine.py b/scrapy/core/engine.py index 5e0dfe37c31..93bcdb49a9e 100644 --- a/scrapy/core/engine.py +++ b/scrapy/core/engine.py @@ -171,8 +171,8 @@ def _next_request_from_scheduler(self, spider): def _handle_downloader_output(self, response, request, spider): if not isinstance(response, (Request, Response, Failure)): raise TypeError( - "Incorrect type: expected Request, Response or Failure, got %s: %r" - % (type(response), response) + "Incorrect type: expected Request, Response or Failure, got " + f"{type(response)}: {response!r}" ) # downloader middleware can return requests (for example, redirects) if isinstance(response, Request): @@ -214,7 +214,7 @@ def has_capacity(self): def crawl(self, request, spider): if spider not in self.open_spiders: - raise RuntimeError("Spider %r not opened when crawling: %s" % (spider.name, request)) + raise RuntimeError(f"Spider {spider.name!r} not opened when crawling: {request}") self.schedule(request, spider) self.slot.nextcall.schedule() @@ -239,8 +239,8 @@ def _download(self, request, spider): def _on_success(response): if not isinstance(response, (Response, Request)): raise TypeError( - "Incorrect type: expected Response or Request, got %s: %r" - % (type(response), response) + "Incorrect type: expected Response or Request, got " + f"{type(response)}: {response!r}" ) if isinstance(response, Response): if response.request is None: @@ -268,7 +268,7 @@ def _on_complete(_): @defer.inlineCallbacks def open_spider(self, spider, start_requests=(), close_if_idle=True): if not self.has_capacity(): - raise RuntimeError("No free spider slot when opening %r" % spider.name) + raise RuntimeError(f"No free spider slot when opening {spider.name!r}") logger.info("Spider opened", extra={'spider': spider}) nextcall = CallLaterOnce(self._next_request, spider) scheduler = self.scheduler_cls.from_crawler(self.crawler) diff --git a/scrapy/core/scraper.py b/scrapy/core/scraper.py index 20bdb22a1ec..0d3e3450f1e 100644 --- a/scrapy/core/scraper.py +++ b/scrapy/core/scraper.py @@ -125,7 +125,7 @@ def _scrape(self, result, request, spider): Handle the downloaded response or failure through the spider callback/errback """ if not isinstance(result, (Response, Failure)): - raise TypeError("Incorrect type: expected Response or Failure, got %s: %r" % (type(result), result)) + raise TypeError(f"Incorrect type: expected Response or Failure, got {type(result)}: {result!r}") dfd = self._scrape2(result, request, spider) # returns spider's processed output dfd.addErrback(self.handle_spider_error, request, result, spider) dfd.addCallback(self.handle_spider_output, request, result, spider) @@ -173,7 +173,7 @@ def handle_spider_error(self, _failure, request, response, spider): spider=spider ) self.crawler.stats.inc_value( - "spider_exceptions/%s" % _failure.value.__class__.__name__, + f"spider_exceptions/{_failure.value.__class__.__name__}", spider=spider ) diff --git a/scrapy/core/spidermw.py b/scrapy/core/spidermw.py index 5a99b96bed2..763e0cdf626 100644 --- 
a/scrapy/core/spidermw.py +++ b/scrapy/core/spidermw.py @@ -19,10 +19,7 @@ def _isiterable(possible_iterator): def _fname(f): - return "{}.{}".format( - f.__self__.__class__.__name__, - f.__func__.__name__ - ) + return f"{f.__self__.__class__.__name__}.{f.__func__.__name__}" class SpiderMiddlewareManager(MiddlewareManager): @@ -51,8 +48,9 @@ def process_spider_input(response): try: result = method(response=response, spider=spider) if result is not None: - msg = "Middleware {} must return None or raise an exception, got {}" - raise _InvalidOutput(msg.format(_fname(method), type(result))) + msg = (f"Middleware {_fname(method)} must return None " + f"or raise an exception, got {type(result)}") + raise _InvalidOutput(msg) except _InvalidOutput: raise except Exception: @@ -86,8 +84,9 @@ def process_spider_exception(_failure, start_index=0): elif result is None: continue else: - msg = "Middleware {} must return None or an iterable, got {}" - raise _InvalidOutput(msg.format(_fname(method), type(result))) + msg = (f"Middleware {_fname(method)} must return None " + f"or an iterable, got {type(result)}") + raise _InvalidOutput(msg) return _failure def process_spider_output(result, start_index=0): @@ -110,8 +109,9 @@ def process_spider_output(result, start_index=0): if _isiterable(result): result = _evaluate_iterable(result, method_index + 1, recovered) else: - msg = "Middleware {} must return an iterable, got {}" - raise _InvalidOutput(msg.format(_fname(method), type(result))) + msg = (f"Middleware {_fname(method)} must return an " + f"iterable, got {type(result)}") + raise _InvalidOutput(msg) return MutableChain(result, recovered) diff --git a/scrapy/downloadermiddlewares/cookies.py b/scrapy/downloadermiddlewares/cookies.py index 77048f3890a..e2b7dd9011f 100644 --- a/scrapy/downloadermiddlewares/cookies.py +++ b/scrapy/downloadermiddlewares/cookies.py @@ -54,8 +54,8 @@ def _debug_cookie(self, request, spider): cl = [to_unicode(c, errors='replace') for c in request.headers.getlist('Cookie')] if cl: - cookies = "\n".join("Cookie: {}\n".format(c) for c in cl) - msg = "Sending cookies to: {}\n{}".format(request, cookies) + cookies = "\n".join(f"Cookie: {c}\n" for c in cl) + msg = f"Sending cookies to: {request}\n{cookies}" logger.debug(msg, extra={'spider': spider}) def _debug_set_cookie(self, response, spider): @@ -63,8 +63,8 @@ def _debug_set_cookie(self, response, spider): cl = [to_unicode(c, errors='replace') for c in response.headers.getlist('Set-Cookie')] if cl: - cookies = "\n".join("Set-Cookie: {}\n".format(c) for c in cl) - msg = "Received cookies from: {}\n{}".format(response, cookies) + cookies = "\n".join(f"Set-Cookie: {c}\n" for c in cl) + msg = f"Received cookies from: {response}\n{cookies}" logger.debug(msg, extra={'spider': spider}) def _format_cookie(self, cookie, request): @@ -90,9 +90,9 @@ def _format_cookie(self, cookie, request): request, cookie) decoded[key] = cookie[key].decode("latin1", errors="replace") - cookie_str = "{}={}".format(decoded.pop("name"), decoded.pop("value")) + cookie_str = f"{decoded.pop('name')}={decoded.pop('value')}" for key, value in decoded.items(): # path, domain - cookie_str += "; {}={}".format(key.capitalize(), value) + cookie_str += f"; {key.capitalize()}={value}" return cookie_str def _get_request_cookies(self, jar, request): diff --git a/scrapy/downloadermiddlewares/httpproxy.py b/scrapy/downloadermiddlewares/httpproxy.py index da89d3e9bbc..04da1131162 100644 --- a/scrapy/downloadermiddlewares/httpproxy.py +++ 
b/scrapy/downloadermiddlewares/httpproxy.py @@ -24,7 +24,7 @@ def from_crawler(cls, crawler): def _basic_auth_header(self, username, password): user_pass = to_bytes( - '%s:%s' % (unquote(username), unquote(password)), + f'{unquote(username)}:{unquote(password)}', encoding=self.auth_encoding) return base64.b64encode(user_pass) diff --git a/scrapy/downloadermiddlewares/retry.py b/scrapy/downloadermiddlewares/retry.py index 67be8c28249..51fe592545b 100644 --- a/scrapy/downloadermiddlewares/retry.py +++ b/scrapy/downloadermiddlewares/retry.py @@ -88,7 +88,7 @@ def _retry(self, request, reason, spider): reason = global_object_name(reason.__class__) stats.inc_value('retry/count') - stats.inc_value('retry/reason_count/%s' % reason) + stats.inc_value(f'retry/reason_count/{reason}') return retryreq else: stats.inc_value('retry/max_reached') diff --git a/scrapy/downloadermiddlewares/robotstxt.py b/scrapy/downloadermiddlewares/robotstxt.py index 7f18b2bf2f4..d6da5553500 100644 --- a/scrapy/downloadermiddlewares/robotstxt.py +++ b/scrapy/downloadermiddlewares/robotstxt.py @@ -61,7 +61,7 @@ def robot_parser(self, request, spider): if netloc not in self._parsers: self._parsers[netloc] = Deferred() - robotsurl = "%s://%s/robots.txt" % (url.scheme, url.netloc) + robotsurl = f"{url.scheme}://{url.netloc}/robots.txt" robotsreq = Request( robotsurl, priority=self.DOWNLOAD_PRIORITY, @@ -94,7 +94,7 @@ def _logerror(self, failure, request, spider): def _parse_robots(self, response, netloc, spider): self.crawler.stats.inc_value('robotstxt/response_count') - self.crawler.stats.inc_value('robotstxt/response_status_count/{}'.format(response.status)) + self.crawler.stats.inc_value(f'robotstxt/response_status_count/{response.status}') rp = self._parserimpl.from_crawler(self.crawler, response.body) rp_dfd = self._parsers[netloc] self._parsers[netloc] = rp @@ -102,7 +102,7 @@ def _parse_robots(self, response, netloc, spider): def _robots_error(self, failure, netloc): if failure.type is not IgnoreRequest: - key = 'robotstxt/exception_count/{}'.format(failure.type) + key = f'robotstxt/exception_count/{failure.type}' self.crawler.stats.inc_value(key) rp_dfd = self._parsers[netloc] self._parsers[netloc] = None diff --git a/scrapy/downloadermiddlewares/stats.py b/scrapy/downloadermiddlewares/stats.py index 46a2ad39767..5479cd0e215 100644 --- a/scrapy/downloadermiddlewares/stats.py +++ b/scrapy/downloadermiddlewares/stats.py @@ -17,13 +17,13 @@ def from_crawler(cls, crawler): def process_request(self, request, spider): self.stats.inc_value('downloader/request_count', spider=spider) - self.stats.inc_value('downloader/request_method_count/%s' % request.method, spider=spider) + self.stats.inc_value(f'downloader/request_method_count/{request.method}', spider=spider) reqlen = len(request_httprepr(request)) self.stats.inc_value('downloader/request_bytes', reqlen, spider=spider) def process_response(self, request, response, spider): self.stats.inc_value('downloader/response_count', spider=spider) - self.stats.inc_value('downloader/response_status_count/%s' % response.status, spider=spider) + self.stats.inc_value(f'downloader/response_status_count/{response.status}', spider=spider) reslen = len(response_httprepr(response)) self.stats.inc_value('downloader/response_bytes', reslen, spider=spider) return response @@ -31,4 +31,4 @@ def process_response(self, request, response, spider): def process_exception(self, request, exception, spider): ex_class = global_object_name(exception.__class__) 
self.stats.inc_value('downloader/exception_count', spider=spider) - self.stats.inc_value('downloader/exception_type_count/%s' % ex_class, spider=spider) + self.stats.inc_value(f'downloader/exception_type_count/{ex_class}', spider=spider) diff --git a/scrapy/exporters.py b/scrapy/exporters.py index 95518b3acf3..54cf5c0b1f0 100644 --- a/scrapy/exporters.py +++ b/scrapy/exporters.py @@ -39,7 +39,7 @@ def _configure(self, options, dont_fail=False): self.export_empty_fields = options.pop('export_empty_fields', False) self.indent = options.pop('indent', None) if not dont_fail and options: - raise TypeError("Unexpected options: %s" % ', '.join(options.keys())) + raise TypeError(f"Unexpected options: {', '.join(options.keys())}") def export_item(self, item): raise NotImplementedError diff --git a/scrapy/extensions/corestats.py b/scrapy/extensions/corestats.py index 389cb65bc78..675f8276f1f 100644 --- a/scrapy/extensions/corestats.py +++ b/scrapy/extensions/corestats.py @@ -43,4 +43,4 @@ def response_received(self, spider): def item_dropped(self, item, spider, exception): reason = exception.__class__.__name__ self.stats.inc_value('item_dropped_count', spider=spider) - self.stats.inc_value('item_dropped_reasons_count/%s' % reason, spider=spider) + self.stats.inc_value(f'item_dropped_reasons_count/{reason}', spider=spider) diff --git a/scrapy/extensions/debug.py b/scrapy/extensions/debug.py index 5863997843d..fd2a02d8d91 100644 --- a/scrapy/extensions/debug.py +++ b/scrapy/extensions/debug.py @@ -48,7 +48,7 @@ def _thread_stacks(self): for id_, frame in sys._current_frames().items(): name = id2name.get(id_, '') dump = ''.join(traceback.format_stack(frame)) - dumps += "# Thread: {0}({1})\n{2}\n".format(name, id_, dump) + dumps += f"# Thread: {name}({id_})\n{dump}\n" return dumps diff --git a/scrapy/extensions/httpcache.py b/scrapy/extensions/httpcache.py index 6294a9b5230..e0c04b2de3b 100644 --- a/scrapy/extensions/httpcache.py +++ b/scrapy/extensions/httpcache.py @@ -223,7 +223,7 @@ def __init__(self, settings): self.db = None def open_spider(self, spider): - dbpath = os.path.join(self.cachedir, '%s.db' % spider.name) + dbpath = os.path.join(self.cachedir, f'{spider.name}.db') self.db = self.dbmodule.open(dbpath, 'c') logger.debug("Using DBM cache storage in %(cachepath)s" % {'cachepath': dbpath}, extra={'spider': spider}) @@ -251,13 +251,13 @@ def store_response(self, spider, request, response): 'headers': dict(response.headers), 'body': response.body, } - self.db['%s_data' % key] = pickle.dumps(data, protocol=4) - self.db['%s_time' % key] = str(time()) + self.db[f'{key}_data'] = pickle.dumps(data, protocol=4) + self.db[f'{key}_time'] = str(time()) def _read_data(self, spider, request): key = self._request_key(request) db = self.db - tkey = '%s_time' % key + tkey = f'{key}_time' if tkey not in db: return # not found @@ -265,7 +265,7 @@ def _read_data(self, spider, request): if 0 < self.expiration_secs < time() - float(ts): return # expired - return pickle.loads(db['%s_data' % key]) + return pickle.loads(db[f'{key}_data']) def _request_key(self, request): return request_fingerprint(request) diff --git a/scrapy/extensions/memdebug.py b/scrapy/extensions/memdebug.py index dc8cdbb1d66..cee44ea6206 100644 --- a/scrapy/extensions/memdebug.py +++ b/scrapy/extensions/memdebug.py @@ -30,4 +30,4 @@ def spider_closed(self, spider, reason): for cls, wdict in live_refs.items(): if not wdict: continue - self.stats.set_value('memdebug/live_refs/%s' % cls.__name__, len(wdict), spider=spider) + 
self.stats.set_value(f'memdebug/live_refs/{cls.__name__}', len(wdict), spider=spider) diff --git a/scrapy/extensions/memusage.py b/scrapy/extensions/memusage.py index ab2e43e8c67..274cbdbfed6 100644 --- a/scrapy/extensions/memusage.py +++ b/scrapy/extensions/memusage.py @@ -82,8 +82,8 @@ def _check_limit(self): {'memusage': mem}, extra={'crawler': self.crawler}) if self.notify_mails: subj = ( - "%s terminated: memory usage exceeded %dM at %s" - % (self.crawler.settings['BOT_NAME'], mem, socket.gethostname()) + f"{self.crawler.settings['BOT_NAME']} terminated: " + f"memory usage exceeded {mem}M at {socket.gethostname()}" ) self._send_report(self.notify_mails, subj) self.crawler.stats.set_value('memusage/limit_notified', 1) @@ -105,8 +105,8 @@ def _check_warning(self): {'memusage': mem}, extra={'crawler': self.crawler}) if self.notify_mails: subj = ( - "%s warning: memory usage reached %dM at %s" - % (self.crawler.settings['BOT_NAME'], mem, socket.gethostname()) + f"{self.crawler.settings['BOT_NAME']} warning: " + f"memory usage reached {mem}M at {socket.gethostname()}" ) self._send_report(self.notify_mails, subj) self.crawler.stats.set_value('memusage/warning_notified', 1) @@ -115,9 +115,9 @@ def _check_warning(self): def _send_report(self, rcpts, subject): """send notification mail with some additional useful info""" stats = self.crawler.stats - s = "Memory usage at engine startup : %dM\r\n" % (stats.get_value('memusage/startup')/1024/1024) - s += "Maximum memory usage : %dM\r\n" % (stats.get_value('memusage/max')/1024/1024) - s += "Current memory usage : %dM\r\n" % (self.get_virtual_size()/1024/1024) + s = f"Memory usage at engine startup : {stats.get_value('memusage/startup')/1024/1024}M\r\n" + s += f"Maximum memory usage : {stats.get_value('memusage/max')/1024/1024}M\r\n" + s += f"Current memory usage : {self.get_virtual_size()/1024/1024}M\r\n" s += "ENGINE STATUS ------------------------------------------------------- \r\n" s += "\r\n" diff --git a/scrapy/extensions/statsmailer.py b/scrapy/extensions/statsmailer.py index 320f13b290e..997e74fc9cb 100644 --- a/scrapy/extensions/statsmailer.py +++ b/scrapy/extensions/statsmailer.py @@ -24,11 +24,11 @@ def from_crawler(cls, crawler): o = cls(crawler.stats, recipients, mail) crawler.signals.connect(o.spider_closed, signal=signals.spider_closed) return o - + def spider_closed(self, spider): spider_stats = self.stats.get_stats(spider) body = "Global stats\n\n" - body += "\n".join("%-50s : %s" % i for i in self.stats.get_stats().items()) - body += "\n\n%s stats\n\n" % spider.name - body += "\n".join("%-50s : %s" % i for i in spider_stats.items()) - return self.mail.send(self.recipients, "Scrapy stats for: %s" % spider.name, body) + body += "\n".join(f"{i:<50} : {self.stats.get_stats()[i]}" for i in self.stats.get_stats()) + body += f"\n\n{spider.name} stats\n\n" + body += "\n".join(f"{i:<50} : {spider_stats[i]}" for i in spider_stats) + return self.mail.send(self.recipients, f"Scrapy stats for: {spider.name}", body) diff --git a/scrapy/http/common.py b/scrapy/http/common.py index ba6ab277c50..98699d7fddc 100644 --- a/scrapy/http/common.py +++ b/scrapy/http/common.py @@ -1,6 +1,6 @@ def obsolete_setter(setter, attrname): def newsetter(self, value): c = self.__class__.__name__ - msg = "%s.%s is not modifiable, use %s.replace() instead" % (c, attrname, c) + msg = f"{c}.{attrname} is not modifiable, use {c}.replace() instead" raise AttributeError(msg) return newsetter diff --git a/scrapy/http/headers.py b/scrapy/http/headers.py index 
6bf9e534659..1a2b99b0a4e 100644 --- a/scrapy/http/headers.py +++ b/scrapy/http/headers.py @@ -33,7 +33,7 @@ def _tobytes(self, x): elif isinstance(x, int): return str(x).encode(self.encoding) else: - raise TypeError('Unsupported value type: {}'.format(type(x))) + raise TypeError(f'Unsupported value type: {type(x)}') def __getitem__(self, key): try: diff --git a/scrapy/http/request/__init__.py b/scrapy/http/request/__init__.py index a98ba9960c8..ef58deacc33 100644 --- a/scrapy/http/request/__init__.py +++ b/scrapy/http/request/__init__.py @@ -25,13 +25,13 @@ def __init__(self, url, callback=None, method='GET', headers=None, body=None, self._set_url(url) self._set_body(body) if not isinstance(priority, int): - raise TypeError("Request priority not an integer: %r" % priority) + raise TypeError(f"Request priority not an integer: {priority!r}") self.priority = priority if callback is not None and not callable(callback): - raise TypeError('callback must be a callable, got %s' % type(callback).__name__) + raise TypeError(f'callback must be a callable, got {type(callback).__name__}') if errback is not None and not callable(errback): - raise TypeError('errback must be a callable, got %s' % type(errback).__name__) + raise TypeError(f'errback must be a callable, got {type(errback).__name__}') self.callback = callback self.errback = errback @@ -60,13 +60,13 @@ def _get_url(self): def _set_url(self, url): if not isinstance(url, str): - raise TypeError('Request url must be str or unicode, got %s:' % type(url).__name__) + raise TypeError(f'Request url must be str or unicode, got {type(url).__name__}') s = safe_url_string(url, self.encoding) self._url = escape_ajax(s) if ('://' not in self._url) and (not self._url.startswith('data:')): - raise ValueError('Missing scheme in request url: %s' % self._url) + raise ValueError(f'Missing scheme in request url: {self._url}') url = property(_get_url, obsolete_setter(_set_url, 'url')) @@ -86,7 +86,7 @@ def encoding(self): return self._encoding def __str__(self): - return "<%s %s>" % (self.method, self.url) + return f"<{self.method} {self.url}>" __repr__ = __str__ diff --git a/scrapy/http/request/form.py b/scrapy/http/request/form.py index 59af8132124..c90d68fa183 100644 --- a/scrapy/http/request/form.py +++ b/scrapy/http/request/form.py @@ -80,15 +80,15 @@ def _get_form(response, formname, formid, formnumber, formxpath): base_url=get_base_url(response)) forms = root.xpath('//form') if not forms: - raise ValueError("No
<form> element found in %s" % response) + raise ValueError(f"No <form> element found in {response}") if formname is not None: - f = root.xpath('//form[@name="%s"]' % formname) + f = root.xpath(f'//form[@name="{formname}"]') if f: return f[0] if formid is not None: - f = root.xpath('//form[@id="%s"]' % formid) + f = root.xpath(f'//form[@id="{formid}"]') if f: return f[0] @@ -103,7 +103,7 @@ def _get_form(response, formname, formid, formnumber, formxpath): el = el.getparent() if el is None: break - raise ValueError('No <form> element found with %s' % formxpath) + raise ValueError(f'No <form> element found with {formxpath}') # If we get here, it means that either formname was None # or invalid @@ -111,8 +111,7 @@ def _get_form(response, formname, formid, formnumber, formxpath): try: form = forms[formnumber] except IndexError: - raise IndexError("Form number %d not found in %s" % - (formnumber, response)) + raise IndexError(f"Form number {formnumber} not found in {response}") else: return form @@ -205,12 +204,12 @@ def _get_clickable(clickdata, form): # We didn't find it, so now we build an XPath expression out of the other # arguments, because they can be used as such - xpath = './/*' + ''.join('[@%s="%s"]' % c for c in clickdata.items()) + xpath = './/*' + ''.join(f'[@{key}="{clickdata[key]}"]' for key in clickdata) el = form.xpath(xpath) if len(el) == 1: return (el[0].get('name'), el[0].get('value') or '') elif len(el) > 1: - raise ValueError("Multiple elements found (%r) matching the criteria " "in clickdata: %r" % (el, clickdata)) + raise ValueError(f"Multiple elements found ({el!r}) matching the " f"criteria in clickdata: {clickdata!r}") else: - raise ValueError('No clickable element matching clickdata: %r' % (clickdata,)) + raise ValueError(f'No clickable element matching clickdata: {clickdata!r}') diff --git a/scrapy/http/response/__init__.py b/scrapy/http/response/__init__.py index c2c37dd1d7f..c635fde6916 100644 --- a/scrapy/http/response/__init__.py +++ b/scrapy/http/response/__init__.py @@ -55,8 +55,8 @@ def _set_url(self, url): if isinstance(url, str): self._url = url else: - raise TypeError('%s url must be str, got %s:' % - (type(self).__name__, type(url).__name__)) + raise TypeError(f'{type(self).__name__} url must be str, ' + f'got {type(url).__name__}') url = property(_get_url, obsolete_setter(_set_url, 'url')) @@ -77,7 +77,7 @@ def _set_body(self, body): body = property(_get_body, obsolete_setter(_set_body, 'body')) def __str__(self): - return "<%d %s>" % (self.status, self.url) + return f"<{self.status} {self.url}>" __repr__ = __str__ diff --git a/scrapy/http/response/text.py b/scrapy/http/response/text.py index a7bb34d4887..e36e148800d 100644 --- a/scrapy/http/response/text.py +++ b/scrapy/http/response/text.py @@ -47,8 +47,8 @@ def _set_body(self, body): self._body = b'' # used by encoding detection if isinstance(body, str): if self._encoding is None: - raise TypeError('Cannot convert unicode body - %s has no encoding' % - type(self).__name__) + raise TypeError('Cannot convert unicode body - ' + f'{type(self).__name__} has no encoding') self._body = body.encode(self._encoding) else: super()._set_body(body) @@ -92,7 +92,7 @@ def text(self): # _body_inferred_encoding is called benc = self.encoding if self._cached_ubody is None: - charset = 'charset=%s' % benc + charset = f'charset={benc}' self._cached_ubody = html_to_unicode(charset, self.body)[1] return self._cached_ubody @@ -255,12 +255,11 @@ def _url_from_selector(sel): # e.g. 
::attr(href) result return strip_html5_whitespace(sel.root) if not hasattr(sel.root, 'tag'): - raise _InvalidSelector("Unsupported selector: %s" % sel) + raise _InvalidSelector(f"Unsupported selector: {sel}") if sel.root.tag not in ('a', 'link'): - raise _InvalidSelector("Only <a> and <link> elements are supported; got <%s>" % - sel.root.tag) + raise _InvalidSelector("Only <a> and <link> elements are supported; " f"got <{sel.root.tag}>") href = sel.root.get('href') if href is None: - raise _InvalidSelector("<%s> element has no href attribute: %s" % - (sel.root.tag, sel)) + raise _InvalidSelector(f"<{sel.root.tag}> element has no href attribute: {sel}") return strip_html5_whitespace(href) diff --git a/scrapy/item.py b/scrapy/item.py index c262a153ccc..af3849302bd 100644 --- a/scrapy/item.py +++ b/scrapy/item.py @@ -96,19 +96,19 @@ def __setitem__(self, key, value): if key in self.fields: self._values[key] = value else: - raise KeyError("%s does not support field: %s" % (self.__class__.__name__, key)) + raise KeyError(f"{self.__class__.__name__} does not support field: {key}") def __delitem__(self, key): del self._values[key] def __getattr__(self, name): if name in self.fields: - raise AttributeError("Use item[%r] to get field value" % name) + raise AttributeError(f"Use item[{name!r}] to get field value") raise AttributeError(name) def __setattr__(self, name, value): if not name.startswith('_'): - raise AttributeError("Use item[%r] = %r to set field value" % (name, value)) + raise AttributeError(f"Use item[{name!r}] = {value!r} to set field value") super().__setattr__(name, value) def __len__(self): diff --git a/scrapy/link.py b/scrapy/link.py index 1ef50b11362..684735f6e75 100644 --- a/scrapy/link.py +++ b/scrapy/link.py @@ -14,7 +14,7 @@ class Link: def __init__(self, url, text='', fragment='', nofollow=False): if not isinstance(url, str): got = url.__class__.__name__ - raise TypeError("Link urls must be str objects, got %s" % got) + raise TypeError(f"Link urls must be str objects, got {got}") self.url = url self.text = text self.fragment = fragment @@ -33,6 +33,6 @@ def __hash__(self): def __repr__(self): return ( - 'Link(url=%r, text=%r, fragment=%r, nofollow=%r)' - % (self.url, self.text, self.fragment, self.nofollow) + f'Link(url={self.url!r}, text={self.text!r}, ' + f'fragment={self.fragment!r}, nofollow={self.nofollow!r})' ) diff --git a/scrapy/logformatter.py b/scrapy/logformatter.py index 0f9e6f1cb31..87568b2d1cd 100644 --- a/scrapy/logformatter.py +++ b/scrapy/logformatter.py @@ -54,8 +54,8 @@ def dropped(self, item, exception, response, spider): def crawled(self, request, response, spider): """Logs a message when the crawler finds a webpage.""" - request_flags = ' %s' % str(request.flags) if request.flags else '' - response_flags = ' %s' % str(response.flags) if response.flags else '' + request_flags = f' {str(request.flags)}' if request.flags else '' + response_flags = f' {str(response.flags)}' if response.flags else '' return { 'level': logging.DEBUG, 'msg': CRAWLEDMSG, diff --git a/scrapy/pipelines/files.py b/scrapy/pipelines/files.py index 5a21846816b..99a72aa707f 100644 --- a/scrapy/pipelines/files.py +++ b/scrapy/pipelines/files.py @@ -108,7 +108,7 @@ def __init__(self, uri): from boto.s3.connection import S3Connection self.S3Connection = S3Connection if not uri.startswith("s3://"): - raise ValueError("Incorrect URI scheme in %s, expected 's3'" % uri) + raise ValueError(f"Incorrect URI scheme in {uri}, expected 's3'") self.bucket, self.prefix = uri[5:].split('/', 1) def stat_file(self, path, 
info): @@ -133,7 +133,7 @@ def _get_boto_bucket(self): return c.get_bucket(self.bucket, validate=False) def _get_boto_key(self, path): - key_name = '%s%s' % (self.prefix, path) + key_name = f'{self.prefix}{path}' if self.is_botocore: return threads.deferToThread( self.s3_client.head_object, @@ -145,7 +145,7 @@ def _get_boto_key(self, path): def persist_file(self, path, buf, info, meta=None, headers=None): """Upload file to S3 storage""" - key_name = '%s%s' % (self.prefix, path) + key_name = f'{self.prefix}{path}' buf.seek(0) if self.is_botocore: extra = self._headers_to_botocore_kwargs(self.HEADERS) @@ -208,8 +208,7 @@ def _headers_to_botocore_kwargs(self, headers): try: kwarg = mapping[key] except KeyError: - raise TypeError( - 'Header "%s" is not supported by botocore' % key) + raise TypeError(f'Header "{key}" is not supported by botocore') else: extra[kwarg] = value return extra @@ -283,7 +282,7 @@ class FTPFilesStore: def __init__(self, uri): if not uri.startswith("ftp://"): - raise ValueError("Incorrect URI scheme in %s, expected 'ftp'" % uri) + raise ValueError(f"Incorrect URI scheme in {uri}, expected 'ftp'") u = urlparse(uri) self.port = u.port self.host = u.hostname @@ -293,7 +292,7 @@ def __init__(self, uri): self.basedir = u.path.rstrip('/') def persist_file(self, path, buf, info, meta=None, headers=None): - path = '%s/%s' % (self.basedir, path) + path = f'{self.basedir}/{path}' return threads.deferToThread( ftp_store_file, path=path, file=buf, host=self.host, port=self.port, username=self.username, @@ -308,10 +307,10 @@ def _stat_file(path): ftp.login(self.username, self.password) if self.USE_ACTIVE_MODE: ftp.set_pasv(False) - file_path = "%s/%s" % (self.basedir, path) - last_modified = float(ftp.voidcmd("MDTM %s" % file_path)[4:].strip()) + file_path = f"{self.basedir}/{path}" + last_modified = float(ftp.voidcmd(f"MDTM {file_path}")[4:].strip()) m = hashlib.md5() - ftp.retrbinary('RETR %s' % file_path, m.update) + ftp.retrbinary(f'RETR {file_path}', m.update) return {'last_modified': last_modified, 'checksum': m.hexdigest()} # The file doesn't exist except Exception: @@ -515,7 +514,7 @@ def media_downloaded(self, response, request, info, *, item=None): def inc_stats(self, spider, status): spider.crawler.stats.inc_value('file_count', spider=spider) - spider.crawler.stats.inc_value('file_status_count/%s' % status, spider=spider) + spider.crawler.stats.inc_value(f'file_status_count/{status}', spider=spider) # Overridable Interface def get_media_requests(self, item, info): @@ -545,4 +544,4 @@ def file_path(self, request, response=None, info=None, *, item=None): media_type = mimetypes.guess_type(request.url)[0] if media_type: media_ext = mimetypes.guess_extension(media_type) - return 'full/%s%s' % (media_guid, media_ext) + return f'full/{media_guid}{media_ext}' diff --git a/scrapy/pipelines/images.py b/scrapy/pipelines/images.py index 0a67a0b1dc4..aafd1d8b20d 100644 --- a/scrapy/pipelines/images.py +++ b/scrapy/pipelines/images.py @@ -125,8 +125,9 @@ def get_images(self, response, request, info, *, item=None): width, height = orig_image.size if width < self.min_width or height < self.min_height: - raise ImageException("Image too small (%dx%d < %dx%d)" % - (width, height, self.min_width, self.min_height)) + raise ImageException("Image too small " + f"({width}x{height} < " + f"{self.min_width}x{self.min_height})") image, buf = self.convert_image(orig_image) yield path, image, buf @@ -168,8 +169,8 @@ def item_completed(self, results, item, info): def file_path(self, request, 
response=None, info=None, *, item=None): image_guid = hashlib.sha1(to_bytes(request.url)).hexdigest() - return 'full/%s.jpg' % (image_guid) + return f'full/{image_guid}.jpg' def thumb_path(self, request, thumb_id, response=None, info=None): thumb_guid = hashlib.sha1(to_bytes(request.url)).hexdigest() - return 'thumbs/%s/%s.jpg' % (thumb_id, thumb_guid) + return f'thumbs/{thumb_id}/{thumb_guid}.jpg' diff --git a/scrapy/pipelines/media.py b/scrapy/pipelines/media.py index 2439de9a5a1..0a12f3e2c14 100644 --- a/scrapy/pipelines/media.py +++ b/scrapy/pipelines/media.py @@ -61,7 +61,7 @@ def _key_for_pipe(self, key, base_class_name=None, settings=None): 'MYPIPE_IMAGES' """ class_name = self.__class__.__name__ - formatted_key = "{}_{}".format(class_name.upper(), key) + formatted_key = f"{class_name.upper()}_{key}" if ( not base_class_name or class_name == base_class_name @@ -151,9 +151,8 @@ def _check_signature(self, func): if 'item' not in sig.parameters: old_params = str(sig)[1:-1] new_params = old_params + ", *, item=None" - warn('%s(self, %s) is deprecated, ' - 'please use %s(self, %s)' - % (func.__name__, old_params, func.__name__, new_params), + warn(f'{func.__name__}(self, {old_params}) is deprecated, ' + f'please use {func.__name__}(self, {new_params})', ScrapyDeprecationWarning, stacklevel=2) self._expects_item[func.__name__] = False diff --git a/scrapy/pqueues.py b/scrapy/pqueues.py index e13d389eeec..a9aa6c649da 100644 --- a/scrapy/pqueues.py +++ b/scrapy/pqueues.py @@ -141,17 +141,16 @@ def from_crawler(cls, crawler, downstream_queue_cls, key, startprios=()): def __init__(self, crawler, downstream_queue_cls, key, slot_startprios=()): if crawler.settings.getint('CONCURRENT_REQUESTS_PER_IP') != 0: - raise ValueError('"%s" does not support CONCURRENT_REQUESTS_PER_IP' - % (self.__class__,)) + raise ValueError(f'"{self.__class__}" does not support CONCURRENT_REQUESTS_PER_IP') if slot_startprios and not isinstance(slot_startprios, dict): raise ValueError("DownloaderAwarePriorityQueue accepts " - "``slot_startprios`` as a dict; %r instance " + "``slot_startprios`` as a dict; " + f"{slot_startprios.__class__!r} instance " "is passed. Most likely, it means the state is" "created by an incompatible priority queue. " "Only a crawl started with the same priority " - "queue class can be resumed." 
% - slot_startprios.__class__) + "queue class can be resumed.") self._downloader_interface = DownloaderInterface(crawler) self.downstream_queue_cls = downstream_queue_cls diff --git a/scrapy/responsetypes.py b/scrapy/responsetypes.py index d207088e684..6ed9f8b8fbc 100644 --- a/scrapy/responsetypes.py +++ b/scrapy/responsetypes.py @@ -45,7 +45,7 @@ def from_mimetype(self, mimetype): elif mimetype in self.classes: return self.classes[mimetype] else: - basetype = "%s/*" % mimetype.split('/')[0] + basetype = f"{mimetype.split('/')[0]}/*" return self.classes.get(basetype, Response) def from_content_type(self, content_type, content_encoding=None): diff --git a/scrapy/selector/unified.py b/scrapy/selector/unified.py index f12c61081ab..a2587143301 100644 --- a/scrapy/selector/unified.py +++ b/scrapy/selector/unified.py @@ -66,8 +66,8 @@ class Selector(_ParselSelector, object_ref): def __init__(self, response=None, text=None, type=None, root=None, **kwargs): if response is not None and text is not None: - raise ValueError('%s.__init__() received both response and text' - % self.__class__.__name__) + raise ValueError(f'{self.__class__.__name__}.__init__() received ' 'both response and text') st = _st(response, type or self._default_type) diff --git a/scrapy/settings/__init__.py b/scrapy/settings/__init__.py index 951fc65e2bd..1fe1e6fd17c 100644 --- a/scrapy/settings/__init__.py +++ b/scrapy/settings/__init__.py @@ -52,7 +52,7 @@ def set(self, value, priority): self.priority = priority def __str__(self): - return "<SettingsAttribute value={self.value!r} priority={self.priority}>".format(self=self) + return f"<SettingsAttribute value={self.value!r} priority={self.priority}>" __repr__ = __str__ diff --git a/scrapy/settings/default_settings.py b/scrapy/settings/default_settings.py index a0251394b70..4ef330dd261 100644 --- a/scrapy/settings/default_settings.py +++ b/scrapy/settings/default_settings.py @@ -287,7 +287,7 @@ URLLENGTH_LIMIT = 2083 -USER_AGENT = 'Scrapy/%s (+https://scrapy.org)' % import_module('scrapy').__version__ +USER_AGENT = f'Scrapy/{import_module("scrapy").__version__} (+https://scrapy.org)' TELNETCONSOLE_ENABLED = 1 TELNETCONSOLE_PORT = [6023, 6073] diff --git a/scrapy/shell.py b/scrapy/shell.py index 10de119ce1a..c370ccaff40 100644 --- a/scrapy/shell.py +++ b/scrapy/shell.py @@ -140,7 +140,7 @@ def get_help(self): b.append(" scrapy scrapy module (contains scrapy.Request, scrapy.Selector, etc)") for k, v in sorted(self.vars.items()): if self._is_relevant(v): - b.append(" %-10s %s" % (k, v)) + b.append(f" {k:<10} {v}") b.append("Useful shortcuts:") if self.inthread: b.append(" fetch(url[, redirect=True]) " @@ -150,7 +150,7 @@ b.append(" shelp() Shell help (print this help)") b.append(" view(response) View response in a browser") - return "\n".join("[s] %s" % line for line in b) + return "\n".join(f"[s] {line}" for line in b) def _is_relevant(self, value): return isinstance(value, self.relevant_classes) or is_item(value) diff --git a/scrapy/spiderloader.py b/scrapy/spiderloader.py index db4193430c3..04fda311fd0 100644 --- a/scrapy/spiderloader.py +++ b/scrapy/spiderloader.py @@ -27,7 +27,7 @@ def _check_name_duplicates(self): dupes = [] for name, locations in self._found.items(): dupes.extend([ - " {cls} named {name!r} (in {module})".format(module=mod, cls=cls, name=name) + f" {cls} named {name!r} (in {mod})" for mod, cls in locations if len(locations) > 1 ]) @@ -36,7 +36,7 @@ def _check_name_duplicates(self): dupes_string = "\n\n".join(dupes) warnings.warn( "There are several spiders with the same name:\n\n" - "{}\n\n This can cause unexpected behavior.".format(dupes_string), + 
f"{dupes_string}\n\n This can cause unexpected behavior.", category=UserWarning, ) @@ -53,10 +53,9 @@ def _load_all_spiders(self): except ImportError: if self.warn_only: warnings.warn( - "\n{tb}Could not load spiders from module '{modname}'. " - "See above traceback for details.".format( - modname=name, tb=traceback.format_exc() - ), + f"\n{traceback.format_exc()}Could not load spiders " + f"from module '{name}'. " + "See above traceback for details.", category=RuntimeWarning, ) else: @@ -75,7 +74,7 @@ def load(self, spider_name): try: return self._spiders[spider_name] except KeyError: - raise KeyError("Spider not found: {}".format(spider_name)) + raise KeyError(f"Spider not found: {spider_name}") def find_by_request(self, request): """ diff --git a/scrapy/spidermiddlewares/depth.py b/scrapy/spidermiddlewares/depth.py index fa7f5bef956..776a6879a87 100644 --- a/scrapy/spidermiddlewares/depth.py +++ b/scrapy/spidermiddlewares/depth.py @@ -43,7 +43,7 @@ def _filter(request): return False else: if self.verbose_stats: - self.stats.inc_value('request_depth_count/%s' % depth, + self.stats.inc_value(f'request_depth_count/{depth}', spider=spider) self.stats.max_value('request_depth_max', depth, spider=spider) diff --git a/scrapy/spidermiddlewares/httperror.py b/scrapy/spidermiddlewares/httperror.py index db9d0f2ae3f..ae5c258df08 100644 --- a/scrapy/spidermiddlewares/httperror.py +++ b/scrapy/spidermiddlewares/httperror.py @@ -48,7 +48,7 @@ def process_spider_exception(self, response, exception, spider): if isinstance(exception, HttpError): spider.crawler.stats.inc_value('httperror/response_ignored_count') spider.crawler.stats.inc_value( - 'httperror/response_ignored_status_count/%s' % response.status + f'httperror/response_ignored_status_count/{response.status}' ) logger.info( "Ignoring response %(response)r: HTTP status code is not handled or not allowed", diff --git a/scrapy/spidermiddlewares/offsite.py b/scrapy/spidermiddlewares/offsite.py index a006f317769..6e4efda97f5 100644 --- a/scrapy/spidermiddlewares/offsite.py +++ b/scrapy/spidermiddlewares/offsite.py @@ -61,15 +61,15 @@ def get_host_regex(self, spider): continue elif url_pattern.match(domain): message = ("allowed_domains accepts only domains, not URLs. " - "Ignoring URL entry %s in allowed_domains." % domain) + f"Ignoring URL entry {domain} in allowed_domains.") warnings.warn(message, URLWarning) elif port_pattern.search(domain): message = ("allowed_domains accepts only domains without ports. " - "Ignoring entry %s in allowed_domains." 
% domain) + f"Ignoring entry {domain} in allowed_domains.") warnings.warn(message, PortWarning) else: domains.append(re.escape(domain)) - regex = r'^(.*\.)?(%s)$' % '|'.join(domains) + regex = fr'^(.*\.)?({"|".join(domains)})$' return re.compile(regex) def spider_opened(self, spider): diff --git a/scrapy/spidermiddlewares/referer.py b/scrapy/spidermiddlewares/referer.py index 434067b009d..f8104137600 100644 --- a/scrapy/spidermiddlewares/referer.py +++ b/scrapy/spidermiddlewares/referer.py @@ -278,7 +278,7 @@ def _load_policy_class(policy, warning_only=False): try: return _policy_classes[policy.lower()] except KeyError: - msg = "Could not load referrer policy %r" % policy + msg = f"Could not load referrer policy {policy!r}" if not warning_only: raise RuntimeError(msg) else: diff --git a/scrapy/spiders/__init__.py b/scrapy/spiders/__init__.py index 12b4fba09af..3da0a11db73 100644 --- a/scrapy/spiders/__init__.py +++ b/scrapy/spiders/__init__.py @@ -25,7 +25,7 @@ def __init__(self, name=None, **kwargs): if name is not None: self.name = name elif not getattr(self, 'name', None): - raise ValueError("%s must have a name" % type(self).__name__) + raise ValueError(f"{type(self).__name__} must have a name") self.__dict__.update(kwargs) if not hasattr(self, 'start_urls'): self.start_urls = [] @@ -66,9 +66,8 @@ def start_requests(self): warnings.warn( "Spider.make_requests_from_url method is deprecated; it " "won't be called in future Scrapy releases. Please " - "override Spider.start_requests method instead (see %s.%s)." % ( - cls.__module__, cls.__name__ - ), + "override Spider.start_requests method instead " + f"(see {cls.__module__}.{cls.__name__}).", ) for url in self.start_urls: yield self.make_requests_from_url(url) @@ -90,7 +89,7 @@ def _parse(self, response, **kwargs): return self.parse(response, **kwargs) def parse(self, response, **kwargs): - raise NotImplementedError('{}.parse callback is not defined'.format(self.__class__.__name__)) + raise NotImplementedError(f'{self.__class__.__name__}.parse callback is not defined') @classmethod def update_settings(cls, settings): @@ -107,7 +106,7 @@ def close(spider, reason): return closed(reason) def __str__(self): - return "<%s %r at 0x%0x>" % (type(self).__name__, self.name, id(self)) + return f"<{type(self).__name__} {self.name!r} at 0x{id(self):0x}>" __repr__ = __str__ diff --git a/scrapy/spiders/feed.py b/scrapy/spiders/feed.py index cf658aec4c7..6ed17e4dd67 100644 --- a/scrapy/spiders/feed.py +++ b/scrapy/spiders/feed.py @@ -71,11 +71,11 @@ def _parse(self, response, **kwargs): elif self.iterator == 'xml': selector = Selector(response, type='xml') self._register_namespaces(selector) - nodes = selector.xpath('//%s' % self.itertag) + nodes = selector.xpath(f'//{self.itertag}') elif self.iterator == 'html': selector = Selector(response, type='html') self._register_namespaces(selector) - nodes = selector.xpath('//%s' % self.itertag) + nodes = selector.xpath(f'//{self.itertag}') else: raise NotSupported('Unsupported node iterator') diff --git a/scrapy/utils/benchserver.py b/scrapy/utils/benchserver.py index f595a1acbd0..86238c4cd29 100644 --- a/scrapy/utils/benchserver.py +++ b/scrapy/utils/benchserver.py @@ -21,8 +21,8 @@ def render(self, request): for nl in nlist: args['n'] = nl argstr = urlencode(args, doseq=True) - request.write("follow {1}
" - .format(argstr, nl).encode('utf8')) + request.write(f"follow {nl}
" + .encode('utf8')) request.write(b"") return b'' @@ -39,6 +39,6 @@ def _getarg(request, name, default=None, type=str): def _print_listening(): httpHost = httpPort.getHost() - print("Bench server at http://{}:{}".format(httpHost.host, httpHost.port)) + print(f"Bench server at http://{httpHost.host}:{httpHost.port}") reactor.callWhenRunning(_print_listening) reactor.run() diff --git a/scrapy/utils/conf.py b/scrapy/utils/conf.py index 90a52b25b3e..05cd5f25c84 100644 --- a/scrapy/utils/conf.py +++ b/scrapy/utils/conf.py @@ -17,8 +17,8 @@ def build_component_list(compdict, custom=None, convert=update_classpath): def _check_components(complist): if len({convert(c) for c in complist}) != len(complist): - raise ValueError('Some paths in {!r} convert to the same object, ' - 'please update your settings'.format(complist)) + raise ValueError('Some paths in {complist!r} convert to the same object, ' + 'please update your settings') def _map_keys(compdict): if isinstance(compdict, BaseSettings): @@ -26,9 +26,10 @@ def _map_keys(compdict): for k, v in compdict.items(): prio = compdict.getpriority(k) if compbs.getpriority(convert(k)) == prio: - raise ValueError('Some paths in {!r} convert to the same ' + raise ValueError(f'Some paths in {list(compdict.keys())!r} ' + 'convert to the same ' 'object, please update your settings' - ''.format(list(compdict.keys()))) + ) else: compbs.set(convert(k), v, priority=prio) return compbs @@ -40,8 +41,9 @@ def _validate_values(compdict): """Fail if a value in the components dict is not a real number or None.""" for name, value in compdict.items(): if value is not None and not isinstance(value, numbers.Real): - raise ValueError('Invalid value {} for component {}, please provide ' - 'a real number or None instead'.format(value, name)) + raise ValueError(f'Invalid value {value} for component {name}, ' + 'please provide a real number or None instead' + ) # BEGIN Backward compatibility for old (base, custom) call signature if isinstance(custom, (list, tuple)): @@ -141,12 +143,10 @@ def feed_process_params_from_cli(settings, output, output_format=None, def check_valid_format(output_format): if output_format not in valid_output_formats: raise UsageError( - "Unrecognized output format '%s'. Set a supported one (%s) " + f"Unrecognized output format '{output_format}'. " + f"Set a supported one ({tuple(valid_output_formats)}) " "after a colon at the end of the output URI (i.e. -o/-O " - ":) or as a file extension." % ( - output_format, - tuple(valid_output_formats), - ) + ":) or as a file extension." 
) overwrite = False diff --git a/scrapy/utils/curl.py b/scrapy/utils/curl.py index 9c0efcec414..6660b9dc060 100644 --- a/scrapy/utils/curl.py +++ b/scrapy/utils/curl.py @@ -9,7 +9,7 @@ class CurlParser(argparse.ArgumentParser): def error(self, message): - error_msg = 'There was an error parsing the curl command: {}'.format(message) + error_msg = f'There was an error parsing the curl command: {message}' raise ValueError(error_msg) @@ -52,7 +52,7 @@ def curl_to_request_kwargs(curl_command, ignore_unknown_options=True): parsed_args, argv = curl_parser.parse_known_args(curl_args[1:]) if argv: - msg = 'Unrecognized options: {}'.format(', '.join(argv)) + msg = f'Unrecognized options: {", ".join(argv)}' if ignore_unknown_options: warnings.warn(msg) else: diff --git a/scrapy/utils/decorators.py b/scrapy/utils/decorators.py index 2e2c7adc114..fef3882cb6e 100644 --- a/scrapy/utils/decorators.py +++ b/scrapy/utils/decorators.py @@ -14,9 +14,9 @@ def deprecated(use_instead=None): def deco(func): @wraps(func) def wrapped(*args, **kwargs): - message = "Call to deprecated function %s." % func.__name__ + message = f"Call to deprecated function {func.__name__}." if use_instead: - message += " Use %s instead." % use_instead + message += f" Use {use_instead} instead." warnings.warn(message, category=ScrapyDeprecationWarning, stacklevel=2) return func(*args, **kwargs) return wrapped diff --git a/scrapy/utils/deprecate.py b/scrapy/utils/deprecate.py index 3c8e3c8b5cf..fb7e69889f3 100644 --- a/scrapy/utils/deprecate.py +++ b/scrapy/utils/deprecate.py @@ -8,9 +8,8 @@ def attribute(obj, oldattr, newattr, version='0.12'): cname = obj.__class__.__name__ warnings.warn( - "%s.%s attribute is deprecated and will be no longer supported " - "in Scrapy %s, use %s.%s attribute instead" - % (cname, oldattr, version, cname, newattr), + f"{cname}.{oldattr} attribute is deprecated and will be no longer supported " + f"in Scrapy {version}, use {cname}.{newattr} attribute instead", ScrapyDeprecationWarning, stacklevel=3) @@ -116,7 +115,7 @@ def __call__(cls, *args, **kwargs): # deprecated class is in jinja2 template). __module__ attribute is not # important enough to raise an exception as users may be unable # to fix inspect.stack() errors. 
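Several hunks in this area, such as the ``scrapy.utils.deprecate.attribute`` one above, split a single ``%``-formatted string into adjacent f-string literals. Adjacent string literals are concatenated at compile time, so the resulting message is unchanged; a standalone sketch with made-up values (not part of the patch)::

    cname, oldattr, newattr, version = "MySpider", "domain_name", "allowed_domains", "0.12"

    # two adjacent f-string literals are joined into one message at compile time
    msg = (f"{cname}.{oldattr} attribute is deprecated and will be no longer supported "
           f"in Scrapy {version}, use {cname}.{newattr} attribute instead")

    assert msg.startswith("MySpider.domain_name attribute is deprecated")
    assert "MySpider.allowed_domains" in msg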
- warnings.warn("Error detecting parent module: %r" % e) + warnings.warn(f"Error detecting parent module: {e!r}") return deprecated_cls @@ -124,7 +123,7 @@ def __call__(cls, *args, **kwargs): def _clspath(cls, forced=None): if forced is not None: return forced - return '{}.{}'.format(cls.__module__, cls.__name__) + return f'{cls.__module__}.{cls.__name__}' DEPRECATION_RULES = [ @@ -137,7 +136,7 @@ def update_classpath(path): for prefix, replacement in DEPRECATION_RULES: if path.startswith(prefix): new_path = path.replace(prefix, replacement, 1) - warnings.warn("`{}` class is deprecated, use `{}` instead".format(path, new_path), + warnings.warn(f"`{path}` class is deprecated, use `{new_path}` instead", ScrapyDeprecationWarning) return new_path return path diff --git a/scrapy/utils/engine.py b/scrapy/utils/engine.py index 267c7ecd193..0c1cee1a04b 100644 --- a/scrapy/utils/engine.py +++ b/scrapy/utils/engine.py @@ -29,7 +29,7 @@ def get_engine_status(engine): try: checks += [(test, eval(test))] except Exception as e: - checks += [(test, "%s (exception)" % type(e).__name__)] + checks += [(test, f"{type(e).__name__} (exception)")] return checks @@ -38,7 +38,7 @@ def format_engine_status(engine=None): checks = get_engine_status(engine) s = "Execution engine status\n\n" for test, result in checks: - s += "%-47s : %s\n" % (test, result) + s += f"{test:<47} : {result}\n" s += "\n" return s diff --git a/scrapy/utils/ftp.py b/scrapy/utils/ftp.py index 19d56d6ecb5..6cace4f079b 100644 --- a/scrapy/utils/ftp.py +++ b/scrapy/utils/ftp.py @@ -33,5 +33,5 @@ def ftp_store_file( dirname, filename = posixpath.split(path) ftp_makedirs_cwd(ftp, dirname) command = 'STOR' if overwrite else 'APPE' - ftp.storbinary('%s %s' % (command, filename), file) + ftp.storbinary(f'{command} {filename}', file) file.close() diff --git a/scrapy/utils/iterators.py b/scrapy/utils/iterators.py index 5e15bf0c8af..789da139253 100644 --- a/scrapy/utils/iterators.py +++ b/scrapy/utils/iterators.py @@ -22,8 +22,8 @@ def xmliter(obj, nodename): """ nodename_patt = re.escape(nodename) - HEADER_START_RE = re.compile(r'^(.*?)<\s*%s(?:\s|>)' % nodename_patt, re.S) - HEADER_END_RE = re.compile(r'<\s*/%s\s*>' % nodename_patt, re.S) + HEADER_START_RE = re.compile(fr'^(.*?)<\s*{nodename_patt}(?:\s|>)', re.S) + HEADER_END_RE = re.compile(fr'<\s*/{nodename_patt}\s*>', re.S) text = _body_or_str(obj) header_start = re.search(HEADER_START_RE, text) @@ -31,7 +31,7 @@ def xmliter(obj, nodename): header_end = re_rsearch(HEADER_END_RE, text) header_end = text[header_end[1]:].strip() if header_end else '' - r = re.compile(r'<%(np)s[\s>].*?' 
% {'np': nodename_patt}, re.DOTALL) + r = re.compile(fr'<{nodename_patt}[\s>].*?</{nodename_patt}>', re.DOTALL) for match in r.finditer(text): nodetext = header_start + match.group() + header_end yield Selector(text=nodetext, type='xml').xpath('//' + nodename)[0] @@ -40,9 +40,9 @@ def xmliter_lxml(obj, nodename, namespace=None, prefix='x'): from lxml import etree reader = _StreamReader(obj) - tag = '{%s}%s' % (namespace, nodename) if namespace else nodename + tag = f'{{{namespace}}}{nodename}' if namespace else nodename iterable = etree.iterparse(reader, tag=tag, encoding=reader.encoding) - selxpath = '//' + ('%s:%s' % (prefix, nodename) if namespace else nodename) + selxpath = '//' + (f'{prefix}:{nodename}' if namespace else nodename) for _, node in iterable: nodetext = etree.tostring(node, encoding='unicode') node.clear() @@ -131,8 +131,7 @@ def _body_or_str(obj, unicode=True): if not isinstance(obj, expected_types): expected_types_str = " or ".join(t.__name__ for t in expected_types) raise TypeError( - "Object %r must be %s, not %s" - % (obj, expected_types_str, type(obj).__name__) + f"Object {obj!r} must be {expected_types_str}, not {type(obj).__name__}" ) if isinstance(obj, Response): if not unicode: diff --git a/scrapy/utils/log.py b/scrapy/utils/log.py index e4131573864..62df7a6ab29 100644 --- a/scrapy/utils/log.py +++ b/scrapy/utils/log.py @@ -143,7 +143,7 @@ def log_scrapy_info(settings): logger.info("Scrapy %(version)s started (bot: %(bot)s)", {'version': scrapy.__version__, 'bot': settings['BOT_NAME']}) versions = [ - "%s %s" % (name, version) + f"{name} {version}" for name, version in scrapy_components_versions() if name != "Scrapy" ] @@ -187,7 +187,7 @@ def __init__(self, crawler, *args, **kwargs): self.crawler = crawler def emit(self, record): - sname = 'log_count/{}'.format(record.levelname) + sname = f'log_count/{record.levelname}' self.crawler.stats.inc_value(sname) diff --git a/scrapy/utils/misc.py b/scrapy/utils/misc.py index bd400bd3006..9107f30ef0c 100644 --- a/scrapy/utils/misc.py +++ b/scrapy/utils/misc.py @@ -46,7 +46,7 @@ def load_object(path): try: dot = path.rindex('.') except ValueError: - raise ValueError("Error loading object '%s': not a full path" % path) + raise ValueError(f"Error loading object '{path}': not a full path") module, name = path[:dot], path[dot + 1:] mod = import_module(module) @@ -54,7 +54,7 @@ def load_object(path): try: obj = getattr(mod, name) except AttributeError: - raise NameError("Module '%s' doesn't define any object named '%s'" % (module, name)) + raise NameError(f"Module '{module}' doesn't define any object named '{name}'") return obj @@ -163,7 +163,7 @@ def create_instance(objcls, settings, crawler, *args, **kwargs): instance = objcls(*args, **kwargs) method_name = '__new__' if instance is None: - raise TypeError("%s.%s returned None" % (objcls.__qualname__, method_name)) + raise TypeError(f"{objcls.__qualname__}.{method_name} returned None") return instance @@ -234,9 +234,10 @@ def warn_on_generator_with_return_value(spider, callable): """ if is_generator_with_return_value(callable): warnings.warn( - 'The "{}.{}" method is a generator and includes a "return" statement with a ' 'value different than None. This could lead to unexpected behaviour. Please see ' + f'The "{spider.__class__.__name__}.{callable.__name__}" method is ' 'a generator and includes a "return" statement with a value ' 'different than None. This could lead to unexpected behaviour. 
Please see ' 'https://docs.python.org/3/reference/simple_stmts.html#the-return-statement ' - 'for details about the semantics of the "return" statement within generators' - .format(spider.__class__.__name__, callable.__name__), stacklevel=2, + 'for details about the semantics of the "return" statement within generators', + stacklevel=2, ) diff --git a/scrapy/utils/project.py b/scrapy/utils/project.py index b8d3ebf9d4c..fd13d85e310 100644 --- a/scrapy/utils/project.py +++ b/scrapy/utils/project.py @@ -20,7 +20,7 @@ def inside_project(): try: import_module(scrapy_module) except ImportError as exc: - warnings.warn("Cannot import scrapy settings module %s: %s" % (scrapy_module, exc)) + warnings.warn(f"Cannot import scrapy settings module {scrapy_module}: {exc}") else: return True return bool(closest_scrapy_cfg()) @@ -90,7 +90,7 @@ def get_project_settings(): warnings.warn( 'Use of environment variables prefixed with SCRAPY_ to override ' 'settings is deprecated. The following environment variables are ' - 'currently defined: {}'.format(setting_envvar_list), + f'currently defined: {setting_envvar_list}', ScrapyDeprecationWarning ) settings.setdict(scrapy_envvars, priority='project') diff --git a/scrapy/utils/python.py b/scrapy/utils/python.py index 1f23332646d..5703fd4c3ae 100644 --- a/scrapy/utils/python.py +++ b/scrapy/utils/python.py @@ -91,7 +91,7 @@ def to_unicode(text, encoding=None, errors='strict'): return text if not isinstance(text, (bytes, str)): raise TypeError('to_unicode must receive a bytes or str ' - 'object, got %s' % type(text).__name__) + f'object, got {type(text).__name__}') if encoding is None: encoding = 'utf-8' return text.decode(encoding, errors) @@ -104,7 +104,7 @@ def to_bytes(text, encoding=None, errors='strict'): return text if not isinstance(text, str): raise TypeError('to_bytes must receive a str or bytes ' - 'object, got %s' % type(text).__name__) + f'object, got {type(text).__name__}') if encoding is None: encoding = 'utf-8' return text.encode(encoding, errors) @@ -174,7 +174,7 @@ def binary_is_text(data): does not contain unprintable control characters. 
""" if not isinstance(data, bytes): - raise TypeError("data must be bytes, got '%s'" % type(data).__name__) + raise TypeError(f"data must be bytes, got '{type(data).__name__}'") return all(c not in _BINARYCHARS for c in data) @@ -217,7 +217,7 @@ def get_func_args(func, stripself=False): else: return get_func_args(func.__call__, True) else: - raise TypeError('%s is not callable' % type(func)) + raise TypeError(f'{type(func)} is not callable') if stripself: func_args.pop(0) return func_args @@ -250,7 +250,7 @@ def get_spec(func): elif hasattr(func, '__call__'): spec = _getargspec_py23(func.__call__) else: - raise TypeError('%s is not callable' % type(func)) + raise TypeError(f'{type(func)} is not callable') defaults = spec.defaults or [] @@ -322,7 +322,7 @@ def global_object_name(obj): >>> global_object_name(Request) 'scrapy.http.request.Request' """ - return "%s.%s" % (obj.__module__, obj.__name__) + return f"{obj.__module__}.{obj.__name__}" if hasattr(sys, "pypy_version_info"): diff --git a/scrapy/utils/reactor.py b/scrapy/utils/reactor.py index 879d27907bc..831d29462be 100644 --- a/scrapy/utils/reactor.py +++ b/scrapy/utils/reactor.py @@ -10,7 +10,7 @@ def listen_tcp(portrange, host, factory): """Like reactor.listenTCP but tries different ports in a range.""" from twisted.internet import reactor if len(portrange) > 2: - raise ValueError("invalid portrange: %s" % portrange) + raise ValueError(f"invalid portrange: {portrange}") if not portrange: return reactor.listenTCP(0, factory, interface=host) if not hasattr(portrange, '__iter__'): @@ -78,9 +78,9 @@ def verify_installed_reactor(reactor_path): from twisted.internet import reactor reactor_class = load_object(reactor_path) if not isinstance(reactor, reactor_class): - msg = "The installed reactor ({}.{}) does not match the requested one ({})".format( - reactor.__module__, reactor.__class__.__name__, reactor_path - ) + msg = ("The installed reactor " + f"({reactor.__module__}.{reactor.__class__.__name__}) does not " + f"match the requested one ({reactor_path})") raise Exception(msg) diff --git a/scrapy/utils/reqser.py b/scrapy/utils/reqser.py index 503d7b1332a..d38b1bc4d23 100644 --- a/scrapy/utils/reqser.py +++ b/scrapy/utils/reqser.py @@ -84,7 +84,7 @@ def _find_method(obj, func): # https://docs.python.org/3/reference/datamodel.html if obj_func.__func__ is func.__func__: return name - raise ValueError("Function %s is not an instance method in: %s" % (func, obj)) + raise ValueError(f"Function {func} is not an instance method in: {obj}") def _get_method(obj, name): @@ -92,4 +92,4 @@ def _get_method(obj, name): try: return getattr(obj, name) except AttributeError: - raise ValueError("Method %r not found in: %s" % (name, obj)) + raise ValueError(f"Method {name!r} not found in: {obj}") diff --git a/scrapy/utils/response.py b/scrapy/utils/response.py index c29b619ceec..99b089b6fe2 100644 --- a/scrapy/utils/response.py +++ b/scrapy/utils/response.py @@ -39,7 +39,7 @@ def response_status_message(status): """Return status code plus status text descriptive message """ message = http.RESPONSES.get(int(status), "Unknown Status") - return '%s %s' % (status, to_unicode(message)) + return f'{status} {to_unicode(message)}' def response_httprepr(response): @@ -69,15 +69,15 @@ def open_in_browser(response, _openfunc=webbrowser.open): body = response.body if isinstance(response, HtmlResponse): if b'' body = body.replace(b'', to_bytes(repl)) ext = '.html' elif isinstance(response, TextResponse): ext = '.txt' else: - raise TypeError("Unsupported response type: 
%s" % - response.__class__.__name__) + raise TypeError("Unsupported response type: " + f"{response.__class__.__name__}") fd, fname = tempfile.mkstemp(ext) os.write(fd, body) os.close(fd) - return _openfunc("file://%s" % fname) + return _openfunc(f"file://{fname}") diff --git a/scrapy/utils/serialize.py b/scrapy/utils/serialize.py index cc326360256..a73cf03c550 100644 --- a/scrapy/utils/serialize.py +++ b/scrapy/utils/serialize.py @@ -17,7 +17,7 @@ def default(self, o): if isinstance(o, set): return list(o) elif isinstance(o, datetime.datetime): - return o.strftime("%s %s" % (self.DATE_FORMAT, self.TIME_FORMAT)) + return o.strftime(f"{self.DATE_FORMAT} {self.TIME_FORMAT}") elif isinstance(o, datetime.date): return o.strftime(self.DATE_FORMAT) elif isinstance(o, datetime.time): @@ -29,9 +29,9 @@ def default(self, o): elif is_item(o): return ItemAdapter(o).asdict() elif isinstance(o, Request): - return "<%s %s %s>" % (type(o).__name__, o.method, o.url) + return f"<{type(o).__name__} {o.method} {o.url}>" elif isinstance(o, Response): - return "<%s %s %s>" % (type(o).__name__, o.status, o.url) + return f"<{type(o).__name__} {o.status} {o.url}>" else: return super().default(o) diff --git a/scrapy/utils/ssl.py b/scrapy/utils/ssl.py index c3c5e329b5b..ea4dde882b5 100644 --- a/scrapy/utils/ssl.py +++ b/scrapy/utils/ssl.py @@ -50,7 +50,7 @@ def get_temp_key_info(ssl_object): key_info.append(ffi_buf_to_string(cname)) else: key_info.append(ffi_buf_to_string(pyOpenSSLutil.lib.OBJ_nid2sn(key_type))) - key_info.append('%s bits' % pyOpenSSLutil.lib.EVP_PKEY_bits(temp_key)) + key_info.append(f'{pyOpenSSLutil.lib.EVP_PKEY_bits(temp_key)} bits') return ', '.join(key_info) @@ -58,4 +58,4 @@ def get_openssl_version(): system_openssl = OpenSSL.SSL.SSLeay_version( OpenSSL.SSL.SSLEAY_VERSION ).decode('ascii', errors='replace') - return '{} ({})'.format(OpenSSL.version.__version__, system_openssl) + return f'{OpenSSL.version.__version__} ({system_openssl})' diff --git a/scrapy/utils/test.py b/scrapy/utils/test.py index 7442a2f3307..f54942ffb13 100644 --- a/scrapy/utils/test.py +++ b/scrapy/utils/test.py @@ -79,7 +79,7 @@ def get_ftp_content_and_delete( def buffer_data(data): ftp_data.append(data) - ftp.retrbinary('RETR %s' % path, buffer_data) + ftp.retrbinary(f'RETR {path}', buffer_data) dirname, filename = split(path) ftp.cwd(dirname) ftp.delete(filename) diff --git a/scrapy/utils/testproc.py b/scrapy/utils/testproc.py index a63c9a9424c..a54c7db953e 100644 --- a/scrapy/utils/testproc.py +++ b/scrapy/utils/testproc.py @@ -23,10 +23,10 @@ def execute(self, args, check_code=True, settings=None): def _process_finished(self, pp, cmd, check_code): if pp.exitcode and check_code: - msg = "process %s exit with code %d" % (cmd, pp.exitcode) - msg += "\n>>> stdout <<<\n%s" % pp.out + msg = f"process {cmd} exit with code {pp.exitcode}" + msg += f"\n>>> stdout <<<\n{pp.out}" msg += "\n" - msg += "\n>>> stderr <<<\n%s" % pp.err + msg += f"\n>>> stderr <<<\n{pp.err}" raise RuntimeError(msg) return pp.exitcode, pp.out, pp.err diff --git a/scrapy/utils/testsite.py b/scrapy/utils/testsite.py index 397e547035c..fce77be3249 100644 --- a/scrapy/utils/testsite.py +++ b/scrapy/utils/testsite.py @@ -9,7 +9,7 @@ def setUp(self): from twisted.internet import reactor super().setUp() self.site = reactor.listenTCP(0, test_site(), interface="127.0.0.1") - self.baseurl = "http://localhost:%d/" % self.site.getHost().port + self.baseurl = f"http://localhost:{self.site.getHost().port}/" def tearDown(self): super().tearDown() @@ -40,5 +40,5 @@ 
def test_site(): if __name__ == '__main__': from twisted.internet import reactor port = reactor.listenTCP(0, test_site(), interface="127.0.0.1") - print("http://localhost:%d/" % port.getHost().port) + print(f"http://localhost:{port.getHost().port}/") reactor.run() diff --git a/scrapy/utils/trackref.py b/scrapy/utils/trackref.py index baed5c5367c..3e40acd69c1 100644 --- a/scrapy/utils/trackref.py +++ b/scrapy/utils/trackref.py @@ -41,9 +41,7 @@ def format_live_refs(ignore=NoneType): if issubclass(cls, ignore): continue oldest = min(wdict.values()) - s += "%-30s %6d oldest: %ds ago\n" % ( - cls.__name__, len(wdict), now - oldest - ) + s += f"{cls.__name__:<30} {len(wdict):6} oldest: {int(now - oldest)}s ago\n" return s diff --git a/scrapy/utils/url.py b/scrapy/utils/url.py index b23ddb45953..a6a2a9e8b67 100644 --- a/scrapy/utils/url.py +++ b/scrapy/utils/url.py @@ -22,7 +22,7 @@ def url_is_from_any_domain(url, domains): if not host: return False domains = [d.lower() for d in domains] - return any((host == d) or (host.endswith('.%s' % d)) for d in domains) + return any((host == d) or (host.endswith(f'.{d}')) for d in domains) def url_is_from_spider(url, spider): @@ -153,7 +153,7 @@ def strip_url(url, strip_credentials=True, strip_default_port=True, origin_only= if (parsed_url.scheme, parsed_url.port) in (('http', 80), ('https', 443), ('ftp', 21)): - netloc = netloc.replace(':{p.port}'.format(p=parsed_url), '') + netloc = netloc.replace(f':{parsed_url.port}', '') return urlunparse(( parsed_url.scheme, netloc, diff --git a/sep/sep-002.rst b/sep/sep-002.rst index c467cb40279..2e8a283406b 100644 --- a/sep/sep-002.rst +++ b/sep/sep-002.rst @@ -30,7 +30,7 @@ Proposed Implementation if hasattr(value, '__iter__'): # str/unicode not allowed return [self._field.to_python(v) for v in value] else: - raise TypeError("Expected iterable, got %s" % type(value).__name__) + raise TypeError(f"Expected iterable, got {type(value).__name__}") def get_default(self): # must return a new copy to avoid unexpected behaviors with mutable defaults diff --git a/sep/sep-004.rst b/sep/sep-004.rst index 05b0eb99c6f..b9f5e556f42 100644 --- a/sep/sep-004.rst +++ b/sep/sep-004.rst @@ -11,7 +11,7 @@ SEP-004: Library API ==================== .. note:: the library API has been implemented, but slightly different from proposed in this SEP. You can run a Scrapy crawler inside a Twisted - reactor, but not outside it. + reactor, but not outside it. Introduction ============ @@ -49,7 +49,7 @@ Here's a simple proof-of-concept code of such script: cr = Crawler(start_urls, callback=parse_start_page) cr.run() # blocking call - this populates scraped_items - print "%d items scraped" % len(scraped_items) + print(f"{len(scraped_items)} items scraped") # ... do something more interesting with scraped_items ... The behaviour of the Scrapy crawler would be controller by the Scrapy settings, diff --git a/sep/sep-014.rst b/sep/sep-014.rst index 8ca81824d47..4e3340521fe 100644 --- a/sep/sep-014.rst +++ b/sep/sep-014.rst @@ -21,7 +21,7 @@ Current flaws and inconsistencies 2. Link extractors are inflexible and hard to maintain, link processing/filtering is tightly coupled. (e.g. canonicalize) 3. Isn't possible to crawl an url directly from command line because the Spider - does not know which callback use. + does not know which callback use. These flaws will be corrected by the changes proposed in this SEP. @@ -55,7 +55,7 @@ Request Extractors Request Extractors takes response object and determines which requests follow. 
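As an aside to the conversions in this SEP (for example the ``RulesManager`` hunk further below), every ``%r`` placeholder becomes an ``!r`` conversion, which calls ``repr()`` exactly as ``%r`` did. A standalone sketch with made-up values (not part of the patch)::

    matcher, rule = None, 'r1'

    old = 'Not valid matcher given %r in %r' % (matcher, rule)
    new = f'Not valid matcher given {matcher!r} in {rule!r}'

    # !r reproduces the old %r output, including the quotes around strings
    assert old == new == "Not valid matcher given None in 'r1'"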
This is an enhancement to ``LinkExtractors`` which returns urls (links), -Request Extractors return Request objects. +Request Extractors return Request objects. Request Processors ------------------ @@ -142,7 +142,7 @@ Custom Processor and External Callback # Callback defined out of spider def my_external_callback(response): - # process item + # process item pass class SampleSpider(CrawlSpider): @@ -233,7 +233,7 @@ Request/Response Matchers def matches_request(self, request): """Returns True if Request's url matches initial url""" - return self.matches_url(request.url) + return self.matches_url(request.url) def matches_response(self, response): """REturns True if Response's url matches initial url""" @@ -305,14 +305,14 @@ Request Extractor for req in self.requests: req.meta.setdefault('link_text', '') req.meta['link_text'] = str_to_unicode(req.meta['link_text'], - encoding) + encoding) def reset(self): """Reset state""" FixedSGMLParser.reset(self) self.requests = [] self.base_url = None - + def unknown_starttag(self, tag, attrs): """Process unknown start tag""" if 'base' tag: @@ -376,7 +376,7 @@ Request Processor #!python # - # Request Processors + # Request Processors # Processors receive list of requests and return list of requests # """Request Processors""" @@ -390,7 +390,7 @@ Request Processor # replace in-place req.url = canonicalize_url(req.url) yield req - + class Unique(object): """Filter duplicate Requests""" @@ -455,9 +455,9 @@ Request Processor """Initialize allow/deny attributes""" _re_type = type(re.compile('', 0)) - self.allow_res = [x if isinstance(x, _re_type) else re.compile(x) + self.allow_res = [x if isinstance(x, _re_type) else re.compile(x) for x in arg_to_iter(allow)] - self.deny_res = [x if isinstance(x, _re_type) else re.compile(x) + self.deny_res = [x if isinstance(x, _re_type) else re.compile(x) for x in arg_to_iter(deny)] def __call__(self, requests): @@ -524,7 +524,7 @@ Rules Manager # # Handles rules matcher/callbacks # Resolve rule for given response - # + # class RulesManager(object): """Rules Manager""" def __init__(self, rules, spider, default_matcher=UrlRegexMatcher): @@ -542,8 +542,8 @@ Rules Manager # instance default matcher matcher = default_matcher(rule.matcher) else: - raise ValueError('Not valid matcher given %r in %r' \ - % (rule.matcher, rule)) + raise ValueError('Not valid matcher given ' + f'{rule.matcher!r} in {rule!r}') # prepare callback if callable(rule.callback): @@ -553,8 +553,7 @@ Rules Manager callback = getattr(spider, rule.callback) if not callable(callback): - raise AttributeError('Invalid callback %r can not be resolved' \ - % callback) + raise AttributeError(f'Invalid callback {callback!r} can not be resolved') else: callback = None diff --git a/sep/sep-018.rst b/sep/sep-018.rst index fe707923a89..d0169b81e3f 100644 --- a/sep/sep-018.rst +++ b/sep/sep-018.rst @@ -171,7 +171,7 @@ the same spider: #!python class MySpider(BaseSpider): - middlewares = [RegexLinkExtractor(), CallbackRules(), CanonicalizeUrl(), + middlewares = [RegexLinkExtractor(), CallbackRules(), CanonicalizeUrl(), ItemIdSetter(), OffsiteMiddleware()] allowed_domains = ['example.com', 'sub.example.com'] @@ -196,7 +196,7 @@ the same spider: # extract item from response return item -The Spider Middleware that implements spider code +The Spider Middleware that implements spider code ================================================= There's gonna be one middleware that will take care of calling the proper @@ -324,7 +324,7 @@ Another example could be for building URL 
canonicalizers: class CanonializeUrl(object): def process_request(self, request, response, spider): - curl = canonicalize_url(request.url, + curl = canonicalize_url(request.url, rules=spider.canonicalization_rules) return request.replace(url=curl) @@ -332,7 +332,7 @@ Another example could be for building URL canonicalizers: class MySpider(BaseSpider): middlewares = [CanonicalizeUrl()] - canonicalization_rules = ['sort-query-args', + canonicalization_rules = ['sort-query-args', 'normalize-percent-encoding', ...] # ... @@ -414,7 +414,7 @@ A spider middleware to avoid visiting pages forbidden by robots.txt: if netloc in info.pending: res = None else: - robotsurl = "%s://%s/robots.txt" % (url.scheme, netloc) + robotsurl = f"{url.scheme}://{netloc}/robots.txt" meta = {'spider': spider, {'handle_httpstatus_list': [403, 404, 500]} res = Request(robotsurl, callback=self.parse_robots, meta=meta, priority=self.REQUEST_PRIORITY) @@ -474,7 +474,7 @@ This is a port of the Offsite middleware to the new spider middleware API: if host and host not in info.hosts_seen: spider.log("Filtered offsite request to %r: %s" % (host, request)) info.hosts_seen.add(host) - + def should_follow(self, request, spider): info = self.spiders[spider] # hostname can be None for wrong urls (like javascript links) @@ -484,7 +484,7 @@ This is a port of the Offsite middleware to the new spider middleware API: def get_host_regex(self, spider): """Override this method to implement a different offsite policy""" domains = [d.replace('.', r'\.') for d in spider.allowed_domains] - regex = r'^(.*\.)?(%s)$' % '|'.join(domains) + regex = fr'^(.*\.)?({"|".join(domains)})$' return re.compile(regex) def spider_opened(self, spider): @@ -570,7 +570,7 @@ A middleware to filter out requests already seen: self.dupefilter = load_object(clspath)() dispatcher.connect(self.spider_opened, signal=signals.spider_opened) dispatcher.connect(self.spider_closed, signal=signals.spider_closed) - + def enqueue_request(self, spider, request): seen = self.dupefilter.request_seen(spider, request) if not seen or request.dont_filter: @@ -601,8 +601,8 @@ A middleware to Scrape data using Parsley as described in UsingParsley for name in parslet.keys(): self.fields[name] = Field() super(ParsleyItem, self).__init__(*a, **kw) - self.item_class = ParsleyItem - self.parsley = PyParsley(parslet, output='python') + self.item_class = ParsleyItem + self.parsley = PyParsley(parslet, output='python') def process_response(self, response, request, spider): return self.item_class(self.parsly.parse(string=response.body)) @@ -627,7 +627,7 @@ Resolved: not the original one (think of redirections), but it does carry the ``meta`` of the original one. The original one may not be available anymore (in memory) if we're using a persistent scheduler., but in that case it would be - the deserialized request from the persistent scheduler queue. + the deserialized request from the persistent scheduler queue. 
- No - this would make implementation more complex and we're not sure it's really needed diff --git a/tests/CrawlerRunner/ip_address.py b/tests/CrawlerRunner/ip_address.py index 3f97387988f..f545de39f55 100644 --- a/tests/CrawlerRunner/ip_address.py +++ b/tests/CrawlerRunner/ip_address.py @@ -38,7 +38,7 @@ def parse(self, response): if __name__ == "__main__": with MockServer() as mock_http_server, MockDNSServer() as mock_dns_server: port = urlparse(mock_http_server.http_address).port - url = "http://not.a.real.domain:{port}/echo".format(port=port) + url = f"http://not.a.real.domain:{port}/echo" servers = [(mock_dns_server.host, mock_dns_server.port)] reactor.installResolver(createResolver(servers=servers)) diff --git a/tests/mockserver.py b/tests/mockserver.py index 6f0c274b915..ab9aec6a670 100644 --- a/tests/mockserver.py +++ b/tests/mockserver.py @@ -73,7 +73,7 @@ def renderRequest(self, request, nlist): for nl in nlist: args[b"n"] = [to_bytes(str(nl))] argstr = urlencode(args, doseq=True) - s += "follow %d
" % (argstr, nl) + s += f"follow {nl}
" s += """""" request.write(to_bytes(s)) request.finish() @@ -91,7 +91,7 @@ def render_GET(self, request): return NOT_DONE_YET def _delayedRender(self, request, n): - request.write(to_bytes("Response delayed for %0.3f seconds\n" % n)) + request.write(to_bytes(f"Response delayed for {n:.3f} seconds\n")) request.finish() @@ -310,8 +310,8 @@ def ssl_context_factory(keyfile='keys/localhost.key', certfile='keys/localhost.c def print_listening(): httpHost = httpPort.getHost() httpsHost = httpsPort.getHost() - httpAddress = "http://%s:%d" % (httpHost.host, httpHost.port) - httpsAddress = "https://%s:%d" % (httpsHost.host, httpsHost.port) + httpAddress = f'http://{httpHost.host}:{httpHost.port}' + httpsAddress = f'https://{httpsHost.host}:{httpsHost.port}' print(httpAddress) print(httpsAddress) @@ -323,7 +323,7 @@ def print_listening(): def print_listening(): host = listener.getHost() - print("%s:%s" % (host.host, host.port)) + print(f"{host.host}:{host.port}") reactor.callWhenRunning(print_listening) reactor.run() diff --git a/tests/py36/_test_crawl.py b/tests/py36/_test_crawl.py index 162a5376055..e4934028482 100644 --- a/tests/py36/_test_crawl.py +++ b/tests/py36/_test_crawl.py @@ -33,7 +33,7 @@ class AsyncDefAsyncioGenComplexSpider(SimpleSpider): depth = 2 def _get_req(self, index, cb=None): - return Request(self.mockserver.url("/status?n=200&request=%d" % index), + return Request(self.mockserver.url(f"/status?n=200&request={index}"), meta={'index': index}, dont_filter=True, callback=cb) diff --git a/tests/spiders.py b/tests/spiders.py index 63bd726fb26..3e525e62f3f 100644 --- a/tests/spiders.py +++ b/tests/spiders.py @@ -45,7 +45,7 @@ def __init__(self, total=10, show=20, order="rand", maxlatency=0.0, *args, **kwa self.urls_visited = [] self.times = [] qargs = {'total': total, 'show': show, 'order': order, 'maxlatency': maxlatency} - url = self.mockserver.url("/follow?%s" % urlencode(qargs, doseq=1)) + url = self.mockserver.url(f"/follow?{urlencode(qargs, doseq=1)}") self.start_urls = [url] def parse(self, response): @@ -67,7 +67,7 @@ def __init__(self, n=1, b=0, *args, **kwargs): def start_requests(self): self.t1 = time.time() - url = self.mockserver.url("/delay?n=%s&b=%s" % (self.n, self.b)) + url = self.mockserver.url(f"/delay?n={self.n}&b={self.b}") yield Request(url, callback=self.parse, errback=self.errback) def parse(self, response): @@ -192,7 +192,7 @@ def start_requests(self): for s in range(100): qargs = {'total': 10, 'seed': s} - url = self.mockserver.url("/follow?%s") % urlencode(qargs, doseq=1) + url = self.mockserver.url(f"/follow?{urlencode(qargs, doseq=1)}") yield Request(url, meta={'seed': s}) if self.fail_yielding: 2 / 0 @@ -239,7 +239,7 @@ class DuplicateStartRequestsSpider(MockServerSpider): def start_requests(self): for i in range(0, self.distinct_urls): for j in range(0, self.dupe_factor): - url = self.mockserver.url("/echo?headers=1&body=test%d" % i) + url = self.mockserver.url(f"/echo?headers=1&body=test{i}") yield Request(url, dont_filter=self.dont_filter) def __init__(self, url="http://localhost:8998", *args, **kwargs): diff --git a/tests/test_cmdline/extensions.py b/tests/test_cmdline/extensions.py index 6504b4d2cb3..005e452140b 100644 --- a/tests/test_cmdline/extensions.py +++ b/tests/test_cmdline/extensions.py @@ -4,7 +4,7 @@ class TestExtension: def __init__(self, settings): - settings.set('TEST1', "%s + %s" % (settings['TEST1'], 'started')) + settings.set('TEST1', f"{settings['TEST1']} + started") @classmethod def from_crawler(cls, crawler): diff --git 
a/tests/test_command_check.py b/tests/test_command_check.py index f27f526a3cc..34f5e59ddba 100644 --- a/tests/test_command_check.py +++ b/tests/test_command_check.py @@ -14,20 +14,20 @@ def setUp(self): def _write_contract(self, contracts, parse_def): with open(self.spider, 'w') as file: - file.write(""" + file.write(f""" import scrapy class CheckSpider(scrapy.Spider): - name = '{0}' + name = '{self.spider_name}' start_urls = ['http://example.com'] def parse(self, response, **cb_kwargs): \"\"\" @url http://example.com - {1} + {contracts} \"\"\" - {2} - """.format(self.spider_name, contracts, parse_def)) + {parse_def} + """) def _test_contract(self, contracts='', parse_def='pass'): self._write_contract(contracts, parse_def) diff --git a/tests/test_command_parse.py b/tests/test_command_parse.py index e115f420f25..ed3848d8832 100644 --- a/tests/test_command_parse.py +++ b/tests/test_command_parse.py @@ -21,14 +21,14 @@ def setUp(self): self.spider_name = 'parse_spider' fname = abspath(join(self.proj_mod_path, 'spiders', 'myspider.py')) with open(fname, 'w') as f: - f.write(""" + f.write(f""" import scrapy from scrapy.linkextractors import LinkExtractor from scrapy.spiders import CrawlSpider, Rule class MySpider(scrapy.Spider): - name = '{0}' + name = '{self.spider_name}' def parse(self, response): if getattr(self, 'test_arg', None): @@ -58,7 +58,7 @@ def parse_request_without_meta(self, response): self.logger.debug('It Does Not Work :(') class MyGoodCrawlSpider(CrawlSpider): - name = 'goodcrawl{0}' + name = 'goodcrawl{self.spider_name}' rules = ( Rule(LinkExtractor(allow=r'/html'), callback='parse_item', follow=True), @@ -74,7 +74,7 @@ def parse(self, response): class MyBadCrawlSpider(CrawlSpider): '''Spider which doesn't define a parse_item callback while using it in a rule.''' - name = 'badcrawl{0}' + name = 'badcrawl{self.spider_name}' rules = ( Rule(LinkExtractor(allow=r'/html'), callback='parse_item', follow=True), @@ -82,7 +82,7 @@ class MyBadCrawlSpider(CrawlSpider): def parse(self, response): return [scrapy.Item(), dict(foo='bar')] -""".format(self.spider_name)) +""") fname = abspath(join(self.proj_mod_path, 'pipelines.py')) with open(fname, 'w') as f: @@ -99,9 +99,9 @@ def process_item(self, item, spider): fname = abspath(join(self.proj_mod_path, 'settings.py')) with open(fname, 'a') as f: - f.write(""" -ITEM_PIPELINES = {'%s.pipelines.MyPipeline': 1} -""" % self.project_name) + f.write(f""" +ITEM_PIPELINES = {{'{self.project_name}.pipelines.MyPipeline': 1}} +""") @defer.inlineCallbacks def test_spider_arguments(self): diff --git a/tests/test_command_shell.py b/tests/test_command_shell.py index 66c293c00b9..16c9559b570 100644 --- a/tests/test_command_shell.py +++ b/tests/test_command_shell.py @@ -65,8 +65,8 @@ def test_redirect_not_follow_302(self): def test_fetch_redirect_follow_302(self): """Test that calling ``fetch(url)`` follows HTTP redirects by default.""" url = self.url('/redirect-no-meta-refresh') - code = "fetch('{0}')" - errcode, out, errout = yield self.execute(['-c', code.format(url)]) + code = f"fetch('{url}')" + errcode, out, errout = yield self.execute(['-c', code]) self.assertEqual(errcode, 0, out) assert b'Redirecting (302)' in errout assert b'Crawled (200)' in errout @@ -75,23 +75,23 @@ def test_fetch_redirect_follow_302(self): def test_fetch_redirect_not_follow_302(self): """Test that calling ``fetch(url, redirect=False)`` disables automatic redirects.""" url = self.url('/redirect-no-meta-refresh') - code = "fetch('{0}', redirect=False)" - errcode, out, errout = 
yield self.execute(['-c', code.format(url)]) + code = f"fetch('{url}', redirect=False)" + errcode, out, errout = yield self.execute(['-c', code]) self.assertEqual(errcode, 0, out) assert b'Crawled (302)' in errout @defer.inlineCallbacks def test_request_replace(self): url = self.url('/text') - code = "fetch('{0}') or fetch(response.request.replace(method='POST'))" - errcode, out, _ = yield self.execute(['-c', code.format(url)]) + code = f"fetch('{url}') or fetch(response.request.replace(method='POST'))" + errcode, out, _ = yield self.execute(['-c', code]) self.assertEqual(errcode, 0, out) @defer.inlineCallbacks def test_scrapy_import(self): url = self.url('/text') - code = "fetch(scrapy.Request('{0}'))" - errcode, out, _ = yield self.execute(['-c', code.format(url)]) + code = f"fetch(scrapy.Request('{url}'))" + errcode, out, _ = yield self.execute(['-c', code]) self.assertEqual(errcode, 0, out) @defer.inlineCallbacks diff --git a/tests/test_command_version.py b/tests/test_command_version.py index 99c01c2b72a..00d998388db 100644 --- a/tests/test_command_version.py +++ b/tests/test_command_version.py @@ -16,7 +16,7 @@ def test_output(self): _, out, _ = yield self.execute([]) self.assertEqual( out.strip().decode(encoding), - "Scrapy %s" % scrapy.__version__, + f"Scrapy {scrapy.__version__}", ) @defer.inlineCallbacks diff --git a/tests/test_commands.py b/tests/test_commands.py index ee8a9260476..5faaca73860 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -42,7 +42,7 @@ def setUp(self): def test_settings_json_string(self): feeds_json = '{"data.json": {"format": "json"}, "data.xml": {"format": "xml"}}' - opts, args = self.parser.parse_args(args=['-s', 'FEEDS={}'.format(feeds_json), 'spider.py']) + opts, args = self.parser.parse_args(args=['-s', f'FEEDS={feeds_json}', 'spider.py']) self.command.process_options(args, opts) self.assertIsInstance(self.command.settings['FEEDS'], scrapy.settings.BaseSettings) self.assertEqual(dict(self.command.settings['FEEDS']), json.loads(feeds_json)) @@ -163,10 +163,10 @@ def test_startproject_template_override(self): pass assert exists(join(self.tmpl_proj, 'root_template')) - args = ['--set', 'TEMPLATES_DIR=%s' % self.tmpl] + args = ['--set', f'TEMPLATES_DIR={self.tmpl}'] p, out, err = self.proc('startproject', self.project_name, *args) - self.assertIn("New Scrapy project '%s', using template directory" - % self.project_name, out) + self.assertIn(f"New Scrapy project '{self.project_name}', " + "using template directory", out) self.assertIn(self.tmpl_proj, out) assert exists(join(self.proj_path, 'root_template')) @@ -247,7 +247,7 @@ def _make_read_only(path): 'startproject', project_name, '--set', - 'TEMPLATES_DIR={}'.format(read_only_templates_dir), + f'TEMPLATES_DIR={read_only_templates_dir}', ), cwd=destination, env=self.env, @@ -320,7 +320,7 @@ def setUp(self): super().setUp() self.call('startproject', self.project_name) self.cwd = join(self.temp_path, self.project_name) - self.env['SCRAPY_SETTINGS_MODULE'] = '%s.settings' % self.project_name + self.env['SCRAPY_SETTINGS_MODULE'] = f'{self.project_name}.settings' class GenspiderCommandTest(CommandTest): @@ -334,14 +334,14 @@ def test_arguments(self): assert exists(join(self.proj_mod_path, 'spiders', 'test_name.py')) def test_template(self, tplname='crawl'): - args = ['--template=%s' % tplname] if tplname else [] + args = [f'--template={tplname}'] if tplname else [] spname = 'test_spider' p, out, err = self.proc('genspider', spname, 'test.com', *args) - self.assertIn("Created spider %r using 
template %r in module" % (spname, tplname), out) + self.assertIn(f"Created spider {spname!r} using template {tplname!r} in module", out) self.assertTrue(exists(join(self.proj_mod_path, 'spiders', 'test_spider.py'))) modify_time_before = getmtime(join(self.proj_mod_path, 'spiders', 'test_spider.py')) p, out, err = self.proc('genspider', spname, 'test.com', *args) - self.assertIn("Spider %r already exists in module" % spname, out) + self.assertIn(f"Spider {spname!r} already exists in module", out) modify_time_after = getmtime(join(self.proj_mod_path, 'spiders', 'test_spider.py')) self.assertEqual(modify_time_after, modify_time_before) @@ -363,11 +363,11 @@ def test_dump(self): def test_same_name_as_project(self): self.assertEqual(2, self.call('genspider', self.project_name)) - assert not exists(join(self.proj_mod_path, 'spiders', '%s.py' % self.project_name)) + assert not exists(join(self.proj_mod_path, 'spiders', f'{self.project_name}.py')) def test_same_filename_as_existing_spider(self, force=False): file_name = 'example' - file_path = join(self.proj_mod_path, 'spiders', '%s.py' % file_name) + file_path = join(self.proj_mod_path, 'spiders', f'{file_name}.py') self.assertEqual(0, self.call('genspider', file_name, 'example.com')) assert exists(file_path) @@ -383,14 +383,14 @@ def test_same_filename_as_existing_spider(self, force=False): if force: p, out, err = self.proc('genspider', '--force', file_name, 'example.com') - self.assertIn("Created spider %r using template \'basic\' in module" % file_name, out) + self.assertIn(f"Created spider {file_name!r} using template \'basic\' in module", out) modify_time_after = getmtime(file_path) self.assertNotEqual(modify_time_after, modify_time_before) file_contents_after = open(file_path, 'r').read() self.assertNotEqual(file_contents_after, file_contents_before) else: p, out, err = self.proc('genspider', file_name, 'example.com') - self.assertIn("%s already exists" % (file_path), out) + self.assertIn(f"{file_path} already exists", out) modify_time_after = getmtime(file_path) self.assertEqual(modify_time_after, modify_time_before) file_contents_after = open(file_path, 'r').read() @@ -410,7 +410,7 @@ def test_same_name_as_existing_file(self, force=False): file_name = 'example' file_path = join(self.temp_path, file_name + '.py') p, out, err = self.proc('genspider', file_name, 'example.com') - self.assertIn("Created spider %r using template \'basic\' " % file_name, out) + self.assertIn(f"Created spider {file_name!r} using template \'basic\' ", out) assert exists(file_path) modify_time_before = getmtime(file_path) file_contents_before = open(file_path, 'r').read() @@ -418,14 +418,14 @@ def test_same_name_as_existing_file(self, force=False): if force: # use different template to ensure contents were changed p, out, err = self.proc('genspider', '--force', '-t', 'crawl', file_name, 'example.com') - self.assertIn("Created spider %r using template \'crawl\' " % file_name, out) + self.assertIn(f"Created spider {file_name!r} using template \'crawl\' ", out) modify_time_after = getmtime(file_path) self.assertNotEqual(modify_time_after, modify_time_before) file_contents_after = open(file_path, 'r').read() self.assertNotEqual(file_contents_after, file_contents_before) else: p, out, err = self.proc('genspider', file_name, 'example.com') - self.assertIn("%s already exists" % join(self.temp_path, file_name + ".py"), out) + self.assertIn(f"{join(self.temp_path, file_name + '.py')} already exists", out) modify_time_after = getmtime(file_path) 
self.assertEqual(modify_time_after, modify_time_before) file_contents_after = open(file_path, 'r').read() diff --git a/tests/test_contracts.py b/tests/test_contracts.py index 2e7e3ccc4e0..d0f4a68c2ce 100644 --- a/tests/test_contracts.py +++ b/tests/test_contracts.py @@ -393,7 +393,7 @@ def parse_second(self, response): return TestItem() with MockServer() as mockserver: - contract_doc = '@url {}'.format(mockserver.url('/status?n=200')) + contract_doc = f'@url {mockserver.url("/status?n=200")}' TestSameUrlSpider.parse_first.__doc__ = contract_doc TestSameUrlSpider.parse_second.__doc__ = contract_doc diff --git a/tests/test_crawl.py b/tests/test_crawl.py index 642c2465156..e703f45decb 100644 --- a/tests/test_crawl.py +++ b/tests/test_crawl.py @@ -79,7 +79,7 @@ def _test_delay(self, total, delay, randomize=False): total_time = times[-1] - times[0] average = total_time / (len(times) - 1) self.assertTrue(average > delay * tolerance, - "download delay too small: %s" % average) + f"download delay too small: {average}") # Ensure that the same test parameters would cause a failure if no # download delay is set. Otherwise, it means we are using a combination @@ -204,7 +204,7 @@ def test_unbounded_response(self): '''}) crawler = self.runner.create_crawler(SimpleSpider) with LogCapture() as log: - yield crawler.crawl(self.mockserver.url("/raw?{0}".format(query)), mockserver=self.mockserver) + yield crawler.crawl(self.mockserver.url(f"/raw?{query}"), mockserver=self.mockserver) self.assertEqual(str(log).count("Got response 200"), 1) @defer.inlineCallbacks @@ -465,7 +465,7 @@ def test_async_def_asyncio_parse_reqs_list(self): with LogCapture() as log: yield crawler.crawl(self.mockserver.url("/status?n=200"), mockserver=self.mockserver) for req_id in range(3): - self.assertIn("Got response 200, req_id %d" % req_id, str(log)) + self.assertIn(f"Got response 200, req_id {req_id}", str(log)) @defer.inlineCallbacks def test_response_ssl_certificate_none(self): diff --git a/tests/test_downloader_handlers.py b/tests/test_downloader_handlers.py index 7059f089212..0d3c42797d1 100644 --- a/tests/test_downloader_handlers.py +++ b/tests/test_downloader_handlers.py @@ -121,7 +121,7 @@ def _test(response): return self.download_request(request, Spider('foo')).addCallback(_test) def test_non_existent(self): - request = Request('file://%s' % self.mktemp()) + request = Request(f'file://{self.mktemp()}') d = self.download_request(request, Spider('foo')) return self.assertFailure(d, IOError) @@ -249,7 +249,7 @@ def tearDown(self): shutil.rmtree(self.tmpname) def getURL(self, path): - return "%s://%s:%d/%s" % (self.scheme, self.host, self.portno, path) + return f"{self.scheme}://{self.host}:{self.portno}/{path}" def test_download(self): request = Request(self.getURL('file')) @@ -300,7 +300,7 @@ def test_timeout_download_from_spider_server_hangs(self): def test_host_header_not_in_request_headers(self): def _test(response): self.assertEqual( - response.body, to_bytes('%s:%d' % (self.host, self.portno))) + response.body, to_bytes(f'{self.host}:{self.portno}')) self.assertEqual(request.headers, {}) request = Request(self.getURL('host')) @@ -583,7 +583,7 @@ def tearDown(self): shutil.rmtree(self.tmpname) def getURL(self, path): - return "%s://%s:%d/%s" % (self.scheme, self.host, self.portno, path) + return f"{self.scheme}://{self.host}:{self.portno}/{path}" def test_download(self): request = Request(self.getURL('file')) @@ -678,7 +678,7 @@ def tearDown(self): yield self.download_handler.close() def getURL(self, path): - return 
"http://127.0.0.1:%d/%s" % (self.portno, path) + return f"http://127.0.0.1:{self.portno}/{path}" def test_download_with_proxy(self): def _test(response): @@ -696,7 +696,7 @@ def _test(response): self.assertEqual(response.url, request.url) self.assertEqual(response.body, b'https://example.com') - http_proxy = '%s?noconnect' % self.getURL('') + http_proxy = f'{self.getURL("")}?noconnect' request = Request('https://example.com', meta={'proxy': http_proxy}) with self.assertWarnsRegex(ScrapyDeprecationWarning, r'Using HTTPS proxies in the noconnect mode is deprecated'): @@ -977,7 +977,7 @@ def _clean(data): return deferred def test_ftp_download_success(self): - request = Request(url="ftp://127.0.0.1:%s/file.txt" % self.portNum, + request = Request(url=f"ftp://127.0.0.1:{self.portNum}/file.txt", meta=self.req_meta) d = self.download_handler.download_request(request, None) @@ -989,7 +989,7 @@ def _test(r): def test_ftp_download_path_with_spaces(self): request = Request( - url="ftp://127.0.0.1:%s/file with spaces.txt" % self.portNum, + url=f"ftp://127.0.0.1:{self.portNum}/file with spaces.txt", meta=self.req_meta ) d = self.download_handler.download_request(request, None) @@ -1001,7 +1001,7 @@ def _test(r): return self._add_test_callbacks(d, _test) def test_ftp_download_notexist(self): - request = Request(url="ftp://127.0.0.1:%s/notexist.txt" % self.portNum, + request = Request(url=f"ftp://127.0.0.1:{self.portNum}/notexist.txt", meta=self.req_meta) d = self.download_handler.download_request(request, None) @@ -1015,7 +1015,7 @@ def test_ftp_local_filename(self): os.close(f) meta = {"ftp_local_filename": local_fname} meta.update(self.req_meta) - request = Request(url="ftp://127.0.0.1:%s/file.txt" % self.portNum, + request = Request(url=f"ftp://127.0.0.1:{self.portNum}/file.txt", meta=meta) d = self.download_handler.download_request(request, None) @@ -1037,7 +1037,7 @@ def test_invalid_credentials(self): meta = dict(self.req_meta) meta.update({"ftp_password": 'invalid'}) - request = Request(url="ftp://127.0.0.1:%s/file.txt" % self.portNum, + request = Request(url=f"ftp://127.0.0.1:{self.portNum}/file.txt", meta=meta) d = self.download_handler.download_request(request, None) diff --git a/tests/test_downloadermiddleware.py b/tests/test_downloadermiddleware.py index a9190c62b61..79f24c8a150 100644 --- a/tests/test_downloadermiddleware.py +++ b/tests/test_downloadermiddleware.py @@ -84,7 +84,7 @@ def test_3xx_and_invalid_gzipped_body_must_redirect(self): }) ret = self._download(request=req, response=resp) self.assertTrue(isinstance(ret, Request), - "Not redirected: {0!r}".format(ret)) + f"Not redirected: {ret!r}") self.assertEqual(to_bytes(ret.url), resp.headers['Location'], "Not redirected to location header") diff --git a/tests/test_downloadermiddleware_decompression.py b/tests/test_downloadermiddleware_decompression.py index dbae4d3ae7a..b2b5ce77dc1 100644 --- a/tests/test_downloadermiddleware_decompression.py +++ b/tests/test_downloadermiddleware_decompression.py @@ -28,7 +28,7 @@ def test_known_compression_formats(self): for fmt in self.test_formats: rsp = self.test_responses[fmt] new = self.mw.process_response(None, rsp, self.spider) - error_msg = 'Failed %s, response type %s' % (fmt, type(new).__name__) + error_msg = f'Failed {fmt}, response type {type(new).__name__}' assert isinstance(new, XmlResponse), error_msg assert_samelines(self, new.body, self.uncompressed_body, fmt) diff --git a/tests/test_downloadermiddleware_httpcache.py b/tests/test_downloadermiddleware_httpcache.py index 
299fb0eb81c..0c6dcf2aa53 100644 --- a/tests/test_downloadermiddleware_httpcache.py +++ b/tests/test_downloadermiddleware_httpcache.py @@ -324,7 +324,7 @@ def test_response_cacheability(self): ] with self._middleware() as mw: for idx, (shouldcache, status, headers) in enumerate(responses): - req0 = Request('http://example-%d.com' % idx) + req0 = Request(f'http://example-{idx}.com') res0 = Response(req0.url, status=status, headers=headers) res1 = self._process_requestresponse(mw, req0, res0) res304 = res0.replace(status=304) @@ -343,7 +343,7 @@ def test_response_cacheability(self): with self._middleware(HTTPCACHE_ALWAYS_STORE=True) as mw: for idx, (_, status, headers) in enumerate(responses): shouldcache = 'no-store' not in headers.get('Cache-Control', '') and status != 304 - req0 = Request('http://example2-%d.com' % idx) + req0 = Request(f'http://example2-{idx}.com') res0 = Response(req0.url, status=status, headers=headers) res1 = self._process_requestresponse(mw, req0, res0) res304 = res0.replace(status=304) @@ -386,7 +386,7 @@ def test_cached_and_fresh(self): ] with self._middleware() as mw: for idx, (status, headers) in enumerate(sampledata): - req0 = Request('http://example-%d.com' % idx) + req0 = Request(f'http://example-{idx}.com') res0 = Response(req0.url, status=status, headers=headers) # cache fresh response res1 = self._process_requestresponse(mw, req0, res0) @@ -423,7 +423,7 @@ def test_cached_and_stale(self): ] with self._middleware() as mw: for idx, (status, headers) in enumerate(sampledata): - req0 = Request('http://example-%d.com' % idx) + req0 = Request(f'http://example-{idx}.com') res0a = Response(req0.url, status=status, headers=headers) # cache expired response res1 = self._process_requestresponse(mw, req0, res0a) @@ -490,7 +490,7 @@ def test_ignore_response_cache_controls(self): ] with self._middleware(HTTPCACHE_IGNORE_RESPONSE_CACHE_CONTROLS=['no-cache', 'no-store']) as mw: for idx, (status, headers) in enumerate(sampledata): - req0 = Request('http://example-%d.com' % idx) + req0 = Request(f'http://example-{idx}.com') res0 = Response(req0.url, status=status, headers=headers) # cache fresh response res1 = self._process_requestresponse(mw, req0, res0) diff --git a/tests/test_downloadermiddleware_redirect.py b/tests/test_downloadermiddleware_redirect.py index 13133213134..816ac144069 100644 --- a/tests/test_downloadermiddleware_redirect.py +++ b/tests/test_downloadermiddleware_redirect.py @@ -22,7 +22,7 @@ def test_priority_adjust(self): def test_redirect_3xx_permanent(self): def _test(method, status=301): - url = 'http://www.example.com/{}'.format(status) + url = f'http://www.example.com/{status}' url2 = 'http://www.example.com/redirected' req = Request(url, method=method) rsp = Response(url, headers={'Location': url2}, status=status) @@ -79,7 +79,7 @@ def test_redirect_302(self): self.assertEqual(req2.method, 'GET') assert 'Content-Type' not in req2.headers, "Content-Type header must not be present in redirected request" assert 'Content-Length' not in req2.headers, "Content-Length header must not be present in redirected request" - assert not req2.body, "Redirected body must be empty, not '%s'" % req2.body + assert not req2.body, f"Redirected body must be empty, not '{req2.body}'" # response without Location header but with status code is 3XX should be ignored del rsp.headers['Location'] @@ -207,8 +207,8 @@ def setUp(self): self.mw = MetaRefreshMiddleware.from_crawler(crawler) def _body(self, interval=5, url='http://example.org/newpage'): - html = """""" - return 
html.format(interval, url).encode('utf-8') + html = f"""""" + return html.encode('utf-8') def test_priority_adjust(self): req = Request('http://a.com') @@ -243,7 +243,7 @@ def test_meta_refresh_trough_posted_request(self): self.assertEqual(req2.method, 'GET') assert 'Content-Type' not in req2.headers, "Content-Type header must not be present in redirected request" assert 'Content-Length' not in req2.headers, "Content-Length header must not be present in redirected request" - assert not req2.body, "Redirected body must be empty, not '%s'" % req2.body + assert not req2.body, f"Redirected body must be empty, not '{req2.body}'" def test_max_redirect_times(self): self.mw.max_redirect_times = 1 diff --git a/tests/test_downloadermiddleware_retry.py b/tests/test_downloadermiddleware_retry.py index 29357ba947e..364ce0c8948 100644 --- a/tests/test_downloadermiddleware_retry.py +++ b/tests/test_downloadermiddleware_retry.py @@ -94,7 +94,7 @@ def test_twistederrors(self): ] for exc in exceptions: - req = Request('http://www.scrapytest.org/%s' % exc.__name__) + req = Request(f'http://www.scrapytest.org/{exc.__name__}') self._test_retry_exception(req, exc('foo')) stats = self.crawler.stats diff --git a/tests/test_engine.py b/tests/test_engine.py index 1b848ac7298..3629aa1aa3b 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -127,8 +127,8 @@ def start_test_site(debug=False): port = reactor.listenTCP(0, server.Site(r), interface="127.0.0.1") if debug: - print("Test server running at http://localhost:%d/ - hit Ctrl-C to finish." - % port.getHost().port) + print(f"Test server running at http://localhost:{port.getHost().port}/ " + "- hit Ctrl-C to finish.") return port @@ -185,7 +185,7 @@ def stop(self): self.deferred.callback(None) def geturl(self, path): - return "http://localhost:%s%s" % (self.portno, path) + return f"http://localhost:{self.portno}{path}" def getpath(self, url): u = urlparse(url) @@ -265,7 +265,7 @@ def _assert_visited_urls(self): "/item1.html", "/item2.html", "/item999.html"] urls_visited = {rp[0].url for rp in self.run.respplug} urls_expected = {self.run.geturl(p) for p in must_be_visited} - assert urls_expected <= urls_visited, "URLs not visited: %s" % list(urls_expected - urls_visited) + assert urls_expected <= urls_visited, f"URLs not visited: {list(urls_expected - urls_visited)}" def _assert_scheduled_requests(self, urls_to_visit=None): self.assertEqual(urls_to_visit, len(self.run.reqplug)) @@ -413,16 +413,19 @@ def test_crawler(self): yield self.run.run() log.check_present(("scrapy.core.downloader.handlers.http11", "DEBUG", - "Download stopped for from signal handler" - " StopDownloadCrawlerRun.bytes_received".format(self.run.portno))) + f"Download stopped for " + "from signal handler" + " StopDownloadCrawlerRun.bytes_received")) log.check_present(("scrapy.core.downloader.handlers.http11", "DEBUG", - "Download stopped for from signal handler" - " StopDownloadCrawlerRun.bytes_received".format(self.run.portno))) + f"Download stopped for " + "from signal handler" + " StopDownloadCrawlerRun.bytes_received")) log.check_present(("scrapy.core.downloader.handlers.http11", "DEBUG", - "Download stopped for from signal handler" - " StopDownloadCrawlerRun.bytes_received".format(self.run.portno))) + f"Download stopped for " + "from signal handler" + " StopDownloadCrawlerRun.bytes_received")) self._assert_visited_urls() self._assert_scheduled_requests(urls_to_visit=9) self._assert_downloaded_responses() diff --git a/tests/test_feedexport.py b/tests/test_feedexport.py index 
850485b5ed6..94568581aa2 100644 --- a/tests/test_feedexport.py +++ b/tests/test_feedexport.py @@ -184,8 +184,7 @@ def test_overwrite_active_mode(self): def test_uri_auth_quote(self): # RFC3986: 3.2.1. User Information pw_quoted = quote(string.punctuation, safe='') - st = FTPFeedStorage('ftp://foo:%s@example.com/some_path' % pw_quoted, - {}) + st = FTPFeedStorage(f'ftp://foo:{pw_quoted}@example.com/some_path', {}) self.assertEqual(st.password, string.punctuation) @@ -1230,7 +1229,7 @@ def test_multiple_feeds_success_logs_blocking_feed_storage(self): print(log) for fmt in ['json', 'xml', 'csv']: - self.assertIn('Stored %s feed (2 items)' % fmt, str(log)) + self.assertIn(f'Stored {fmt} feed (2 items)', str(log)) @defer.inlineCallbacks def test_multiple_feeds_failing_logs_blocking_feed_storage(self): @@ -1251,7 +1250,7 @@ def test_multiple_feeds_failing_logs_blocking_feed_storage(self): print(log) for fmt in ['json', 'xml', 'csv']: - self.assertIn('Error storing %s feed (2 items)' % fmt, str(log)) + self.assertIn(f'Error storing {fmt} feed (2 items)', str(log)) class BatchDeliveriesTest(FeedExportTestBase): @@ -1582,10 +1581,8 @@ def test_s3_export(self): chars = [random.choice(ascii_letters + digits) for _ in range(15)] filename = ''.join(chars) - prefix = 'tmp/{filename}'.format(filename=filename) - s3_test_file_uri = 's3://{bucket_name}/{prefix}/%(batch_time)s.json'.format( - bucket_name=s3_test_bucket_name, prefix=prefix - ) + prefix = f'tmp/{filename}' + s3_test_file_uri = f's3://{s3_test_bucket_name}/{prefix}/%(batch_time)s.json' storage = S3FeedStorage(s3_test_bucket_name, access_key, secret_key) settings = Settings({ 'FEEDS': { diff --git a/tests/test_loader_deprecated.py b/tests/test_loader_deprecated.py index 624dd9ab8cd..41afa289665 100644 --- a/tests/test_loader_deprecated.py +++ b/tests/test_loader_deprecated.py @@ -657,7 +657,7 @@ def test_output(self): self.assertEqual( test, expected, - msg='test "{}" got {} expected {}'.format(tl, test, expected) + msg=f'test "{tl}" got {test} expected {expected}' ) diff --git a/tests/test_logformatter.py b/tests/test_logformatter.py index 41ff3651d99..dc5be398f2c 100644 --- a/tests/test_logformatter.py +++ b/tests/test_logformatter.py @@ -20,7 +20,7 @@ class CustomItem(Item): name = Field() def __str__(self): - return "name: %s" % self['name'] + return f"name: {self['name']}" class LogFormatterTestCase(unittest.TestCase): diff --git a/tests/test_middleware.py b/tests/test_middleware.py index b2b75ef2019..e3e46db07b0 100644 --- a/tests/test_middleware.py +++ b/tests/test_middleware.py @@ -50,7 +50,7 @@ class TestMiddlewareManager(MiddlewareManager): @classmethod def _get_mwlist_from_settings(cls, settings): - return ['tests.test_middleware.%s' % x for x in ['M1', 'MOff', 'M3']] + return [f'tests.test_middleware.{x}' for x in ['M1', 'MOff', 'M3']] def _add_middleware(self, mw): super()._add_middleware(mw) diff --git a/tests/test_pipeline_crawl.py b/tests/test_pipeline_crawl.py index 9af5affec98..55fcfa7ba16 100644 --- a/tests/test_pipeline_crawl.py +++ b/tests/test_pipeline_crawl.py @@ -123,10 +123,10 @@ def _assert_files_download_failure(self, crawler, items, code, logs): self.assertEqual(crawler.stats.get_value('downloader/request_method_count/GET'), 4) self.assertEqual(crawler.stats.get_value('downloader/response_count'), 4) self.assertEqual(crawler.stats.get_value('downloader/response_status_count/200'), 1) - self.assertEqual(crawler.stats.get_value('downloader/response_status_count/%d' % code), 3) + 
self.assertEqual(crawler.stats.get_value(f'downloader/response_status_count/{code}'), 3) # check that logs do show the failure on the file downloads - file_dl_failure = 'File (code: %d): Error downloading file from' % code + file_dl_failure = f'File (code: {code}): Error downloading file from' self.assertEqual(logs.count(file_dl_failure), 3) # check that no files were written to the media store diff --git a/tests/test_pipeline_files.py b/tests/test_pipeline_files.py index b19b4ff2ab7..1dd7031fe77 100644 --- a/tests/test_pipeline_files.py +++ b/tests/test_pipeline_files.py @@ -167,7 +167,7 @@ def test_file_path_from_item(self): """ class CustomFilesPipeline(FilesPipeline): def file_path(self, request, response=None, info=None, item=None): - return 'full/%s' % item.get('path') + return f'full/{item.get("path")}' file_path = CustomFilesPipeline.from_settings(Settings({'FILES_STORE': self.tempdir})).file_path item = dict(path='path-to-store-file') @@ -495,7 +495,7 @@ def test_persist(self): self.assertIn('last_modified', stat) self.assertIn('checksum', stat) self.assertEqual(stat['checksum'], 'd113d66b2ec7258724a268bd88eef6b6') - path = '%s/%s' % (store.basedir, path) + path = f'{store.basedir}/{path}' content = get_ftp_content_and_delete( path, store.host, store.port, store.username, store.password, store.USE_ACTIVE_MODE) diff --git a/tests/test_pipeline_images.py b/tests/test_pipeline_images.py index 082e9ee2159..ad138a2dc55 100644 --- a/tests/test_pipeline_images.py +++ b/tests/test_pipeline_images.py @@ -128,11 +128,11 @@ def file_key(self, url): def image_key(self, url): image_guid = hashlib.sha1(to_bytes(url)).hexdigest() - return 'empty/%s.jpg' % (image_guid) + return f'empty/{image_guid}.jpg' def thumb_key(self, url, thumb_id): thumb_guid = hashlib.sha1(to_bytes(url)).hexdigest() - return 'thumbsup/%s/%s.jpg' % (thumb_id, thumb_guid) + return f'thumbsup/{thumb_id}/{thumb_guid}.jpg' class ImagesPipelineTestCaseFieldsMixin: diff --git a/tests/test_proxy_connect.py b/tests/test_proxy_connect.py index a56e3c39a1f..d3f58634e2f 100644 --- a/tests/test_proxy_connect.py +++ b/tests/test_proxy_connect.py @@ -37,14 +37,14 @@ def start(self): '-c', script, '--listen-host', '127.0.0.1', '--listen-port', '0', - '--proxyauth', '%s:%s' % (self.auth_user, self.auth_pass), + '--proxyauth', f'{self.auth_user}:{self.auth_pass}', '--certs', cert_path, '--ssl-insecure', ], stdout=PIPE, env=get_testenv()) line = self.proc.stdout.readline().decode('utf-8') host_port = re.search(r'listening at http://([^:]+:\d+)', line).group(1) - address = 'http://%s:%s@%s' % (self.auth_user, self.auth_pass, host_port) + address = f'http://{self.auth_user}:{self.auth_pass}@{host_port}' return address def stop(self): @@ -118,7 +118,7 @@ def test_https_tunnel_without_leak_proxy_authorization_header(self): def _assert_got_response_code(self, code, log): print(log) - self.assertEqual(str(log).count('Crawled (%d)' % code), 1) + self.assertEqual(str(log).count(f'Crawled ({code})'), 1) def _assert_got_tunnel_error(self, log): print(log) diff --git a/tests/test_request_attribute_binding.py b/tests/test_request_attribute_binding.py index b60b7c57955..907117468d3 100644 --- a/tests/test_request_attribute_binding.py +++ b/tests/test_request_attribute_binding.py @@ -79,7 +79,7 @@ def test_response_200(self): @defer.inlineCallbacks def test_response_error(self): for status in ("404", "500"): - url = self.mockserver.url("/status?n={}".format(status)) + url = self.mockserver.url(f"/status?n={status}") crawler = 
CrawlerRunner().create_crawler(SingleRequestSpider) yield crawler.crawl(seed=url, mockserver=self.mockserver) failure = crawler.spider.meta["failure"] @@ -135,7 +135,7 @@ def signal_handler(response, request, spider): self.assertEqual(signal_params["request"].url, OVERRIDEN_URL) log.check_present( - ("scrapy.core.engine", "DEBUG", "Crawled (200) (referer: None)".format(OVERRIDEN_URL)), + ("scrapy.core.engine", "DEBUG", f"Crawled (200) (referer: None)"), ) @defer.inlineCallbacks diff --git a/tests/test_responsetypes.py b/tests/test_responsetypes.py index a175f88caed..c07d3a99c3c 100644 --- a/tests/test_responsetypes.py +++ b/tests/test_responsetypes.py @@ -17,7 +17,7 @@ def test_from_filename(self): ] for source, cls in mappings: retcls = responsetypes.from_filename(source) - assert retcls is cls, "%s ==> %s != %s" % (source, retcls, cls) + assert retcls is cls, f"{source} ==> {retcls} != {cls}" def test_from_content_disposition(self): mappings = [ @@ -32,7 +32,7 @@ def test_from_content_disposition(self): ] for source, cls in mappings: retcls = responsetypes.from_content_disposition(source) - assert retcls is cls, "%s ==> %s != %s" % (source, retcls, cls) + assert retcls is cls, f"{source} ==> {retcls} != {cls}" def test_from_content_type(self): mappings = [ @@ -47,7 +47,7 @@ def test_from_content_type(self): ] for source, cls in mappings: retcls = responsetypes.from_content_type(source) - assert retcls is cls, "%s ==> %s != %s" % (source, retcls, cls) + assert retcls is cls, f"{source} ==> {retcls} != {cls}" def test_from_body(self): mappings = [ @@ -58,7 +58,7 @@ def test_from_body(self): ] for source, cls in mappings: retcls = responsetypes.from_body(source) - assert retcls is cls, "%s ==> %s != %s" % (source, retcls, cls) + assert retcls is cls, f"{source} ==> {retcls} != {cls}" def test_from_headers(self): mappings = [ @@ -70,7 +70,7 @@ def test_from_headers(self): for source, cls in mappings: source = Headers(source) retcls = responsetypes.from_headers(source) - assert retcls is cls, "%s ==> %s != %s" % (source, retcls, cls) + assert retcls is cls, f"{source} ==> {retcls} != {cls}" def test_from_args(self): # TODO: add more tests that check precedence between the different arguments @@ -86,7 +86,7 @@ def test_from_args(self): ] for source, cls in mappings: retcls = responsetypes.from_args(**source) - assert retcls is cls, "%s ==> %s != %s" % (source, retcls, cls) + assert retcls is cls, f"{source} ==> {retcls} != {cls}" def test_custom_mime_types_loaded(self): # check that mime.types files shipped with scrapy are loaded diff --git a/tests/test_selector.py b/tests/test_selector.py index 62036ad8c8b..cff8d03935a 100644 --- a/tests/test_selector.py +++ b/tests/test_selector.py @@ -88,7 +88,7 @@ def test_weakref_slots(self): """Check that classes are using slots and are weak-referenceable""" x = Selector(text='') weakref.ref(x) - assert not hasattr(x, '__dict__'), "%s does not use __slots__" % x.__class__.__name__ + assert not hasattr(x, '__dict__'), f"{x.__class__.__name__} does not use __slots__" def test_selector_bad_args(self): with self.assertRaisesRegex(ValueError, 'received both response and text'): diff --git a/tests/test_signals.py b/tests/test_signals.py index d6ae526bed8..a43f00b27c8 100644 --- a/tests/test_signals.py +++ b/tests/test_signals.py @@ -13,7 +13,7 @@ class ItemSpider(Spider): def start_requests(self): for index in range(10): - yield Request(self.mockserver.url('/status?n=200&id=%d' % index), + yield Request(self.mockserver.url(f'/status?n=200&id={index}'), 
meta={'index': index}) def parse(self, response): diff --git a/tests/test_spidermiddleware_output_chain.py b/tests/test_spidermiddleware_output_chain.py index 79eda35b304..2f454addcea 100644 --- a/tests/test_spidermiddleware_output_chain.py +++ b/tests/test_spidermiddleware_output_chain.py @@ -163,11 +163,11 @@ def parse(self, response): class _GeneratorDoNothingMiddleware: def process_spider_output(self, response, result, spider): for r in result: - r['processed'].append('{}.process_spider_output'.format(self.__class__.__name__)) + r['processed'].append(f'{self.__class__.__name__}.process_spider_output') yield r def process_spider_exception(self, response, exception, spider): - method = '{}.process_spider_exception'.format(self.__class__.__name__) + method = f'{self.__class__.__name__}.process_spider_exception' spider.logger.info('%s: %s caught', method, exception.__class__.__name__) return None @@ -175,12 +175,12 @@ def process_spider_exception(self, response, exception, spider): class GeneratorFailMiddleware: def process_spider_output(self, response, result, spider): for r in result: - r['processed'].append('{}.process_spider_output'.format(self.__class__.__name__)) + r['processed'].append(f'{self.__class__.__name__}.process_spider_output') yield r raise LookupError() def process_spider_exception(self, response, exception, spider): - method = '{}.process_spider_exception'.format(self.__class__.__name__) + method = f'{self.__class__.__name__}.process_spider_exception' spider.logger.info('%s: %s caught', method, exception.__class__.__name__) yield {'processed': [method]} @@ -192,11 +192,11 @@ class GeneratorDoNothingAfterFailureMiddleware(_GeneratorDoNothingMiddleware): class GeneratorRecoverMiddleware: def process_spider_output(self, response, result, spider): for r in result: - r['processed'].append('{}.process_spider_output'.format(self.__class__.__name__)) + r['processed'].append(f'{self.__class__.__name__}.process_spider_output') yield r def process_spider_exception(self, response, exception, spider): - method = '{}.process_spider_exception'.format(self.__class__.__name__) + method = f'{self.__class__.__name__}.process_spider_exception' spider.logger.info('%s: %s caught', method, exception.__class__.__name__) yield {'processed': [method]} @@ -229,12 +229,12 @@ class _NotGeneratorDoNothingMiddleware: def process_spider_output(self, response, result, spider): out = [] for r in result: - r['processed'].append('{}.process_spider_output'.format(self.__class__.__name__)) + r['processed'].append(f'{self.__class__.__name__}.process_spider_output') out.append(r) return out def process_spider_exception(self, response, exception, spider): - method = '{}.process_spider_exception'.format(self.__class__.__name__) + method = f'{self.__class__.__name__}.process_spider_exception' spider.logger.info('%s: %s caught', method, exception.__class__.__name__) return None @@ -243,13 +243,13 @@ class NotGeneratorFailMiddleware: def process_spider_output(self, response, result, spider): out = [] for r in result: - r['processed'].append('{}.process_spider_output'.format(self.__class__.__name__)) + r['processed'].append(f'{self.__class__.__name__}.process_spider_output') out.append(r) raise ReferenceError() return out def process_spider_exception(self, response, exception, spider): - method = '{}.process_spider_exception'.format(self.__class__.__name__) + method = f'{self.__class__.__name__}.process_spider_exception' spider.logger.info('%s: %s caught', method, exception.__class__.__name__) return [{'processed': 
[method]}] @@ -262,12 +262,12 @@ class NotGeneratorRecoverMiddleware: def process_spider_output(self, response, result, spider): out = [] for r in result: - r['processed'].append('{}.process_spider_output'.format(self.__class__.__name__)) + r['processed'].append(f'{self.__class__.__name__}.process_spider_output') out.append(r) return out def process_spider_exception(self, response, exception, spider): - method = '{}.process_spider_exception'.format(self.__class__.__name__) + method = f'{self.__class__.__name__}.process_spider_exception' spider.logger.info('%s: %s caught', method, exception.__class__.__name__) return [{'processed': [method]}] diff --git a/tests/test_utils_curl.py b/tests/test_utils_curl.py index 6b05c87715e..f5d684d3f50 100644 --- a/tests/test_utils_curl.py +++ b/tests/test_utils_curl.py @@ -16,7 +16,7 @@ def _test_command(self, curl_command, expected_result): try: Request(**result) except TypeError as e: - self.fail("Request kwargs are not correct {}".format(e)) + self.fail(f"Request kwargs are not correct {e}") def test_get(self): curl_command = "curl http://example.org/" diff --git a/tests/test_utils_datatypes.py b/tests/test_utils_datatypes.py index aa18ef1f3bd..e4bccf30e9a 100644 --- a/tests/test_utils_datatypes.py +++ b/tests/test_utils_datatypes.py @@ -299,7 +299,7 @@ def test_cache_without_limit(self): cache = LocalWeakReferencedCache() refs = [] for x in range(max): - refs.append(Request('https://example.org/{}'.format(x))) + refs.append(Request(f'https://example.org/{x}')) cache[refs[-1]] = x self.assertEqual(len(cache), max) for i, r in enumerate(refs): diff --git a/tests/test_utils_defer.py b/tests/test_utils_defer.py index 8c84331b917..e60242a3b61 100644 --- a/tests/test_utils_defer.py +++ b/tests/test_utils_defer.py @@ -40,15 +40,15 @@ def _append(v): def cb1(value, arg1, arg2): - return "(cb1 %s %s %s)" % (value, arg1, arg2) + return f"(cb1 {value} {arg1} {arg2})" def cb2(value, arg1, arg2): - return defer.succeed("(cb2 %s %s %s)" % (value, arg1, arg2)) + return defer.succeed(f"(cb2 {value} {arg1} {arg2})") def cb3(value, arg1, arg2): - return "(cb3 %s %s %s)" % (value, arg1, arg2) + return f"(cb3 {value} {arg1} {arg2})" def cb_fail(value, arg1, arg2): @@ -56,7 +56,7 @@ def cb_fail(value, arg1, arg2): def eb1(failure, arg1, arg2): - return "(eb1 %s %s %s)" % (failure.value.__class__.__name__, arg1, arg2) + return f"(eb1 {failure.value.__class__.__name__} {arg1} {arg2})" class DeferUtilsTest(unittest.TestCase): diff --git a/tests/test_utils_iterators.py b/tests/test_utils_iterators.py index 50190d4d1b9..79f5a2bbeba 100644 --- a/tests/test_utils_iterators.py +++ b/tests/test_utils_iterators.py @@ -409,7 +409,7 @@ def test_body_or_str(self): def _assert_type_and_value(self, a, b, obj): self.assertTrue(type(a) is type(b), - 'Got {}, expected {} for {!r}'.format(type(a), type(b), obj)) + f'Got {type(a)}, expected {type(b)} for { obj!r}') self.assertEqual(a, b) diff --git a/tests/test_utils_url.py b/tests/test_utils_url.py index 2f885a0e800..144c7bd76a9 100644 --- a/tests/test_utils_url.py +++ b/tests/test_utils_url.py @@ -213,7 +213,7 @@ def create_guess_scheme_t(args): def do_expected(self): url = guess_scheme(args[0]) assert url.startswith(args[1]), \ - 'Wrong scheme guessed: for `%s` got `%s`, expected `%s...`' % (args[0], url, args[1]) + f'Wrong scheme guessed: for `{args[0]}` got `{url}`, expected `{args[1]}...`' return do_expected @@ -254,7 +254,7 @@ def do_expected(self): start=1, ): t_method = create_guess_scheme_t(args) - t_method.__name__ = 
'test_uri_%03d' % k + t_method.__name__ = f'test_uri_{k:03}' setattr(GuessSchemeTest, t_method.__name__, t_method) # TODO: the following tests do not pass with current implementation @@ -269,7 +269,7 @@ def do_expected(self): start=1, ): t_method = create_skipped_scheme_t(args) - t_method.__name__ = 'test_uri_skipped_%03d' % k + t_method.__name__ = f'test_uri_skipped_{k:03}' setattr(GuessSchemeTest, t_method.__name__, t_method) diff --git a/tests/test_webclient.py b/tests/test_webclient.py index ee64d455c55..a60181a3a89 100644 --- a/tests/test_webclient.py +++ b/tests/test_webclient.py @@ -253,7 +253,7 @@ def tearDown(self): shutil.rmtree(self.tmpname) def getURL(self, path): - return "http://127.0.0.1:%d/%s" % (self.portno, path) + return f"http://127.0.0.1:{self.portno}/{path}" def testPayload(self): s = "0123456789" * 10 @@ -265,7 +265,7 @@ def testHostHeader(self): # it should extract from url return defer.gatherResults([ getPage(self.getURL("host")).addCallback( - self.assertEqual, to_bytes("127.0.0.1:%d" % self.portno)), + self.assertEqual, to_bytes(f"127.0.0.1:{self.portno}")), getPage(self.getURL("host"), headers={"Host": "www.example.com"}).addCallback( self.assertEqual, to_bytes("www.example.com"))]) @@ -298,7 +298,7 @@ def test_timeoutNotTriggering(self): """ d = getPage(self.getURL("host"), timeout=100) d.addCallback( - self.assertEqual, to_bytes("127.0.0.1:%d" % self.portno)) + self.assertEqual, to_bytes(f"127.0.0.1:{self.portno}")) return d def test_timeoutTriggering(self): @@ -376,7 +376,7 @@ def _listen(self, site): interface="127.0.0.1") def getURL(self, path): - return "https://127.0.0.1:%d/%s" % (self.portno, path) + return f"https://127.0.0.1:{self.portno}/{path}" def setUp(self): self.tmpname = self.mktemp()