|
| 1 | +# -*- coding: utf-8 -*- |
| 2 | + |
| 3 | +# Scrapy settings for centralifact project |
| 4 | +# |
| 5 | +# For simplicity, this file contains only settings considered important or |
| 6 | +# commonly used. You can find more settings consulting the documentation: |
| 7 | +# |
| 8 | +# https://doc.scrapy.org/en/latest/topics/settings.html |
| 9 | +# https://doc.scrapy.org/en/latest/topics/downloader-middleware.html |
| 10 | +# https://doc.scrapy.org/en/latest/topics/spider-middleware.html |
| 11 | + |
import os

# Project identity and the packages Scrapy searches / generates spiders in.
BOT_NAME = 'centralifact'
SPIDER_MODULES = ['centralifact.spiders']
NEWSPIDER_MODULE = 'centralifact.spiders'

# Connection parameters for the project's MySQL database.
# The password is read from the CENTRALIFACT_DB_PASSWORD environment variable
# so the secret is not kept in version control; it falls back to an empty
# string when the variable is unset.
# NOTE(review): a real password was previously committed here -- rotate it.
DATABASE = {
    'drivername': 'mysql',
    'host': 'burns.cs.indiana.edu',
    # Kept as a string, exactly as before, for whatever consumes this dict.
    'port': '3306',
    'username': 'centralifact',
    'password': os.environ.get('CENTRALIFACT_DB_PASSWORD', ''),
    'database': 'centralifact',
}

# Identify the crawler to the sites it visits.
# NOTE(review): the contact address in parentheses reads as a placeholder in
# this copy -- confirm it is a reachable address.
USER_AGENT = 'Indiana-University-Researcher-Zoher-Kachwala([email protected])'

# Register the scrapy-deltafetch spider middleware at priority 100.
SPIDER_MIDDLEWARES = {
    'scrapy_deltafetch.DeltaFetch': 100,
}
DELTAFETCH_ENABLED = True
# presumably False keeps DeltaFetch's record of earlier crawls between runs --
# verify against the scrapy-deltafetch documentation.
DELTAFETCH_RESET = False

# Crawl politely: enable AutoThrottle and obey robots.txt.
AUTOTHROTTLE_ENABLED = True
ROBOTSTXT_OBEY = True
| 31 | + |
| 32 | +# Crawl responsibly by identifying yourself (and your website) on the user-agent |
| 33 | +#USER_AGENT = 'centralifact (+http://www.yourdomain.com)' |
| 34 | + |
| 35 | +# Obey robots.txt rules |
| 36 | +#ROBOTSTXT_OBEY = True |
| 37 | + |
| 38 | +# Configure maximum concurrent requests performed by Scrapy (default: 16) |
| 39 | +#CONCURRENT_REQUESTS = 32 |
| 40 | + |
| 41 | +# Configure a delay for requests for the same website (default: 0) |
| 42 | +# See https://doc.scrapy.org/en/latest/topics/settings.html#download-delay |
| 43 | +# See also autothrottle settings and docs |
| 44 | +#DOWNLOAD_DELAY = 3 |
| 45 | +# The download delay setting will honor only one of: |
| 46 | +#CONCURRENT_REQUESTS_PER_DOMAIN = 16 |
| 47 | +#CONCURRENT_REQUESTS_PER_IP = 16 |
| 48 | + |
| 49 | +# Disable cookies (enabled by default) |
| 50 | +#COOKIES_ENABLED = False |
| 51 | + |
| 52 | +# Disable Telnet Console (enabled by default) |
| 53 | +#TELNETCONSOLE_ENABLED = False |
| 54 | + |
| 55 | +# Override the default request headers: |
| 56 | +#DEFAULT_REQUEST_HEADERS = { |
| 57 | +# 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', |
| 58 | +# 'Accept-Language': 'en', |
| 59 | +#} |
| 60 | + |
| 61 | +# Enable or disable spider middlewares |
| 62 | +# See https://doc.scrapy.org/en/latest/topics/spider-middleware.html |
| 63 | +#SPIDER_MIDDLEWARES = { |
| 64 | +# 'centralifact.middlewares.CentralifactSpiderMiddleware': 543, |
| 65 | +#} |
| 66 | + |
| 67 | +# Enable or disable downloader middlewares |
| 68 | +# See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html |
| 69 | +#DOWNLOADER_MIDDLEWARES = { |
| 70 | +# 'centralifact.middlewares.CentralifactDownloaderMiddleware': 543, |
| 71 | +#} |
| 72 | + |
| 73 | +# Enable or disable extensions |
| 74 | +# See https://doc.scrapy.org/en/latest/topics/extensions.html |
| 75 | +#EXTENSIONS = { |
| 76 | +# 'scrapy.extensions.telnet.TelnetConsole': None, |
| 77 | +#} |
| 78 | + |
| 79 | +# Configure item pipelines |
| 80 | +# See https://doc.scrapy.org/en/latest/topics/item-pipeline.html |
| 81 | +#ITEM_PIPELINES = { |
| 82 | +# 'centralifact.pipelines.CentralifactPipeline': 300, |
| 83 | +#} |
| 84 | + |
| 85 | +# Enable and configure the AutoThrottle extension (disabled by default) |
| 86 | +# See https://doc.scrapy.org/en/latest/topics/autothrottle.html |
| 87 | +#AUTOTHROTTLE_ENABLED = True |
| 88 | +# The initial download delay |
| 89 | +#AUTOTHROTTLE_START_DELAY = 5 |
| 90 | +# The maximum download delay to be set in case of high latencies |
| 91 | +#AUTOTHROTTLE_MAX_DELAY = 60 |
| 92 | +# The average number of requests Scrapy should be sending in parallel to |
| 93 | +# each remote server |
| 94 | +#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0 |
| 95 | +# Enable showing throttling stats for every response received: |
| 96 | +#AUTOTHROTTLE_DEBUG = False |
| 97 | + |
| 98 | +# Enable and configure HTTP caching (disabled by default) |
| 99 | +# See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings |
| 100 | +#HTTPCACHE_ENABLED = True |
| 101 | +#HTTPCACHE_EXPIRATION_SECS = 0 |
| 102 | +#HTTPCACHE_DIR = 'httpcache' |
| 103 | +#HTTPCACHE_IGNORE_HTTP_CODES = [] |
| 104 | +#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage' |
0 commit comments