Commit 62b98e2

remove Elsevier API keys, add legal notice

hhaoyan committed Aug 16, 2019
1 parent 8542c2b commit 62b98e2
Showing 3 changed files with 53 additions and 37 deletions.
64 changes: 31 additions & 33 deletions Borges/settings.py
@@ -14,80 +14,79 @@
 SPIDER_MODULES = ['Borges.spiders']
 NEWSPIDER_MODULE = 'Borges.spiders'
 
 
 # Crawl responsibly by identifying yourself (and your website) on the user-agent
-#USER_AGENT = 'Borges (+http://www.yourdomain.com)'
+# USER_AGENT = 'Borges (+http://www.yourdomain.com)'
 
 # Obey robots.txt rules
 ROBOTSTXT_OBEY = True
 
 # Configure maximum concurrent requests performed by Scrapy (default: 16)
-#CONCURRENT_REQUESTS = 32
+# CONCURRENT_REQUESTS = 32
 
 # Configure a delay for requests for the same website (default: 0)
 # See https://doc.scrapy.org/en/latest/topics/settings.html#download-delay
 # See also autothrottle settings and docs
-#DOWNLOAD_DELAY = 3
+# DOWNLOAD_DELAY = 3
 # The download delay setting will honor only one of:
-#CONCURRENT_REQUESTS_PER_DOMAIN = 16
-#CONCURRENT_REQUESTS_PER_IP = 16
+# CONCURRENT_REQUESTS_PER_DOMAIN = 16
+# CONCURRENT_REQUESTS_PER_IP = 16
 
 # Disable cookies (enabled by default)
-#COOKIES_ENABLED = False
+# COOKIES_ENABLED = False
 
 # Disable Telnet Console (enabled by default)
-#TELNETCONSOLE_ENABLED = False
+# TELNETCONSOLE_ENABLED = False
 
 # Override the default request headers:
-#DEFAULT_REQUEST_HEADERS = {
+# DEFAULT_REQUEST_HEADERS = {
 #   'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
 #   'Accept-Language': 'en',
-#}
+# }
 
 # Enable or disable spider middlewares
 # See https://doc.scrapy.org/en/latest/topics/spider-middleware.html
-#SPIDER_MIDDLEWARES = {
+# SPIDER_MIDDLEWARES = {
 #    'Borges.middlewares.BorgesSpiderMiddleware': 543,
-#}
+# }
 
 # Enable or disable downloader middlewares
 # See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
-#DOWNLOADER_MIDDLEWARES = {
+# DOWNLOADER_MIDDLEWARES = {
 #    'Borges.middlewares.BorgesDownloaderMiddleware': 543,
-#}
+# }
 
 # Enable or disable extensions
 # See https://doc.scrapy.org/en/latest/topics/extensions.html
-#EXTENSIONS = {
+# EXTENSIONS = {
 #    'scrapy.extensions.telnet.TelnetConsole': None,
-#}
+# }
 
 # Configure item pipelines
 # See https://doc.scrapy.org/en/latest/topics/item-pipeline.html
-#ITEM_PIPELINES = {
+# ITEM_PIPELINES = {
 #    'Borges.pipelines.BorgesPipeline': 300,
-#}
+# }
 
 # Enable and configure the AutoThrottle extension (disabled by default)
 # See https://doc.scrapy.org/en/latest/topics/autothrottle.html
-#AUTOTHROTTLE_ENABLED = True
+# AUTOTHROTTLE_ENABLED = True
 # The initial download delay
-#AUTOTHROTTLE_START_DELAY = 5
+# AUTOTHROTTLE_START_DELAY = 5
 # The maximum download delay to be set in case of high latencies
-#AUTOTHROTTLE_MAX_DELAY = 60
+# AUTOTHROTTLE_MAX_DELAY = 60
 # The average number of requests Scrapy should be sending in parallel to
 # each remote server
-#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
+# AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
 # Enable showing throttling stats for every response received:
-#AUTOTHROTTLE_DEBUG = False
+# AUTOTHROTTLE_DEBUG = False
 
 # Enable and configure HTTP caching (disabled by default)
 # See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
-#HTTPCACHE_ENABLED = True
-#HTTPCACHE_EXPIRATION_SECS = 0
-#HTTPCACHE_DIR = 'httpcache'
-#HTTPCACHE_IGNORE_HTTP_CODES = []
-#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
+# HTTPCACHE_ENABLED = True
+# HTTPCACHE_EXPIRATION_SECS = 0
+# HTTPCACHE_DIR = 'httpcache'
+# HTTPCACHE_IGNORE_HTTP_CODES = []
+# HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
 
 SPLASH_URL = 'http://192.168.99.100:8050'
 DOWNLOADER_MIDDLEWARES = {
@@ -101,8 +100,7 @@
 DUPEFILTER_CLASS = 'scrapy_splash.SplashAwareDupeFilter'
 HTTPCACHE_STORAGE = 'scrapy_splash.SplashAwareFSCacheStorage'
 
-
-ELSEVIER_API_1 = '***REMOVED***'
-ELSEVIER_API_2 = '***REMOVED***'
-ELSEVIER_API_3 = '***REMOVED***'
-ELSEVIER_API_4 = '***REMOVED***'
+ELSEVIER_API_1 = 'YOUR KEY'
+ELSEVIER_API_2 = 'YOUR KEY'
+ELSEVIER_API_3 = 'YOUR KEY'
+ELSEVIER_API_4 = 'YOUR KEY'
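
Two details of this settings file are worth a note. The scrapy-splash block points Scrapy at a Splash instance for JavaScript rendering; with those middlewares enabled, a spider would typically route requests through Splash roughly like this (a minimal sketch with a hypothetical spider name and URL, not part of this commit):

import scrapy
from scrapy_splash import SplashRequest

class ExampleSpider(scrapy.Spider):
    name = 'example'  # hypothetical, for illustration only

    def start_requests(self):
        # Render the page in Splash (waiting 0.5 s for JavaScript) before parsing.
        yield SplashRequest('http://example.com', self.parse, args={'wait': 0.5})

    def parse(self, response):
        yield {'title': response.css('title::text').get()}

Likewise, rather than editing the 'YOUR KEY' placeholders in place, the keys can be kept out of version control entirely by reading them from the environment, in the spirit of this commit (a sketch; the environment variable names simply mirror the setting names):

import os

# Fall back to the placeholder when a variable is unset.
ELSEVIER_API_1 = os.environ.get('ELSEVIER_API_1', 'YOUR KEY')
ELSEVIER_API_2 = os.environ.get('ELSEVIER_API_2', 'YOUR KEY')
ELSEVIER_API_3 = os.environ.get('ELSEVIER_API_3', 'YOUR KEY')
ELSEVIER_API_4 = os.environ.get('ELSEVIER_API_4', 'YOUR KEY')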
7 changes: 7 additions & 0 deletions README.md
@@ -1,2 +1,9 @@
 # Borges
 Scraping Engines for Predictive Synthesis Project
+
+## Legal notice
+
+Running this code might breach certain contracts, agreements,
+or licences you have negotiated with the publishers. Please
+use this code carefully. The Ceder Group is not responsible for
+any legal consequences.
19 changes: 15 additions & 4 deletions setup.py
@@ -14,9 +14,20 @@
 if __name__ == "__main__":
     setup(name='Borges',
           version='0.1.0',
-          author="Ziqin (Shaun) Rong",
-          author_email="[email protected]",
+          author="Ceder Group",
           license="MIT License",
           packages=find_packages(),
-          zip_safe=False, install_requires=['PyYAML', 'bson', 'scrapy', 'requests', 'beautifulsoup4', 'jsonlines',
-                                            'sqlalchemy', 'pymongo', 'DBGater', 'pytz', 'elsapy'])
+          zip_safe=False,
+          install_requires=[
+              'PyYAML',
+              'bson',
+              'scrapy',
+              'requests',
+              'beautifulsoup4',
+              'jsonlines',
+              'sqlalchemy',
+              'pymongo',
+              'DBGater',
+              'pytz',
+              'elsapy']
+          )
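
With the dependency list reorganized this way, an editable install from the repository root should pull in everything listed (a standard setuptools workflow, not something this commit documents):

pip install -e .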
