Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 11 additions & 13 deletions scrapy_heroku/app.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,17 @@
from os import environ

from twisted.application.service import Application
from twisted.application.internet import TimerService, TCPServer
from twisted.web import server
from twisted.python import log

from scrapyd.interfaces import (IEggStorage, IPoller, ISpiderScheduler,
IEnvironment)
from scrapyd.launcher import Launcher
from scrapy_heroku.poller import Psycopg2QueuePoller
from scrapy_heroku.scheduler import Psycopg2SpiderScheduler
from scrapyd.eggstorage import FilesystemEggStorage
from scrapyd.environ import Environment
from scrapyd.interfaces import (IEggStorage, IEnvironment, IPoller,
ISpiderScheduler)
from scrapyd.launcher import Launcher
from scrapyd.website import Root

from .scheduler import Psycopg2SpiderScheduler
from .poller import Psycopg2QueuePoller
from twisted.application.internet import TCPServer, TimerService
from twisted.application.service import Application
from twisted.python import log
from twisted.web import server


def application(config):
Expand All @@ -34,8 +32,8 @@ def application(config):
launcher = Launcher(config, app)
timer = TimerService(5, poller.poll)
webservice = TCPServer(http_port, server.Site(Root(config, app)))
log.msg("Scrapyd web console available at http://localhost:%s/ (HEROKU)"
% http_port)
log.msg("Scrapyd web console available at http://localhost:%s/ (HEROKU)" %
http_port)

launcher.setServiceParent(app)
timer.setServiceParent(app)
Expand Down
3 changes: 1 addition & 2 deletions scrapy_heroku/poller.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from scrapy_heroku.utils import get_spider_queues
from scrapyd.poller import QueuePoller

from .utils import get_spider_queues


class Psycopg2QueuePoller(QueuePoller):
def update_projects(self):
Expand Down
9 changes: 3 additions & 6 deletions scrapy_heroku/scheduler.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
from zope.interface import implements

from scrapy_heroku.utils import get_spider_queues
from scrapyd.interfaces import ISpiderScheduler

from .utils import get_spider_queues
from zope.interface import implementer


@implementer(ISpiderScheduler)
class Psycopg2SpiderScheduler(object):
implements(ISpiderScheduler)

def __init__(self, config):
self.config = config
self.update_projects()
Expand Down
31 changes: 19 additions & 12 deletions scrapy_heroku/spiderqueue.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,23 @@
import psycopg2
import cPickle
try:
import cPickle
except ImportError:
import _pickle as cPickle

import json
import urlparse
from zope.interface import implements

import psycopg2
from scrapyd.interfaces import ISpiderQueue
from zope.interface import implementer

try:
from urlparse import urlparse
except ImportError:
from urllib.parse import urlparse


class Psycopg2PriorityQueue(object):
def __init__(self, config, table='scrapy_queue'):
url = urlparse.urlparse(config.get('database_url'))
url = urlparse(config.get('database_url'))
# Remove query strings.
path = url.path[1:]
path = path.split('?', 2)[0]
Expand Down Expand Up @@ -38,7 +46,7 @@ def _execute(self, q, args=None, results=True):
try:
cursor = self.conn.cursor()
cursor.execute(q, args)
except (psycopg2.InterfaceError, psycopg2.OperationalError) as err:
except (psycopg2.InterfaceError, psycopg2.OperationalError):
self.conn = psycopg2.connect(self.conn_string)
cursor = self.conn.cursor()
cursor.execute(q, args)
Expand All @@ -63,10 +71,10 @@ def pop(self):
% self.table
results = self._execute(q)
if len(results) == 0:
return
return None
mid, msg = results[0]
q = "delete from %s where id=%%s;" % self.table
self._execute(q, (mid,), results=False)
self._execute(q, (mid, ), results=False)
self.conn.commit()
return self.decode(msg)

Expand All @@ -76,7 +84,7 @@ def remove(self, func):
for mid, msg in self._execute(q):
if func(self.decode(msg)):
q = "delete from %s where id=%%s" % self.table
self._execute(q, (mid,), results=False)
self._execute(q, (mid, ), results=False)
n += 1
self.conn.commit()
return n
Expand Down Expand Up @@ -107,7 +115,7 @@ def decode(self, text):

class PicklePsycopg2PriorityQueue(Psycopg2PriorityQueue):
def encode(self, obj):
return buffer(cPickle.dumps(obj, protocol=2))
return memoryview(cPickle.dumps(obj, protocol=2))

def decode(self, text):
return cPickle.loads(str(text))
Expand All @@ -121,9 +129,8 @@ def decode(self, text):
return json.loads(text)


@implementer(ISpiderQueue)
class Psycopg2SpiderQueue(object):
implements(ISpiderQueue)

def __init__(self, config, table='spider_queue'):
self.q = JsonPsycopg2PriorityQueue(config, table)

Expand Down
3 changes: 1 addition & 2 deletions scrapy_heroku/utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from scrapy_heroku.spiderqueue import Psycopg2SpiderQueue
from scrapyd.utils import get_project_list

from spiderqueue import Psycopg2SpiderQueue


def get_spider_queues(config):
queues = {}
Expand Down
6 changes: 2 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from setuptools import setup, find_packages
from setuptools import find_packages, setup

setup(
name='scrapy-heroku',
Expand All @@ -16,7 +16,6 @@
include_package_data=True,
packages=find_packages(),
install_requires=[
'distribute',
'scrapy',
'psycopg2',
],
Expand All @@ -30,5 +29,4 @@
'Operating System :: OS Independent',
'Programming Language :: Python',
'Topic :: Internet :: WWW/HTTP',
]
)
])