From f9f8195b04528393fed28a68c548ea9b5cb20bf3 Mon Sep 17 00:00:00 2001 From: Fraser Harris Date: Mon, 31 Aug 2015 16:28:35 -0700 Subject: [PATCH 1/5] Replaced scrapy.log with logging, decorator -> decorators --- scrapy_webdriver/download.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/scrapy_webdriver/download.py b/scrapy_webdriver/download.py index f4a28fd..82bd62d 100644 --- a/scrapy_webdriver/download.py +++ b/scrapy_webdriver/download.py @@ -1,5 +1,7 @@ -from scrapy import log, version_info -from scrapy.utils.decorator import inthread +import logging + +from scrapy import version_info +from scrapy.utils.decorators import inthread from scrapy.utils.misc import load_object from .http import WebdriverActionRequest, WebdriverRequest, WebdriverResponse @@ -37,13 +39,13 @@ def download_request(self, request, spider): @inthread def _download_request(self, request, spider): """Download a request URL using webdriver.""" - log.msg('Downloading %s with webdriver' % request.url, level=log.DEBUG) + logging.debug('Downloading %s with webdriver' % request.url) request.manager.webdriver.get(request.url) return WebdriverResponse(request.url, request.manager.webdriver) @inthread def _do_action_request(self, request, spider): """Perform an action on a previously webdriver-loaded page.""" - log.msg('Running webdriver actions %s' % request.url, level=log.DEBUG) + logging.debug('Running webdriver actions %s' % request.url) request.actions.perform() return WebdriverResponse(request.url, request.manager.webdriver) From e5dbe304f321295e951b0fa455182a01e9569142 Mon Sep 17 00:00:00 2001 From: Fraser Harris Date: Mon, 31 Aug 2015 17:54:39 -0700 Subject: [PATCH 2/5] Crawler runs now, but test still fails. 
--- scrapy_webdriver/tests/test_request_queue.py | 21 ++++++-------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/scrapy_webdriver/tests/test_request_queue.py b/scrapy_webdriver/tests/test_request_queue.py index d285327..583a38c 100644 --- a/scrapy_webdriver/tests/test_request_queue.py +++ b/scrapy_webdriver/tests/test_request_queue.py @@ -1,14 +1,13 @@ from functools import partial +import logging from time import sleep from mock import call, Mock -from scrapy.crawler import Crawler +from scrapy.crawler import CrawlerProcess from scrapy.http import Request from scrapy import log, signals from scrapy.settings import Settings from scrapy.spider import BaseSpider -from scrapy.xlib.pydispatch import dispatcher -from twisted.internet import reactor from scrapy_webdriver.http import WebdriverRequest @@ -32,9 +31,6 @@ def settings(self, **options): settings.update(**options) return settings - def _stop_reactor(self): - reactor.stop() - def _wait(self, url, *args, **kwargs): sleep(0.1) @@ -44,15 +40,10 @@ def test_priorization(self): webdriver.get.side_effect = self._wait webdriver.page_source = u'' - dispatcher.connect(self._stop_reactor, signal=signals.spider_closed) - - crawler = Crawler(Settings(values=settings)) - crawler.configure() - spider = self.Spider(name='test', domain='testdomain') - crawler.crawl(spider) - crawler.start() - log.start(loglevel='ERROR') - reactor.run() + process = CrawlerProcess(Settings(values=settings)) + process.crawl(self.Spider, name='test', domain='testdomain') + logging.getLogger('scrapy').setLevel(logging.ERROR) + process.start() assert webdriver.get.mock_calls == [ call('http://testdomain/path?wr=0'), From f052b2e80b652cbd0fe02d485859dc0cefdf5d0e Mon Sep 17 00:00:00 2001 From: Fraser Harris Date: Mon, 31 Aug 2015 17:57:19 -0700 Subject: [PATCH 3/5] Fixed test. 
--- scrapy_webdriver/tests/test_manager.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/scrapy_webdriver/tests/test_manager.py b/scrapy_webdriver/tests/test_manager.py index 9f356cf..58945b9 100644 --- a/scrapy_webdriver/tests/test_manager.py +++ b/scrapy_webdriver/tests/test_manager.py @@ -1,5 +1,6 @@ from scrapy.crawler import Crawler from scrapy.settings import Settings +from scrapy.spiders import CrawlSpider from selenium import webdriver from scrapy_webdriver.manager import WebdriverManager @@ -29,19 +30,16 @@ class TestBrowser(object): pass settings = self.settings(WEBDRIVER_BROWSER='Firefox') - crawler = Crawler(Settings(values=settings)) - crawler.configure() + crawler = Crawler(CrawlSpider, settings=Settings(values=settings)) browser = WebdriverManager(crawler) assert issubclass(browser._browser, webdriver.Firefox) settings = self.settings(WEBDRIVER_BROWSER=TestBrowser) - crawler = Crawler(Settings(values=settings)) - crawler.configure() + crawler = Crawler(CrawlSpider, settings=Settings(values=settings)) browser = WebdriverManager(crawler) assert issubclass(browser._browser, TestBrowser) settings = self.settings(WEBDRIVER_BROWSER=TestBrowser()) - crawler = Crawler(Settings(values=settings)) - crawler.configure() + crawler = Crawler(CrawlSpider, settings=Settings(values=settings)) browser = WebdriverManager(crawler) assert isinstance(browser._webdriver, TestBrowser) From 9ddea8f58538e6c0e176d763d35443f995700134 Mon Sep 17 00:00:00 2001 From: Fraser Harris Date: Tue, 1 Sep 2015 14:57:24 -0700 Subject: [PATCH 4/5] Missed one log import --- scrapy_webdriver/tests/test_request_queue.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapy_webdriver/tests/test_request_queue.py b/scrapy_webdriver/tests/test_request_queue.py index 583a38c..1887ec2 100644 --- a/scrapy_webdriver/tests/test_request_queue.py +++ b/scrapy_webdriver/tests/test_request_queue.py @@ -5,7 +5,7 @@ from mock import call, Mock from 
scrapy.crawler import CrawlerProcess from scrapy.http import Request -from scrapy import log, signals +from scrapy import signals from scrapy.settings import Settings from scrapy.spider import BaseSpider From 6b7de22e3715accd3103673a4425ab353661ec4f Mon Sep 17 00:00:00 2001 From: Fraser Harris Date: Wed, 2 Sep 2015 14:42:34 -0700 Subject: [PATCH 5/5] Updated logging strings to unicode --- scrapy_webdriver/download.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scrapy_webdriver/download.py b/scrapy_webdriver/download.py index 82bd62d..733baa1 100644 --- a/scrapy_webdriver/download.py +++ b/scrapy_webdriver/download.py @@ -39,13 +39,13 @@ def download_request(self, request, spider): @inthread def _download_request(self, request, spider): """Download a request URL using webdriver.""" - logging.debug('Downloading %s with webdriver' % request.url) + logging.debug(u'Downloading %s with webdriver' % request.url) request.manager.webdriver.get(request.url) return WebdriverResponse(request.url, request.manager.webdriver) @inthread def _do_action_request(self, request, spider): """Perform an action on a previously webdriver-loaded page.""" - logging.debug('Running webdriver actions %s' % request.url) + logging.debug(u'Running webdriver actions %s' % request.url) request.actions.perform() return WebdriverResponse(request.url, request.manager.webdriver)