From e05aed4bbdc2ab9247ab86ff710bd1c687450d06 Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Fri, 12 Jun 2026 16:05:30 +0200 Subject: [PATCH 1/2] fix: Drop deprecated spider arg from Scrapy proxy middleware methods --- src/apify/scrapy/middlewares/apify_proxy.py | 11 +++--- .../scrapy/middlewares/test_apify_proxy.py | 35 ++++++++++++------- 2 files changed, 27 insertions(+), 19 deletions(-) diff --git a/src/apify/scrapy/middlewares/apify_proxy.py b/src/apify/scrapy/middlewares/apify_proxy.py index 2f2f22cc0..de061f202 100644 --- a/src/apify/scrapy/middlewares/apify_proxy.py +++ b/src/apify/scrapy/middlewares/apify_proxy.py @@ -10,7 +10,7 @@ from apify.scrapy.utils import get_basic_auth_header if TYPE_CHECKING: - from scrapy import Request, Spider + from scrapy import Request from scrapy.crawler import Crawler @@ -63,17 +63,16 @@ def from_crawler(cls: type[ApifyHttpProxyMiddleware], crawler: Crawler) -> Apify return cls(proxy_settings) - async def process_request(self, request: Request, spider: Spider) -> None: + async def process_request(self, request: Request) -> None: """Process a Scrapy request by assigning a new proxy. Args: request: Scrapy Request object. - spider: Scrapy Spider object. Raises: ValueError: If username and password are not provided in the proxy URL. """ - Actor.log.debug(f'ApifyHttpProxyMiddleware.process_request: request={request}, spider={spider}') + Actor.log.debug(f'ApifyHttpProxyMiddleware.process_request: request={request}') url = await self._get_new_proxy_url() if not (url.username and url.password): @@ -89,14 +88,12 @@ def process_exception( self, request: Request, exception: Exception, - spider: Spider, ) -> None: """Process an exception that occurs during request processing. Args: request: Scrapy Request object. exception: Exception object. - spider: Scrapy Spider object. Returns: Returning None, meaning Scrapy will continue processing this exception, executing any other @@ -104,7 +101,7 @@ def process_exception( exception handling kicks in. """ Actor.log.debug( - f'ApifyHttpProxyMiddleware.process_exception: request={request}, exception={exception}, spider={spider}', + f'ApifyHttpProxyMiddleware.process_exception: request={request}, exception={exception}', ) if isinstance(exception, TunnelError): diff --git a/tests/unit/scrapy/middlewares/test_apify_proxy.py b/tests/unit/scrapy/middlewares/test_apify_proxy.py index 04e4e80e0..4d76a883a 100644 --- a/tests/unit/scrapy/middlewares/test_apify_proxy.py +++ b/tests/unit/scrapy/middlewares/test_apify_proxy.py @@ -1,12 +1,14 @@ from __future__ import annotations +import warnings from urllib.parse import ParseResult, urlparse import pytest from scrapy import Request, Spider from scrapy.core.downloader.handlers.http11 import TunnelError +from scrapy.core.downloader.middleware import DownloaderMiddlewareManager from scrapy.crawler import Crawler -from scrapy.exceptions import NotConfigured +from scrapy.exceptions import NotConfigured, ScrapyDeprecationWarning from apify import ProxyConfiguration from apify.scrapy.middlewares import ApifyHttpProxyMiddleware @@ -31,12 +33,6 @@ def crawler(monkeypatch: pytest.MonkeyPatch) -> Crawler: return crawler -@pytest.fixture -def spider() -> DummySpider: - """Fixture to create a "dummy" Scrapy spider.""" - return DummySpider() - - @pytest.fixture def dummy_request() -> Request: """Fixture to create a "dummy" Scrapy spider.""" @@ -119,7 +115,6 @@ async def mock_new_url() -> str: async def test_process_request_with_proxy( monkeypatch: pytest.MonkeyPatch, middleware: ApifyHttpProxyMiddleware, - spider: DummySpider, dummy_request: Request, proxy_url: str, expected_exception: type[Exception] | None, @@ -131,12 +126,12 @@ async def mock_get_new_proxy_url() -> ParseResult: monkeypatch.setattr(middleware, '_get_new_proxy_url', mock_get_new_proxy_url) if expected_exception is None: - await middleware.process_request(dummy_request, spider) + await middleware.process_request(dummy_request) assert dummy_request.meta['proxy'] == proxy_url assert dummy_request.headers[b'Proxy-Authorization'] == expected_request_header else: with pytest.raises(expected_exception): - await middleware.process_request(dummy_request, spider) + await middleware.process_request(dummy_request) @pytest.mark.parametrize( @@ -146,9 +141,25 @@ async def mock_get_new_proxy_url() -> ParseResult: ) def test_handles_exceptions( middleware: ApifyHttpProxyMiddleware, - spider: DummySpider, dummy_request: Request, exception: Exception, ) -> None: - returned_value = middleware.process_exception(dummy_request, exception, spider) + returned_value = middleware.process_exception(dummy_request, exception) assert returned_value is None + + +def test_methods_do_not_require_deprecated_spider_arg( + crawler: Crawler, + middleware: ApifyHttpProxyMiddleware, +) -> None: + """Registering the middleware must not emit Scrapy's `spider` argument deprecation warning (scrapy>=2.14).""" + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter('always') + DownloaderMiddlewareManager(middleware, crawler=crawler) + + spider_arg_warnings = [ + w + for w in caught + if issubclass(w.category, ScrapyDeprecationWarning) and 'requires a spider argument' in str(w.message) + ] + assert spider_arg_warnings == [] From 0e941bb54737660731c463b22f68bc16672780ce Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Fri, 12 Jun 2026 19:09:13 +0200 Subject: [PATCH 2/2] test: Remove redundant spider-arg deprecation test --- .../scrapy/middlewares/test_apify_proxy.py | 21 +------------------ 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/tests/unit/scrapy/middlewares/test_apify_proxy.py b/tests/unit/scrapy/middlewares/test_apify_proxy.py index 4d76a883a..3c45d32f4 100644 --- a/tests/unit/scrapy/middlewares/test_apify_proxy.py +++ b/tests/unit/scrapy/middlewares/test_apify_proxy.py @@ -1,14 +1,12 @@ from __future__ import annotations -import warnings from urllib.parse import ParseResult, urlparse import pytest from scrapy import Request, Spider from scrapy.core.downloader.handlers.http11 import TunnelError -from scrapy.core.downloader.middleware import DownloaderMiddlewareManager from scrapy.crawler import Crawler -from scrapy.exceptions import NotConfigured, ScrapyDeprecationWarning +from scrapy.exceptions import NotConfigured from apify import ProxyConfiguration from apify.scrapy.middlewares import ApifyHttpProxyMiddleware @@ -146,20 +144,3 @@ def test_handles_exceptions( ) -> None: returned_value = middleware.process_exception(dummy_request, exception) assert returned_value is None - - -def test_methods_do_not_require_deprecated_spider_arg( - crawler: Crawler, - middleware: ApifyHttpProxyMiddleware, -) -> None: - """Registering the middleware must not emit Scrapy's `spider` argument deprecation warning (scrapy>=2.14).""" - with warnings.catch_warnings(record=True) as caught: - warnings.simplefilter('always') - DownloaderMiddlewareManager(middleware, crawler=crawler) - - spider_arg_warnings = [ - w - for w in caught - if issubclass(w.category, ScrapyDeprecationWarning) and 'requires a spider argument' in str(w.message) - ] - assert spider_arg_warnings == []