Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 4 additions & 7 deletions src/apify/scrapy/middlewares/apify_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from apify.scrapy.utils import get_basic_auth_header

if TYPE_CHECKING:
from scrapy import Request, Spider
from scrapy import Request
from scrapy.crawler import Crawler


Expand Down Expand Up @@ -63,17 +63,16 @@ def from_crawler(cls: type[ApifyHttpProxyMiddleware], crawler: Crawler) -> Apify

return cls(proxy_settings)

async def process_request(self, request: Request, spider: Spider) -> None:
async def process_request(self, request: Request) -> None:
"""Process a Scrapy request by assigning a new proxy.

Args:
request: Scrapy Request object.
spider: Scrapy Spider object.

Raises:
ValueError: If username and password are not provided in the proxy URL.
"""
Actor.log.debug(f'ApifyHttpProxyMiddleware.process_request: request={request}, spider={spider}')
Actor.log.debug(f'ApifyHttpProxyMiddleware.process_request: request={request}')
url = await self._get_new_proxy_url()

if not (url.username and url.password):
Expand All @@ -89,22 +88,20 @@ def process_exception(
self,
request: Request,
exception: Exception,
spider: Spider,
) -> None:
"""Process an exception that occurs during request processing.

Args:
request: Scrapy Request object.
exception: Exception object.
spider: Scrapy Spider object.

Returns:
Returning None, meaning Scrapy will continue processing this exception, executing any other
process_exception() methods of installed middleware, until no middleware is left and the default
exception handling kicks in.
"""
Actor.log.debug(
f'ApifyHttpProxyMiddleware.process_exception: request={request}, exception={exception}, spider={spider}',
f'ApifyHttpProxyMiddleware.process_exception: request={request}, exception={exception}',
)

if isinstance(exception, TunnelError):
Expand Down
14 changes: 3 additions & 11 deletions tests/unit/scrapy/middlewares/test_apify_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,6 @@ def crawler(monkeypatch: pytest.MonkeyPatch) -> Crawler:
return crawler


@pytest.fixture
def spider() -> DummySpider:
"""Fixture to create a "dummy" Scrapy spider."""
return DummySpider()


@pytest.fixture
def dummy_request() -> Request:
"""Fixture to create a "dummy" Scrapy request."""
Expand Down Expand Up @@ -119,7 +113,6 @@ async def mock_new_url() -> str:
async def test_process_request_with_proxy(
monkeypatch: pytest.MonkeyPatch,
middleware: ApifyHttpProxyMiddleware,
spider: DummySpider,
dummy_request: Request,
proxy_url: str,
expected_exception: type[Exception] | None,
Expand All @@ -131,12 +124,12 @@ async def mock_get_new_proxy_url() -> ParseResult:
monkeypatch.setattr(middleware, '_get_new_proxy_url', mock_get_new_proxy_url)

if expected_exception is None:
await middleware.process_request(dummy_request, spider)
await middleware.process_request(dummy_request)
assert dummy_request.meta['proxy'] == proxy_url
assert dummy_request.headers[b'Proxy-Authorization'] == expected_request_header
else:
with pytest.raises(expected_exception):
await middleware.process_request(dummy_request, spider)
await middleware.process_request(dummy_request)


@pytest.mark.parametrize(
Expand All @@ -146,9 +139,8 @@ async def mock_get_new_proxy_url() -> ParseResult:
)
def test_handles_exceptions(
middleware: ApifyHttpProxyMiddleware,
spider: DummySpider,
dummy_request: Request,
exception: Exception,
) -> None:
returned_value = middleware.process_exception(dummy_request, exception, spider)
returned_value = middleware.process_exception(dummy_request, exception)
assert returned_value is None
Loading