Skip to content

Commit effb47c

Browse files
Fix chunked forwarding response
Please enter the commit message for your changes. Lines starting
1 parent 34b106e commit effb47c

File tree

4 files changed

+188
-117
lines changed

4 files changed

+188
-117
lines changed

examples/persistent_ca_usage.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,10 @@ def get_or_create_ca():
5050
class LoggingForwardProxyHandler(HTTPSForwardProxyHandler):
5151
"""Example forward proxy handler with logging."""
5252

53+
def __init__(self):
54+
super().__init__()
55+
self.response_size = 0
56+
5357
async def on_client_connected(self):
5458
print(f"Client connected: {self.request}")
5559
await super().on_client_connected()
@@ -63,6 +67,15 @@ async def on_error(self, error: Exception):
6367
print(f"❌ Proxy error: {type(error).__name__}: {error}")
6468
await super().on_error(error)
6569

70+
async def on_response_chunk(self, chunk: bytes) -> bytes:
71+
"""Process each response chunk - log size and analyze content."""
72+
chunk_size = len(chunk)
73+
self.response_size += chunk_size
74+
print(
75+
f" Received chunk: {chunk_size} bytes, total so far: {self.response_size} bytes"
76+
)
77+
return chunk
78+
6679

6780
async def main():
6881
"""Run a proxy with persistent CA."""
Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
"""
2+
HTTP Chunked Transfer Encoding Module
3+
4+
This module provides utilities for handling HTTP/1.1 chunked transfer encoding,
5+
which allows HTTP messages to be sent in chunks when the total content length
6+
is not known in advance.
7+
8+
## How Chunked Encoding Works
9+
10+
In HTTP chunked transfer encoding, the message body is sent as a series of chunks:
11+
12+
1. **Chunk Format**: Each chunk consists of:
13+
- Chunk size in hexadecimal followed by CRLF
14+
- Chunk data
15+
- Trailing CRLF
16+
17+
2. **Example**:
18+
```
19+
5\r\n <- chunk size (5 bytes in hex)
20+
Hello\r\n <- chunk data + trailing CRLF
21+
6\r\n <- next chunk size (6 bytes)
22+
World!\r\n <- chunk data + trailing CRLF
23+
0\r\n <- terminating chunk (size 0)
24+
\r\n <- final CRLF (no trailers)
25+
```
26+
27+
3. **Termination**: The last chunk has size 0, followed by optional trailers
28+
and a final CRLF.
29+
"""
30+
31+
import asyncio
32+
from typing import AsyncIterator, Awaitable, Callable, Optional
33+
34+
35+
class ChunkedReader:
36+
"""Handles reading HTTP chunked transfer encoding."""
37+
38+
def __init__(self, reader: asyncio.StreamReader):
39+
self.reader = reader
40+
41+
async def read_chunks(self) -> AsyncIterator[bytes]:
42+
"""
43+
Read chunked response data.
44+
45+
Yields:
46+
Chunks of data as bytes.
47+
"""
48+
while True:
49+
# Read chunk size line
50+
chunk_size_line = await self.reader.readline()
51+
if not chunk_size_line:
52+
break
53+
54+
# Parse chunk size (hex digits before optional ';' and extensions)
55+
chunk_size_str = chunk_size_line.decode("ascii").strip()
56+
try:
57+
# Split on ';' to handle chunk extensions, take only the size part
58+
chunk_size = int(chunk_size_str.split(";")[0], 16)
59+
except (ValueError, UnicodeDecodeError):
60+
break
61+
62+
if chunk_size == 0:
63+
# Terminating chunk - read trailers until empty line
64+
while True:
65+
trailer = await self.reader.readline()
66+
if not trailer or trailer == b"\r\n":
67+
break
68+
break
69+
70+
# Read exactly chunk_size bytes of data
71+
chunk_data = b""
72+
remaining = chunk_size
73+
while remaining > 0:
74+
data = await self.reader.read(remaining)
75+
if not data:
76+
# Connection closed unexpectedly
77+
break
78+
chunk_data += data
79+
remaining -= len(data)
80+
81+
if len(chunk_data) != chunk_size:
82+
# Incomplete read - connection was closed
83+
break
84+
85+
# Read the trailing CRLF after chunk data
86+
trailing_crlf = b""
87+
while len(trailing_crlf) < 2:
88+
data = await self.reader.read(2 - len(trailing_crlf))
89+
if not data:
90+
break
91+
trailing_crlf += data
92+
93+
if trailing_crlf != b"\r\n":
94+
# Invalid chunk format
95+
break
96+
97+
yield chunk_data
98+
99+
100+
class ChunkedWriter:
101+
"""Handles writing HTTP chunked transfer encoding."""
102+
103+
def __init__(self, write_func: Callable[[bytes], None]):
104+
self.write_func = write_func
105+
106+
def write_chunk(self, data: bytes) -> None:
107+
"""
108+
Write data as a chunked transfer encoding chunk.
109+
110+
Args:
111+
data: The chunk data to write
112+
"""
113+
if data:
114+
chunk_size_hex = hex(len(data))[2:].encode() # Remove '0x' prefix
115+
self.write_func(chunk_size_hex + b"\r\n")
116+
self.write_func(data)
117+
self.write_func(b"\r\n")
118+
119+
def write_final_chunk(self) -> None:
120+
self.write_func(b"0\r\n\r\n")
121+
122+
123+
async def forward_chunked_response(
124+
reader: asyncio.StreamReader,
125+
write_func: Callable[[bytes], None],
126+
chunk_hook: Callable[[bytes], Awaitable[Optional[bytes]]],
127+
) -> None:
128+
"""
129+
Forward a chunked response from reader to writer.
130+
131+
Args:
132+
reader: The stream reader to read from
133+
write_func: Function to write response data
134+
chunk_hook: Async function to process each chunk
135+
"""
136+
chunked_reader = ChunkedReader(reader)
137+
chunked_writer = ChunkedWriter(write_func)
138+
139+
async for chunk_data in chunked_reader.read_chunks():
140+
# Process chunk data through hook
141+
processed_chunk = await chunk_hook(chunk_data)
142+
143+
if processed_chunk is not None:
144+
chunked_writer.write_chunk(processed_chunk)
145+
146+
chunked_writer.write_final_chunk()

src/asyncio_https_proxy/https_forward_proxy_handler.py

Lines changed: 6 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import asyncio
22
import ssl
3-
from typing import AsyncIterator
43

4+
from .chunked_encoding import forward_chunked_response
55
from .http_response import HTTPResponse
66
from .https_proxy_handler import HTTPSProxyHandler
77

@@ -203,7 +203,11 @@ async def _forward_response_body(self):
203203
transfer_encoding = self.response.headers.first("Transfer-Encoding")
204204

205205
if transfer_encoding and "chunked" in transfer_encoding.lower():
206-
await self._forward_chunked_response()
206+
await forward_chunked_response(
207+
self.upstream_reader,
208+
self.write_response,
209+
self.on_response_chunk,
210+
)
207211
else:
208212
# Read until EOF
209213
while True:
@@ -222,84 +226,3 @@ async def _forward_response_body(self):
222226
if not self._response_complete:
223227
self._response_complete = True
224228
await self.on_response_complete()
225-
226-
async def _forward_chunked_response(self):
227-
"""Forward chunked response data."""
228-
229-
if self.upstream_reader is None:
230-
return
231-
232-
while True:
233-
# Read chunk size line
234-
chunk_size_line = await self.upstream_reader.readline()
235-
self.write_response(chunk_size_line)
236-
237-
# Parse chunk size
238-
try:
239-
chunk_size = int(chunk_size_line.decode().split(";")[0], 16)
240-
except (ValueError, UnicodeDecodeError):
241-
break
242-
243-
if chunk_size == 0:
244-
# Read trailers until empty line
245-
while True:
246-
trailer = await self.upstream_reader.readline()
247-
self.write_response(trailer)
248-
if trailer == b"\r\n":
249-
break
250-
break
251-
252-
# Read chunk data + CRLF
253-
chunk_data_with_crlf = await self.upstream_reader.read(chunk_size + 2)
254-
if len(chunk_data_with_crlf) >= 2:
255-
# Separate actual data from CRLF
256-
chunk_data = chunk_data_with_crlf[:-2] # Remove CRLF
257-
crlf = chunk_data_with_crlf[-2:] # Extract CRLF
258-
259-
# Process chunk data through hook
260-
processed_chunk = await self.on_response_chunk(chunk_data)
261-
if processed_chunk is not None:
262-
# Write the processed data + CRLF
263-
self.write_response(processed_chunk + crlf)
264-
else:
265-
# If chunk was filtered out, we still need to write CRLF for protocol compliance
266-
self.write_response(crlf)
267-
else:
268-
# Handle edge case where we didn't get enough data
269-
self.write_response(chunk_data_with_crlf)
270-
271-
async def read_response_body(self) -> AsyncIterator[bytes]:
272-
"""
273-
Read the response body from the upstream server.
274-
275-
This method can be called from on_response_received() to read and potentially
276-
modify the response body before it's forwarded to the client.
277-
278-
Yields:
279-
Chunks of the response body as bytes.
280-
"""
281-
if not self.upstream_reader or not self.response:
282-
return
283-
284-
content_length = None
285-
if self.response.headers:
286-
content_length_header = self.response.headers.first("Content-Length")
287-
if content_length_header:
288-
content_length = int(content_length_header)
289-
290-
if content_length is not None:
291-
remaining = content_length
292-
while remaining > 0:
293-
chunk_size = min(remaining, MAX_CHUNK_SIZE)
294-
chunk = await self.upstream_reader.read(chunk_size)
295-
if not chunk:
296-
break
297-
yield chunk
298-
remaining -= len(chunk)
299-
else:
300-
# Read until EOF (for non-chunked responses without Content-Length)
301-
while True:
302-
chunk = await self.upstream_reader.read(MAX_CHUNK_SIZE)
303-
if not chunk:
304-
break
305-
yield chunk

tests/test_https_forward_proxy_handler.py

Lines changed: 23 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import ssl
2+
import unittest.mock
23
from unittest.mock import AsyncMock, MagicMock, patch
34

45
import pytest
@@ -181,40 +182,25 @@ async def test_forward_chunked_response(self):
181182
handler.upstream_reader.readline.side_effect = [
182183
b"5\r\n", # chunk size
183184
b"0\r\n", # end chunk
184-
b"\r\n", # final CRLF
185+
b"\r\n", # final CRLF after zero chunk (no trailers)
185186
]
186187
handler.upstream_reader.read.side_effect = [
187-
b"Hello\r\n" # chunk data + CRLF
188+
b"Hello", # chunk data (5 bytes)
189+
b"\r\n", # trailing CRLF after chunk data
188190
]
189191

190192
with patch.object(handler, "write_response") as mock_write:
191193
with patch.object(handler, "flush_response", new_callable=AsyncMock):
192194
await handler._forward_response_body()
193195

194-
# Verify chunked data was forwarded
195-
mock_write.assert_any_call(b"5\r\n")
196-
mock_write.assert_any_call(b"Hello\r\n")
197-
mock_write.assert_any_call(b"0\r\n")
198-
199-
@pytest.mark.asyncio
200-
async def test_read_response_body(self):
201-
"""Test reading response body for modification."""
202-
handler = HTTPSForwardProxyHandler()
203-
204-
# Mock response with Content-Length
205-
handler.response = HTTPResponse()
206-
handler.response.headers = HTTPHeader(b"Content-Length: 11\r\n\r\n")
207-
208-
# Mock upstream reader
209-
handler.upstream_reader = AsyncMock()
210-
handler.upstream_reader.read.side_effect = [b"Hello", b" World", b""]
211-
212-
# Collect chunks
213-
chunks = []
214-
async for chunk in handler.read_response_body():
215-
chunks.append(chunk)
216-
217-
assert chunks == [b"Hello", b" World"]
196+
# Verify proper chunked format was sent: size\r\ndata\r\n0\r\n\r\n
197+
expected_calls = [
198+
unittest.mock.call(b"5\r\n"), # chunk size
199+
unittest.mock.call(b"Hello"), # chunk data
200+
unittest.mock.call(b"\r\n"), # chunk trailing CRLF
201+
unittest.mock.call(b"0\r\n\r\n"), # final zero chunk
202+
]
203+
mock_write.assert_has_calls(expected_calls)
218204

219205
@pytest.mark.asyncio
220206
async def test_error_handling_connection_failed(self):
@@ -377,21 +363,20 @@ async def test_on_response_complete_called_after_forwarding(self):
377363
@pytest.mark.asyncio
378364
async def test_chunked_response_only_calls_hook_for_data_chunks(self):
379365
"""Test that chunked responses only call hook for actual data, not protocol overhead."""
366+
from asyncio_https_proxy.chunked_encoding import forward_chunked_response
367+
380368
handler = HTTPSForwardProxyHandler()
381369
handler.upstream_reader = AsyncMock()
382370

383-
# Mock response with chunked encoding
384-
handler.response = HTTPResponse()
385-
handler.response.headers = HTTPHeader(b"Transfer-Encoding: chunked\r\n\r\n")
386-
387-
# Mock chunked data: "5\r\nHello\r\n0\r\n\r\n"
371+
# Mock chunked data properly
388372
handler.upstream_reader.readline.side_effect = [
389373
b"5\r\n", # chunk size
390374
b"0\r\n", # end chunk
391-
b"\r\n", # final CRLF
375+
b"\r\n", # final CRLF after zero chunk (no trailers)
392376
]
393377
handler.upstream_reader.read.side_effect = [
394-
b"Hello\r\n" # chunk data + CRLF
378+
b"Hello", # chunk data (5 bytes)
379+
b"\r\n", # trailing CRLF after chunk data
395380
]
396381

397382
# Track chunk calls
@@ -405,7 +390,11 @@ async def mock_on_response_chunk(chunk):
405390
handler.on_response_chunk = mock_on_response_chunk
406391

407392
with patch.object(handler, "write_response"):
408-
await handler._forward_chunked_response()
393+
await forward_chunked_response(
394+
handler.upstream_reader,
395+
handler.write_response,
396+
handler.on_response_chunk,
397+
)
409398

410399
# Verify only actual data was passed to hook, not protocol overhead
411400
assert chunks_received == [b"Hello"]

0 commit comments

Comments
 (0)