Skip to content

Commit 974ac37

Browse files
authored
Merge pull request #222 from openzim/requests_timeout
Set default timeout in `download.stream_file` to 10 seconds, and allow overriding the value
2 parents 5f92462 + 7ddea55 commit 974ac37

File tree

4 files changed

+19
-5
lines changed

4 files changed

+19
-5
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1616

1717
- Add `filesystem.validate_folder_writable` to check if a folder can be written to #200
1818

19+
### Fixed
20+
21+
- Set default timeout in `download.stream_file` to 10 seconds, and allow overriding the value #222
22+
1923
## [4.0.0] - 2024-08-05
2024

2125
### Added

src/zimscraperlib/constants.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,3 +56,7 @@
5656
ILLUSTRATIONS_METADATA_RE = re.compile(
5757
r"^Illustration_(?P<height>\d+)x(?P<width>\d+)@(?P<scale>\d+)$"
5858
)
59+
60+
# default timeout to get responses from upstream when doing web requests; this is not
61+
# the total time it takes to download the whole resource
62+
DEFAULT_WEB_REQUESTS_TIMEOUT = 10

src/zimscraperlib/download.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import yt_dlp as youtube_dl
1616

1717
from zimscraperlib import logger
18+
from zimscraperlib.constants import DEFAULT_WEB_REQUESTS_TIMEOUT
1819

1920

2021
class YoutubeDownloader:
@@ -181,6 +182,7 @@ def stream_file(
181182
max_retries: int | None = 5,
182183
headers: dict[str, str] | None = None,
183184
session: requests.Session | None = None,
185+
timeout: int | None = DEFAULT_WEB_REQUESTS_TIMEOUT,
184186
*,
185187
only_first_block: bool | None = False,
186188
) -> tuple[int, requests.structures.CaseInsensitiveDict[str]]:
@@ -208,6 +210,7 @@ def stream_file(
208210
stream=True,
209211
proxies=proxies,
210212
headers=headers,
213+
timeout=timeout,
211214
)
212215
resp.raise_for_status()
213216

tests/download/test_download.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import requests.structures
1515
from yt_dlp import DownloadError
1616

17+
from zimscraperlib.constants import DEFAULT_WEB_REQUESTS_TIMEOUT
1718
from zimscraperlib.download import (
1819
BestMp4,
1920
BestWebm,
@@ -22,13 +23,11 @@
2223
stream_file,
2324
)
2425

25-
DEFAULT_REQUEST_TIMEOUT = 60
26-
2726

2827
def assert_downloaded_file(url, file):
2928
assert file.exists()
3029
# our Google test URLs don't support HEAD
31-
req = requests.get(url, timeout=DEFAULT_REQUEST_TIMEOUT)
30+
req = requests.get(url, timeout=DEFAULT_WEB_REQUESTS_TIMEOUT)
3231
# we test against binary response: Content-Length not accurate as gzip-encoded
3332
assert file.stat().st_size == len(req.content)
3433

@@ -90,7 +89,11 @@ def test_first_block_download_custom_session(mocker, valid_http_url):
9089
)
9190
# check that custom session has been used
9291
custom_session.get.assert_called_once_with(
93-
valid_http_url, stream=True, proxies=None, headers=None
92+
valid_http_url,
93+
stream=True,
94+
proxies=None,
95+
headers=None,
96+
timeout=DEFAULT_WEB_REQUESTS_TIMEOUT,
9497
)
9598
requests.Session.assert_not_called() # pyright: ignore
9699

@@ -130,7 +133,7 @@ def test_stream_to_bytes(valid_https_url):
130133
assert_headers(ret)
131134
assert (
132135
byte_stream.read()
133-
== requests.get(valid_https_url, timeout=DEFAULT_REQUEST_TIMEOUT).content
136+
== requests.get(valid_https_url, timeout=DEFAULT_WEB_REQUESTS_TIMEOUT).content
134137
)
135138

136139

0 commit comments

Comments
 (0)