Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 59 additions & 7 deletions src/python_inspector/utils_pypi.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import attr
import packageurl
import requests
import zipfile
from bs4 import BeautifulSoup
from commoncode import fileutils
from commoncode.hash import multi_checksums
Expand Down Expand Up @@ -1698,6 +1699,20 @@ async def get(

cache_valid = os.path.exists(cached) and os.path.getsize(cached) > 0

# Validate cached wheel/egg files.
if cache_valid and not as_text:
if path_or_url.endswith((".whl", ".egg", ".zip")):
try:
if not zipfile.is_zipfile(cached):
if TRACE_DEEP:
print(f" FILE CACHE INVALID (corrupted zip): {path_or_url}")
cache_valid = False
except (FileNotFoundError, OSError):
# File was deleted/modified by another task - treat as cache miss
if TRACE_DEEP:
print(f" FILE CACHE VANISHED during validation: {path_or_url}")
cache_valid = False

if force or not cache_valid:
if not cache_valid and os.path.exists(cached):
if TRACE_DEEP:
Expand All @@ -1715,17 +1730,54 @@ async def get(
)
wmode = "w" if as_text else "wb"

# acquire lock and wait until timeout to get a lock or die
with lockfile.FileLock(lock_file).locked(timeout=PYINSP_CACHE_LOCK_TIMEOUT):
async with aiofiles.open(cached, mode=wmode) as fo:
await fo.write(content)
# Use atomic file operations.
temp_file = f"{cached}.tmp.{os.getpid()}"

try:
# acquire lock and wait until timeout to get a lock or die
with lockfile.FileLock(lock_file).locked(timeout=PYINSP_CACHE_LOCK_TIMEOUT):
async with aiofiles.open(temp_file, mode=wmode) as fo:
await fo.write(content)

# Validate zip files before making them "live"
if not as_text and path_or_url.endswith((".whl", ".egg", ".zip")):
if not zipfile.is_zipfile(temp_file):
raise Exception(
f"Downloaded file is not a valid zip: {path_or_url}\n"
f"Size: {os.path.getsize(temp_file)} bytes"
)

# Atomic rename - readers will never see partial/corrupt file
os.rename(temp_file, cached)

except Exception:
# Clean up temp file on any error
if os.path.exists(temp_file):
os.remove(temp_file)
raise

return content, cached
else:
if TRACE_DEEP:
print(f" FILE CACHE HIT: {path_or_url}")
# also lock on read to avoid race conditions
with lockfile.FileLock(lock_file).locked(timeout=PYINSP_CACHE_LOCK_TIMEOUT):
return await get_local_file_content(path=cached, as_text=as_text), cached

# File passed validation, lock and read
# Handle race condition where file might be deleted between validation and lock
try:
with lockfile.FileLock(lock_file).locked(timeout=PYINSP_CACHE_LOCK_TIMEOUT):
return await get_local_file_content(path=cached, as_text=as_text), cached
except FileNotFoundError:
# File was deleted by another task after validation - retry with force.
if TRACE_DEEP:
print(f" FILE VANISHED after validation, re-downloading: {path_or_url}")
return await self.get(
credentials=credentials,
path_or_url=path_or_url,
as_text=as_text,
force=True,
verbose=verbose,
echo_func=echo_func,
)


CACHE = Cache()
Expand Down
43 changes: 22 additions & 21 deletions tests/data/azure-devops.req-310-expected.json
Original file line number Diff line number Diff line change
Expand Up @@ -315,12 +315,12 @@
"type": "pypi",
"namespace": null,
"name": "certifi",
"version": "2025.10.5",
"version": "2025.11.12",
"qualifiers": {},
"subpath": null,
"primary_language": "Python",
"description": "Python package for providing Mozilla's CA Bundle.\nCertifi: Python SSL Certificates\n================================\n\nCertifi provides Mozilla's carefully curated collection of Root Certificates for\nvalidating the trustworthiness of SSL certificates while verifying the identity\nof TLS hosts. It has been extracted from the `Requests`_ project.\n\nInstallation\n------------\n\n``certifi`` is available on PyPI. Simply install it with ``pip``::\n\n $ pip install certifi\n\nUsage\n-----\n\nTo reference the installed certificate authority (CA) bundle, you can use the\nbuilt-in function::\n\n >>> import certifi\n\n >>> certifi.where()\n '/usr/local/lib/python3.7/site-packages/certifi/cacert.pem'\n\nOr from the command line::\n\n $ python -m certifi\n /usr/local/lib/python3.7/site-packages/certifi/cacert.pem\n\nEnjoy!\n\n.. _`Requests`: https://requests.readthedocs.io/en/master/\n\nAddition/Removal of Certificates\n--------------------------------\n\nCertifi does not support any addition/removal or other modification of the\nCA trust store content. This project is intended to provide a reliable and\nhighly portable root of trust to python deployments. Look to upstream projects\nfor methods to use alternate trust.",
"release_date": "2025-10-05T04:12:14",
"release_date": "2025-11-12T02:54:49",
"parties": [
{
"type": "person",
Expand All @@ -347,11 +347,11 @@
"Programming Language :: Python :: 3.9"
],
"homepage_url": "https://github.com/certifi/python-certifi",
"download_url": "https://files.pythonhosted.org/packages/e4/37/af0d2ef3967ac0d6113837b44a4f0bfe1328c2b9763bd5b1744520e5cfed/certifi-2025.10.5-py3-none-any.whl",
"size": 163286,
"download_url": "https://files.pythonhosted.org/packages/70/7d/9bc192684cea499815ff478dfcdc13835ddf401365057044fb721ec6bddb/certifi-2025.11.12-py3-none-any.whl",
"size": 159438,
"sha1": null,
"md5": "7b56f7121949a196441739c539fd01be",
"sha256": "0f212c2744a9bb6de0c56639a6f68afe01ecd92d91f14ae897c4fe7bbeeef0de",
"md5": "7cf85392819e7d6f24c8589826df5d95",
"sha256": "97de8790030bbd5c2d96b7ec782fc2f7820ef8dba6db909ccf95449f2d062d4b",
"sha512": null,
"bug_tracking_url": null,
"code_view_url": "https://github.com/certifi/python-certifi",
Expand All @@ -371,9 +371,9 @@
"dependencies": [],
"repository_homepage_url": null,
"repository_download_url": null,
"api_data_url": "https://pypi.org/pypi/certifi/2025.10.5/json",
"api_data_url": "https://pypi.org/pypi/certifi/2025.11.12/json",
"datasource_id": null,
"purl": "pkg:pypi/certifi@2025.10.5"
"purl": "pkg:pypi/certifi@2025.11.12"
},
{
"type": "pypi",
Expand Down Expand Up @@ -522,12 +522,12 @@
"type": "pypi",
"namespace": null,
"name": "click",
"version": "8.3.0",
"version": "8.3.1",
"qualifiers": {},
"subpath": null,
"primary_language": "Python",
"description": "Composable command line interface toolkit\n<div align=\"center\"><img src=\"https://raw.githubusercontent.com/pallets/click/refs/heads/stable/docs/_static/click-name.svg\" alt=\"\" height=\"150\"></div>\n\n# Click\n\nClick is a Python package for creating beautiful command line interfaces\nin a composable way with as little code as necessary. It's the \"Command\nLine Interface Creation Kit\". It's highly configurable but comes with\nsensible defaults out of the box.\n\nIt aims to make the process of writing command line tools quick and fun\nwhile also preventing any frustration caused by the inability to\nimplement an intended CLI API.\n\nClick in three points:\n\n- Arbitrary nesting of commands\n- Automatic help page generation\n- Supports lazy loading of subcommands at runtime\n\n\n## A Simple Example\n\n```python\nimport click\n\[email protected]()\[email protected](\"--count\", default=1, help=\"Number of greetings.\")\[email protected](\"--name\", prompt=\"Your name\", help=\"The person to greet.\")\ndef hello(count, name):\n \"\"\"Simple program that greets NAME for a total of COUNT times.\"\"\"\n for _ in range(count):\n click.echo(f\"Hello, {name}!\")\n\nif __name__ == '__main__':\n hello()\n```\n\n```\n$ python hello.py --count=3\nYour name: Click\nHello, Click!\nHello, Click!\nHello, Click!\n```\n\n\n## Donate\n\nThe Pallets organization develops and supports Click and other popular\npackages. In order to grow the community of contributors and users, and\nallow the maintainers to devote more time to the projects, [please\ndonate today][].\n\n[please donate today]: https://palletsprojects.com/donate\n\n## Contributing\n\nSee our [detailed contributing documentation][contrib] for many ways to\ncontribute, including reporting issues, requesting features, asking or answering\nquestions, and making PRs.\n\n[contrib]: https://palletsprojects.com/contributing/",
"release_date": "2025-09-18T17:32:22",
"release_date": "2025-11-15T20:45:41",
"parties": [
{
"type": "person",
Expand All @@ -545,11 +545,11 @@
"Typing :: Typed"
],
"homepage_url": null,
"download_url": "https://files.pythonhosted.org/packages/db/d3/9dcc0f5797f070ec8edf30fbadfb200e71d9db6b84d211e3b2085a7589a0/click-8.3.0-py3-none-any.whl",
"size": 107295,
"download_url": "https://files.pythonhosted.org/packages/98/78/01c019cdb5d6498122777c1a43056ebb3ebfeef2076d9d026bfe15583b2b/click-8.3.1-py3-none-any.whl",
"size": 108274,
"sha1": null,
"md5": "dd99757a403a5728bb37a5cab4d46c83",
"sha256": "9b9f285302c6e3064f4330c05f05b81945b2a39544279343e6e7c5f27a9baddc",
"md5": "f032502934a5979330da77e3f09d889c",
"sha256": "981153a64e25f12d547d3426c367a4857371575ee7ad18df2a6183ab0545b2a6",
"sha512": null,
"bug_tracking_url": null,
"code_view_url": "https://github.com/pallets/click/",
Expand All @@ -564,9 +564,9 @@
"dependencies": [],
"repository_homepage_url": null,
"repository_download_url": null,
"api_data_url": "https://pypi.org/pypi/click/8.3.0/json",
"api_data_url": "https://pypi.org/pypi/click/8.3.1/json",
"datasource_id": null,
"purl": "pkg:pypi/[email protected].0"
"purl": "pkg:pypi/[email protected].1"
},
{
"type": "pypi",
Expand Down Expand Up @@ -1267,7 +1267,7 @@
]
},
{
"package": "pkg:pypi/certifi@2025.10.5",
"package": "pkg:pypi/certifi@2025.11.12",
"dependencies": []
},
{
Expand All @@ -1281,13 +1281,14 @@
"dependencies": []
},
{
"package": "pkg:pypi/[email protected].0",
"package": "pkg:pypi/[email protected].1",
"dependencies": []
},
{
"package": "pkg:pypi/[email protected]",
"dependencies": [
"pkg:pypi/[email protected]"
"pkg:pypi/[email protected]",
"pkg:pypi/[email protected]"
]
},
{
Expand All @@ -1302,7 +1303,7 @@
"package": "pkg:pypi/[email protected]",
"dependencies": [
"pkg:pypi/[email protected]",
"pkg:pypi/certifi@2025.10.5",
"pkg:pypi/certifi@2025.11.12",
"pkg:pypi/[email protected]",
"pkg:pypi/[email protected]",
"pkg:pypi/[email protected]"
Expand All @@ -1326,7 +1327,7 @@
{
"package": "pkg:pypi/[email protected]",
"dependencies": [
"pkg:pypi/certifi@2025.10.5",
"pkg:pypi/certifi@2025.11.12",
"pkg:pypi/[email protected]",
"pkg:pypi/[email protected]",
"pkg:pypi/[email protected]"
Expand Down
43 changes: 22 additions & 21 deletions tests/data/azure-devops.req-312-expected.json
Original file line number Diff line number Diff line change
Expand Up @@ -315,12 +315,12 @@
"type": "pypi",
"namespace": null,
"name": "certifi",
"version": "2025.10.5",
"version": "2025.11.12",
"qualifiers": {},
"subpath": null,
"primary_language": "Python",
"description": "Python package for providing Mozilla's CA Bundle.\nCertifi: Python SSL Certificates\n================================\n\nCertifi provides Mozilla's carefully curated collection of Root Certificates for\nvalidating the trustworthiness of SSL certificates while verifying the identity\nof TLS hosts. It has been extracted from the `Requests`_ project.\n\nInstallation\n------------\n\n``certifi`` is available on PyPI. Simply install it with ``pip``::\n\n $ pip install certifi\n\nUsage\n-----\n\nTo reference the installed certificate authority (CA) bundle, you can use the\nbuilt-in function::\n\n >>> import certifi\n\n >>> certifi.where()\n '/usr/local/lib/python3.7/site-packages/certifi/cacert.pem'\n\nOr from the command line::\n\n $ python -m certifi\n /usr/local/lib/python3.7/site-packages/certifi/cacert.pem\n\nEnjoy!\n\n.. _`Requests`: https://requests.readthedocs.io/en/master/\n\nAddition/Removal of Certificates\n--------------------------------\n\nCertifi does not support any addition/removal or other modification of the\nCA trust store content. This project is intended to provide a reliable and\nhighly portable root of trust to python deployments. Look to upstream projects\nfor methods to use alternate trust.",
"release_date": "2025-10-05T04:12:14",
"release_date": "2025-11-12T02:54:49",
"parties": [
{
"type": "person",
Expand All @@ -347,11 +347,11 @@
"Programming Language :: Python :: 3.9"
],
"homepage_url": "https://github.com/certifi/python-certifi",
"download_url": "https://files.pythonhosted.org/packages/e4/37/af0d2ef3967ac0d6113837b44a4f0bfe1328c2b9763bd5b1744520e5cfed/certifi-2025.10.5-py3-none-any.whl",
"size": 163286,
"download_url": "https://files.pythonhosted.org/packages/70/7d/9bc192684cea499815ff478dfcdc13835ddf401365057044fb721ec6bddb/certifi-2025.11.12-py3-none-any.whl",
"size": 159438,
"sha1": null,
"md5": "7b56f7121949a196441739c539fd01be",
"sha256": "0f212c2744a9bb6de0c56639a6f68afe01ecd92d91f14ae897c4fe7bbeeef0de",
"md5": "7cf85392819e7d6f24c8589826df5d95",
"sha256": "97de8790030bbd5c2d96b7ec782fc2f7820ef8dba6db909ccf95449f2d062d4b",
"sha512": null,
"bug_tracking_url": null,
"code_view_url": "https://github.com/certifi/python-certifi",
Expand All @@ -371,9 +371,9 @@
"dependencies": [],
"repository_homepage_url": null,
"repository_download_url": null,
"api_data_url": "https://pypi.org/pypi/certifi/2025.10.5/json",
"api_data_url": "https://pypi.org/pypi/certifi/2025.11.12/json",
"datasource_id": null,
"purl": "pkg:pypi/certifi@2025.10.5"
"purl": "pkg:pypi/certifi@2025.11.12"
},
{
"type": "pypi",
Expand Down Expand Up @@ -522,12 +522,12 @@
"type": "pypi",
"namespace": null,
"name": "click",
"version": "8.3.0",
"version": "8.3.1",
"qualifiers": {},
"subpath": null,
"primary_language": "Python",
"description": "Composable command line interface toolkit\n<div align=\"center\"><img src=\"https://raw.githubusercontent.com/pallets/click/refs/heads/stable/docs/_static/click-name.svg\" alt=\"\" height=\"150\"></div>\n\n# Click\n\nClick is a Python package for creating beautiful command line interfaces\nin a composable way with as little code as necessary. It's the \"Command\nLine Interface Creation Kit\". It's highly configurable but comes with\nsensible defaults out of the box.\n\nIt aims to make the process of writing command line tools quick and fun\nwhile also preventing any frustration caused by the inability to\nimplement an intended CLI API.\n\nClick in three points:\n\n- Arbitrary nesting of commands\n- Automatic help page generation\n- Supports lazy loading of subcommands at runtime\n\n\n## A Simple Example\n\n```python\nimport click\n\[email protected]()\[email protected](\"--count\", default=1, help=\"Number of greetings.\")\[email protected](\"--name\", prompt=\"Your name\", help=\"The person to greet.\")\ndef hello(count, name):\n \"\"\"Simple program that greets NAME for a total of COUNT times.\"\"\"\n for _ in range(count):\n click.echo(f\"Hello, {name}!\")\n\nif __name__ == '__main__':\n hello()\n```\n\n```\n$ python hello.py --count=3\nYour name: Click\nHello, Click!\nHello, Click!\nHello, Click!\n```\n\n\n## Donate\n\nThe Pallets organization develops and supports Click and other popular\npackages. In order to grow the community of contributors and users, and\nallow the maintainers to devote more time to the projects, [please\ndonate today][].\n\n[please donate today]: https://palletsprojects.com/donate\n\n## Contributing\n\nSee our [detailed contributing documentation][contrib] for many ways to\ncontribute, including reporting issues, requesting features, asking or answering\nquestions, and making PRs.\n\n[contrib]: https://palletsprojects.com/contributing/",
"release_date": "2025-09-18T17:32:22",
"release_date": "2025-11-15T20:45:41",
"parties": [
{
"type": "person",
Expand All @@ -545,11 +545,11 @@
"Typing :: Typed"
],
"homepage_url": null,
"download_url": "https://files.pythonhosted.org/packages/db/d3/9dcc0f5797f070ec8edf30fbadfb200e71d9db6b84d211e3b2085a7589a0/click-8.3.0-py3-none-any.whl",
"size": 107295,
"download_url": "https://files.pythonhosted.org/packages/98/78/01c019cdb5d6498122777c1a43056ebb3ebfeef2076d9d026bfe15583b2b/click-8.3.1-py3-none-any.whl",
"size": 108274,
"sha1": null,
"md5": "dd99757a403a5728bb37a5cab4d46c83",
"sha256": "9b9f285302c6e3064f4330c05f05b81945b2a39544279343e6e7c5f27a9baddc",
"md5": "f032502934a5979330da77e3f09d889c",
"sha256": "981153a64e25f12d547d3426c367a4857371575ee7ad18df2a6183ab0545b2a6",
"sha512": null,
"bug_tracking_url": null,
"code_view_url": "https://github.com/pallets/click/",
Expand All @@ -564,9 +564,9 @@
"dependencies": [],
"repository_homepage_url": null,
"repository_download_url": null,
"api_data_url": "https://pypi.org/pypi/click/8.3.0/json",
"api_data_url": "https://pypi.org/pypi/click/8.3.1/json",
"datasource_id": null,
"purl": "pkg:pypi/[email protected].0"
"purl": "pkg:pypi/[email protected].1"
},
{
"type": "pypi",
Expand Down Expand Up @@ -1267,7 +1267,7 @@
]
},
{
"package": "pkg:pypi/certifi@2025.10.5",
"package": "pkg:pypi/certifi@2025.11.12",
"dependencies": []
},
{
Expand All @@ -1281,13 +1281,14 @@
"dependencies": []
},
{
"package": "pkg:pypi/[email protected].0",
"package": "pkg:pypi/[email protected].1",
"dependencies": []
},
{
"package": "pkg:pypi/[email protected]",
"dependencies": [
"pkg:pypi/[email protected]"
"pkg:pypi/[email protected]",
"pkg:pypi/[email protected]"
]
},
{
Expand All @@ -1302,7 +1303,7 @@
"package": "pkg:pypi/[email protected]",
"dependencies": [
"pkg:pypi/[email protected]",
"pkg:pypi/certifi@2025.10.5",
"pkg:pypi/certifi@2025.11.12",
"pkg:pypi/[email protected]",
"pkg:pypi/[email protected]",
"pkg:pypi/[email protected]"
Expand All @@ -1326,7 +1327,7 @@
{
"package": "pkg:pypi/[email protected]",
"dependencies": [
"pkg:pypi/certifi@2025.10.5",
"pkg:pypi/certifi@2025.11.12",
"pkg:pypi/[email protected]",
"pkg:pypi/[email protected]",
"pkg:pypi/[email protected]"
Expand Down
Loading
Loading