44import asyncio
55import aiohttp
66import gzip
7+ import hashlib
78import os
89import re
910import shutil
1617from collections import deque
1718from functools import cmp_to_key
1819
19- async def download_file (session , url , dest_path , max_retries = 3 , retry_delay = 2 , timeout = 60 ):
20+ async def download_file (session , url , dest_path , max_retries = 3 , retry_delay = 2 , timeout = 60 , checksum = None ):
2021 """Asynchronous file download with retries."""
2122 attempt = 0
2223 while attempt < max_retries :
@@ -25,6 +26,13 @@ async def download_file(session, url, dest_path, max_retries=3, retry_delay=2, t
2526 if response .status == 200 :
2627 with open (dest_path , "wb" ) as f :
2728 content = await response .read ()
29+
30+ # verify checksum if provided
31+ if checksum :
32+ sha256 = hashlib .sha256 (content ).hexdigest ()
33+ if sha256 != checksum :
34+ raise Exception (f"SHA256 mismatch for { url } : expected { checksum } , got { sha256 } " )
35+
2836 f .write (content )
2937 print (f"Downloaded { url } at { dest_path } " )
3038 return
@@ -51,22 +59,21 @@ async def download_deb_files_parallel(mirror, packages, tmp_dir):
5159 if filename :
5260 url = f"{ mirror } /{ filename } "
5361 dest_path = os .path .join (tmp_dir , os .path .basename (filename ))
54- tasks .append (asyncio .create_task (download_file (session , url , dest_path )))
62+ tasks .append (asyncio .create_task (download_file (session , url , dest_path , checksum = info . get ( "SHA256" ) )))
5563
5664 await asyncio .gather (* tasks )
5765
58- async def download_package_index_parallel (mirror , arch , suites ):
66+ async def download_package_index_parallel (mirror , arch , suites , check_sig , keyring ):
5967 """Download package index files for specified suites and components entirely in memory."""
6068 tasks = []
6169 timeout = aiohttp .ClientTimeout (total = 60 )
6270
6371 async with aiohttp .ClientSession (timeout = timeout ) as session :
6472 for suite in suites :
6573 for component in ["main" , "universe" ]:
66- url = f"{ mirror } /dists/{ suite } /{ component } /binary-{ arch } /Packages.gz"
67- tasks .append (fetch_and_decompress (session , url ))
74+ tasks .append (fetch_and_decompress (session , mirror , arch , suite , component , check_sig , keyring ))
6875
69- results = await asyncio .gather (* tasks , return_exceptions = True )
76+ results = await asyncio .gather (* tasks )
7077
7178 merged_content = ""
7279 for result in results :
@@ -77,21 +84,74 @@ async def download_package_index_parallel(mirror, arch, suites):
7784
7885 return merged_content
7986
async def fetch_and_decompress(session, mirror, arch, suite, component, check_sig, keyring):
    """Fetch and decompress the Packages.gz index for one suite/component.

    Args:
        session: aiohttp ClientSession used for the request.
        mirror: base mirror URL (e.g. http://archive.ubuntu.com/ubuntu).
        arch: package architecture (e.g. amd64).
        suite: repository suite (e.g. focal).
        component: repository component (e.g. main, universe).
        check_sig: when true, verify the compressed index against the
            SHA256 recorded in the gpg-verified Release file.
        keyring: optional keyring path forwarded to fetch_release_file.

    Returns:
        The decoded index text, or None when the index does not exist or a
        network/decode error occurs (a missing suite/component combination
        on a mirror is expected and non-fatal).

    Raises:
        Exception: when check_sig is set and the SHA256 of the downloaded
            index does not match the Release file. NOTE(fix): the original
            code caught this in the same blanket `except` as network
            errors, so a failed integrity check was silently reported and
            skipped — verification failures must propagate to the caller.
    """
    path = f"{component}/binary-{arch}/Packages.gz"
    url = f"{mirror}/dists/{suite}/{path}"

    try:
        async with session.get(url) as response:
            if response.status != 200:
                print(f"Skipped index: {url} (doesn't exist)")
                return None
            compressed_data = await response.read()
            decompressed_data = gzip.decompress(compressed_data).decode('utf-8')
            print(f"Downloaded index: {url}")
    except Exception as e:
        # Best-effort: transport or decode problems only skip this index.
        print(f"Error fetching {url}: {e}")
        return None

    if check_sig:
        # Verify the package index against the sha256 recorded in the
        # (signature-checked) Release file. A mismatch is fatal.
        release_file_content = await fetch_release_file(session, mirror, suite, keyring)
        packages_sha = parse_release_file(release_file_content, path)

        sha256 = hashlib.sha256(compressed_data).hexdigest()
        if sha256 != packages_sha:
            raise Exception(f"SHA256 mismatch for {path}: expected {packages_sha}, got {sha256}")
        print(f"Checksum verified for {path}")

    return decompressed_data
async def fetch_release_file(session, mirror, suite, keyring):
    """Fetch Release and Release.gpg files and verify the detached signature.

    Args:
        session: aiohttp ClientSession used for the downloads.
        mirror: base mirror URL.
        suite: repository suite whose Release file is fetched.
        keyring: path to a gpg keyring file; empty string means use gpg's
            default keyrings.

    Returns:
        The contents of the verified Release file as a string.

    Raises:
        Exception: when gpg reports a signature verification failure.
    """
    release_url = f"{mirror}/dists/{suite}/Release"
    release_gpg_url = f"{mirror}/dists/{suite}/Release.gpg"

    with tempfile.NamedTemporaryFile() as release_file, tempfile.NamedTemporaryFile() as release_gpg_file:
        await download_file(session, release_url, release_file.name)
        await download_file(session, release_gpg_url, release_gpg_file.name)

        # Build the command as an argv list (shell=False) so the temp file
        # paths are never interpreted by a shell.
        # FIX: the original read the global args.keyring instead of the
        # `keyring` parameter, and left `keyring_arg` unbound (raising
        # UnboundLocalError) whenever no keyring was supplied.
        verify_command = ["gpg"]
        if keyring != '':
            verify_command += ["--keyring", keyring]
        verify_command += ["--verify", release_gpg_file.name, release_file.name]

        print("Verifying signature of Release with Release.gpg.")
        result = subprocess.run(verify_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

        if result.returncode != 0:
            raise Exception(f"Signature verification failed: {result.stderr.decode('utf-8')}")

        print("Signature verified successfully.")

        with open(release_file.name) as f:
            return f.read()
140+
def parse_release_file(content, path):
    """Parses the Release file and returns sha256 checksum of the specified path.

    Checksum entries in a Release file have the shape:
        " <checksum> <size> <path>"
    The same path appears under both the MD5Sum and SHA256 sections; a
    SHA256 digest is distinguished by its 64-hex-character length.
    Raises Exception when no SHA256 entry exists for `path`.
    """
    entries = re.findall(r'^ (\S*) +(\S*) +(\S*)$', content, re.MULTILINE)

    for checksum, _size, entry_path in entries:
        if entry_path == path and len(checksum) == 64:
            return checksum

    raise Exception(f"Could not find checksum for {path} in Release file.")
154+
95155def parse_debian_version (version ):
96156 """Parse a Debian package version into epoch, upstream version, and revision."""
97157 match = re .match (r'^(?:(\d+):)?([^-]+)(?:-(.+))?$' , version )
@@ -171,13 +231,15 @@ def parse_package_index(content):
171231 filename = fields .get ("Filename" )
172232 depends = fields .get ("Depends" )
173233 provides = fields .get ("Provides" , None )
234+ sha256 = fields .get ("SHA256" )
174235
175236 # Only update if package_name is not in packages or if the new version is higher
176237 if package_name not in packages or compare_debian_versions (version , packages [package_name ]["Version" ]) > 0 :
177238 packages [package_name ] = {
178239 "Version" : version ,
179240 "Filename" : filename ,
180- "Depends" : depends
241+ "Depends" : depends ,
242+ "SHA256" : sha256
181243 }
182244
183245 # Update aliases if package provides any alternatives
@@ -301,6 +363,8 @@ def finalize_setup(rootfsdir):
301363 parser .add_argument ('--suite' , required = True , action = 'append' , help = 'Specify one or more repository suites to collect index data.' )
302364 parser .add_argument ("--mirror" , required = False , help = "Mirror (e.g., http://ftp.debian.org/debian-ports etc.)" )
303365 parser .add_argument ("--artool" , required = False , default = "ar" , help = "ar tool to extract debs (e.g., ar, llvm-ar etc.)" )
366+ parser .add_argument ("--force-check-gpg" , required = False , action = 'store_true' , help = "Verify the packages against signatures in Release file." )
367+ parser .add_argument ("--keyring" , required = False , default = '' , help = "Keyring file to check signature of Release file." )
304368 parser .add_argument ("packages" , nargs = "+" , help = "List of package names to be installed." )
305369
306370 args = parser .parse_args ()
@@ -324,7 +388,7 @@ def finalize_setup(rootfsdir):
324388
325389 print (f"Creating rootfs. rootfsdir: { args .rootfsdir } , distro: { args .distro } , arch: { args .arch } , suites: { args .suite } , mirror: { args .mirror } " )
326390
327- package_index_content = asyncio .run (download_package_index_parallel (args .mirror , args .arch , args .suite ))
391+ package_index_content = asyncio .run (download_package_index_parallel (args .mirror , args .arch , args .suite , args . force_check_gpg , args . keyring ))
328392
329393 packages_info , aliases = parse_package_index (package_index_content )
330394
0 commit comments