From 41f0bdf17cfec68c3ef6d3bc87c639c7d09f43f1 Mon Sep 17 00:00:00 2001 From: Raven Kopelman Date: Tue, 27 Oct 2020 13:39:32 -0700 Subject: [PATCH 1/2] Prevent files from being partially read from the cache --- SCons/CacheDir.py | 19 ++++++++++++++++--- SCons/Taskmaster/__init__.py | 7 +++++-- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/SCons/CacheDir.py b/SCons/CacheDir.py index a5184b6e9c..cb49f94c4b 100644 --- a/SCons/CacheDir.py +++ b/SCons/CacheDir.py @@ -66,7 +66,20 @@ def CacheRetrieveFunc(target, source, env) -> int: if fs.islink(cachefile): fs.symlink(fs.readlink(cachefile), t.get_internal_path()) else: - cd.copy_from_cache(env, cachefile, t.get_internal_path()) + try: + env.copy_from_cache(cachefile, t.get_internal_path()) + except: + try: + # In case file was partially retrieved (and now corrupt) + # delete it to avoid poisoning commands like 'ar' that + # read from the initial state of the file they are writing + # to. + t.fs.unlink(t.get_internal_path()) + except: + pass + + raise + try: os.utime(cachefile, None) except OSError: @@ -80,8 +93,8 @@ def CacheRetrieveString(target, source, env) -> str: cd = env.get_CacheDir() cachedir, cachefile = cd.cachepath(t) if t.fs.exists(cachefile): - return "Retrieved `%s' from cache" % t.get_internal_path() - return "" + return "Retrieving `%s' from cache" % t.get_internal_path() + return None CacheRetrieve = SCons.Action.Action(CacheRetrieveFunc, CacheRetrieveString) diff --git a/SCons/Taskmaster/__init__.py b/SCons/Taskmaster/__init__.py index 4d768ee90b..5511b82129 100644 --- a/SCons/Taskmaster/__init__.py +++ b/SCons/Taskmaster/__init__.py @@ -233,10 +233,13 @@ def execute(self): cached_targets.append(t) if len(cached_targets) < len(self.targets): # Remove targets before building. It's possible that we - # partially retrieved targets from the cache, leaving - # them in read-only mode. That might cause the command + # retrieved a subset of targets from the cache, leaving + # them in an inconsistent state. That might cause the command # to fail. # + # Note that retrieve_from_cache() ensures no single target can + # be partially retrieved (file left in corrupt state). + # for t in cached_targets: try: t.fs.unlink(t.get_internal_path()) From 3b7bac64561b022411ffa8d38a8ccdfd68afa264 Mon Sep 17 00:00:00 2001 From: William Deegan Date: Tue, 24 Nov 2020 09:44:24 -0800 Subject: [PATCH 2/2] Update Cachedir file copy error handling to handle the expected exceptions shutil.SameFileError, IOError for the default copy shutil.copy(). Move notice that file has been retrieved until after the file has actually been copied and didn't thrown exception --- CHANGES.txt | 11 ++++------- SCons/CacheDir.py | 16 +++++++--------- test/CacheDir/debug.py | 16 ++++++++-------- 3 files changed, 19 insertions(+), 24 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 2641fc846e..18ee59b318 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -15,6 +15,10 @@ RELEASE VERSION/DATE TO BE FILLED IN LATER From John Doe: - Whatever John Doe did. + From Raven Kopelman (@ravenAtSafe on GitHub): + - Prevent files from being partially read from the cache. If there's an error while copying the file + is now removed. + From Bill Prendergast: - Fixed SCons.Variables.PackageVariable to correctly test the default setting against both enable & disable strings. (Fixes #4702) @@ -1547,13 +1551,6 @@ RELEASE 4.1.0 - Tues, 19 Jan 2021 15:04:42 -0700 From Daniel Moody: - Fix issue where java parsed a class incorrectly from lambdas used after a new. - From Simon Tegelid - - Fix using TEMPFILE in multiple actions in an action list. Previously a builder, or command - with an action list like this: - ['${TEMPFILE("xxx.py -otempfile $SOURCE")}', '${TEMPFILE("yyy.py -o$TARGET tempfile")}'] - Could yield a single tempfile with the first TEMPFILE's contents, used by both steps - in the action list. - From Mats Wichmann: - Complete tests for Dictionary, env.keys() and env.values() for OverrideEnvironment. Enable env.setdefault() method, add tests. diff --git a/SCons/CacheDir.py b/SCons/CacheDir.py index cb49f94c4b..b12c07c8d6 100644 --- a/SCons/CacheDir.py +++ b/SCons/CacheDir.py @@ -30,6 +30,7 @@ import shutil import stat import sys +import shutil import tempfile import uuid @@ -61,31 +62,28 @@ def CacheRetrieveFunc(target, source, env) -> int: cd.CacheDebug('CacheRetrieve(%s): %s not in cache\n', t, cachefile) return 1 cd.hits += 1 - cd.CacheDebug('CacheRetrieve(%s): retrieving from %s\n', t, cachefile) if SCons.Action.execute_actions: if fs.islink(cachefile): fs.symlink(fs.readlink(cachefile), t.get_internal_path()) else: try: env.copy_from_cache(cachefile, t.get_internal_path()) - except: - try: + except (shutil.SameFileError, IOError) as e: # In case file was partially retrieved (and now corrupt) # delete it to avoid poisoning commands like 'ar' that # read from the initial state of the file they are writing # to. t.fs.unlink(t.get_internal_path()) - except: - pass - - raise + cd.CacheDebug('CacheRetrieve(%s): Error while retrieving from %s deleting %s\n', t, cachefile) + raise e try: os.utime(cachefile, None) except OSError: pass st = fs.stat(cachefile) - fs.chmod(t.get_internal_path(), stat.S_IMODE(st.st_mode) | stat.S_IWRITE) + fs.chmod(t.get_internal_path(), stat.S_IMODE(st[stat.ST_MODE]) | stat.S_IWRITE) + cd.CacheDebug('CacheRetrieve(%s): retrieved from %s\n', t, cachefile) return 0 def CacheRetrieveString(target, source, env) -> str: @@ -93,7 +91,7 @@ def CacheRetrieveString(target, source, env) -> str: cd = env.get_CacheDir() cachedir, cachefile = cd.cachepath(t) if t.fs.exists(cachefile): - return "Retrieving `%s' from cache" % t.get_internal_path() + return "Retrieved `%s' from cache" % t.get_internal_path() return None CacheRetrieve = SCons.Action.Action(CacheRetrieveFunc, CacheRetrieveString) diff --git a/test/CacheDir/debug.py b/test/CacheDir/debug.py index cf16f98d10..707e782525 100644 --- a/test/CacheDir/debug.py +++ b/test/CacheDir/debug.py @@ -144,16 +144,16 @@ def cat(env, source, target): expect = \ r"""Retrieved `aaa.out' from cache -CacheRetrieve\(aaa.out\): retrieving from [0-9a-fA-F]+ +CacheRetrieve\(aaa.out\): retrieved from [0-9a-fA-F]+ requests: [0-9]+, hits: [0-9]+, misses: [0-9]+, hit rate: [0-9]+\.[0-9]{2,}% Retrieved `bbb.out' from cache -CacheRetrieve\(bbb.out\): retrieving from [0-9a-fA-F]+ +CacheRetrieve\(bbb.out\): retrieved from [0-9a-fA-F]+ requests: [0-9]+, hits: [0-9]+, misses: [0-9]+, hit rate: [0-9]+\.[0-9]{2,}% Retrieved `ccc.out' from cache -CacheRetrieve\(ccc.out\): retrieving from [0-9a-fA-F]+ +CacheRetrieve\(ccc.out\): retrieved from [0-9a-fA-F]+ requests: [0-9]+, hits: [0-9]+, misses: [0-9]+, hit rate: [0-9]+\.[0-9]{2,}% Retrieved `all' from cache -CacheRetrieve\(all\): retrieving from [0-9a-fA-F]+ +CacheRetrieve\(all\): retrieved from [0-9a-fA-F]+ requests: [0-9]+, hits: [0-9]+, misses: [0-9]+, hit rate: [0-9]+\.[0-9]{2,}% """ @@ -178,13 +178,13 @@ def cat(env, source, target): stdout=expect) expect = \ -r"""CacheRetrieve\(aaa.out\): retrieving from [0-9a-fA-F]+ +r"""CacheRetrieve\(aaa.out\): retrieved from [0-9a-fA-F]+ requests: [0-9]+, hits: [0-9]+, misses: [0-9]+, hit rate: [0-9]+\.[0-9]{2,}% -CacheRetrieve\(bbb.out\): retrieving from [0-9a-fA-F]+ +CacheRetrieve\(bbb.out\): retrieved from [0-9a-fA-F]+ requests: [0-9]+, hits: [0-9]+, misses: [0-9]+, hit rate: [0-9]+\.[0-9]{2,}% -CacheRetrieve\(ccc.out\): retrieving from [0-9a-fA-F]+ +CacheRetrieve\(ccc.out\): retrieved from [0-9a-fA-F]+ requests: [0-9]+, hits: [0-9]+, misses: [0-9]+, hit rate: [0-9]+\.[0-9]{2,}% -CacheRetrieve\(all\): retrieving from [0-9a-fA-F]+ +CacheRetrieve\(all\): retrieved from [0-9a-fA-F]+ requests: [0-9]+, hits: [0-9]+, misses: [0-9]+, hit rate: [0-9]+\.[0-9]{2,}% """