Skip to content

Commit 4a90992

Browse files
author
gongdahan
committed
Speed up tar packing by lowering the compression level and creating symbolic links for files mapped from the same source
1 parent 447fb8e commit 4a90992

File tree

4 files changed

+142
-18
lines changed

4 files changed

+142
-18
lines changed

pkg/private/tar/build_tar.py

Lines changed: 55 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,8 @@ class TarFile(object):
4242
class DebError(Exception):
4343
pass
4444

45-
def __init__(self, output, directory, compression, compressor, create_parents, allow_dups_from_deps, default_mtime):
45+
def __init__(self, output, directory, compression, compressor, create_parents,
46+
allow_dups_from_deps, auto_deduplicate, default_mtime, compresslevel=None):
4647
# Directory prefix on all output paths
4748
d = directory.strip('/')
4849
self.directory = (d + '/') if d else None
@@ -52,6 +53,8 @@ def __init__(self, output, directory, compression, compressor, create_parents, a
5253
self.default_mtime = default_mtime
5354
self.create_parents = create_parents
5455
self.allow_dups_from_deps = allow_dups_from_deps
56+
self.compresslevel = compresslevel
57+
self.src_to_first_dest_map = {} if auto_deduplicate else None
5558

5659
def __enter__(self):
5760
self.tarfile = tar_writer.TarFileWriter(
@@ -60,7 +63,8 @@ def __enter__(self):
6063
self.compressor,
6164
self.create_parents,
6265
self.allow_dups_from_deps,
63-
default_mtime=self.default_mtime)
66+
default_mtime=self.default_mtime,
67+
compresslevel=self.compresslevel)
6468
return self
6569

6670
def __exit__(self, t, v, traceback):
@@ -98,6 +102,12 @@ def add_file(self, f, destfile, mode=None, ids=None, names=None):
98102
copied to `self.directory/destfile` in the layer.
99103
"""
100104
dest = self.normalize_path(destfile)
105+
if self.src_to_first_dest_map is not None:
106+
normalized_src = normpath(f)
107+
relative_path_to_link_to = self.auto_deduplicate(normalized_src, dest)
108+
if relative_path_to_link_to:
109+
self.add_link(dest, relative_path_to_link_to, mode=mode, ids=ids, names=names)
110+
return
101111
# If mode is unspecified, derive the mode from the file's mode.
102112
if mode is None:
103113
mode = 0o755 if os.access(f, os.X_OK) else 0o644
@@ -114,6 +124,23 @@ def add_file(self, f, destfile, mode=None, ids=None, names=None):
114124
uname=names[0],
115125
gname=names[1])
116126

127+
def auto_deduplicate(self, src_file, dest_file):
  """Decide whether `dest_file` should be a symlink to an earlier copy.

  The first destination seen for a source file is remembered in
  `self.src_to_first_dest_map`. Entries are keyed both by the source path as
  given and by its `os.path.realpath()`, so different spellings of (or
  symlinks to) the same source file resolve to the same first destination.

  Args:
    src_file: path of the source file on disk.
    dest_file: destination path of the file inside the archive.

  Returns:
    The '/'-separated path of the first destination, relative to the
    directory of `dest_file`, when this source was already stored under a
    different destination. None when `self.src_to_first_dest_map` is None
    (de-duplication is off) or when `dest_file` is the first destination
    for this source.
  """
  first_dest_by_src = self.src_to_first_dest_map
  if first_dest_by_src is None:
    return None

  first_dest = first_dest_by_src.get(src_file)
  if first_dest is None:
    # Unknown spelling of the source: fall back to the resolved path so that
    # aliases of one file share a single entry, then cache this spelling too.
    real_src_file = os.path.realpath(src_file)
    first_dest = first_dest_by_src.setdefault(real_src_file, dest_file)
    first_dest_by_src[src_file] = first_dest

  if first_dest == dest_file:
    return None

  link_target = os.path.relpath(first_dest, os.path.dirname(dest_file))
  # The link target is stored inside the tar, so it must use '/' even when
  # os.path.relpath() produces the platform separator (e.g. '\\' on Windows).
  return link_target.replace(os.path.sep, '/')
142+
143+
117144
def add_empty_file(self,
118145
destfile,
119146
mode=None,
@@ -269,13 +296,13 @@ def add_tree(self, tree_top, destpath, mode=None, ids=None, names=None):
269296
for dir in dirs:
270297
to_write[dest_dir + dir] = None
271298
for file in sorted(files):
272-
content_path = os.path.abspath(os.path.join(root, file))
299+
content_path = os.path.join(root, file)
273300
if os.name == "nt":
274301
# "To specify an extended-length path, use the `\\?\` prefix. For
275302
# example, `\\?\D:\very long path`."[1]
276303
#
277304
# [1]: https://learn.microsoft.com/en-us/windows/win32/fileio/maximum-file-path-limitation
278-
to_write[dest_dir + file] = "\\\\?\\" + content_path
305+
to_write[dest_dir + file] = "\\\\?\\" + os.path.abspath(content_path)
279306
else:
280307
to_write[dest_dir + file] = content_path
281308

@@ -297,6 +324,10 @@ def add_tree(self, tree_top, destpath, mode=None, ids=None, names=None):
297324
f_mode = 0o755 if os.access(content_path, os.X_OK) else 0o644
298325
else:
299326
f_mode = mode
327+
relative_path_to_link_to = self.auto_deduplicate(content_path, dest)
328+
if relative_path_to_link_to:
329+
self.add_link(dest, relative_path_to_link_to, mode=f_mode, ids=ids, names=names)
330+
continue
300331
self.tarfile.add_file(
301332
path,
302333
file_content=content_path,
@@ -345,7 +376,7 @@ def main():
345376
fromfile_prefix_chars='@')
346377
parser.add_argument('--output', required=True,
347378
help='The output file, mandatory.')
348-
parser.add_argument('--manifest',
379+
parser.add_argument('--manifest', action='append',
349380
help='manifest of contents to add to the layer.')
350381
parser.add_argument('--mode',
351382
help='Force the mode on the added files (in octal).')
@@ -359,7 +390,7 @@ def main():
359390
parser.add_argument('--deb', action='append',
360391
help='A debian package to add to the layer')
361392
parser.add_argument(
362-
'--directory',
393+
'--directory', action='append',
363394
help='Directory in which to store the file inside the layer')
364395

365396
compression = parser.add_mutually_exclusive_group()
@@ -397,6 +428,12 @@ def main():
397428
parser.add_argument('--allow_dups_from_deps',
398429
action='store_true',
399430
help='')
431+
parser.add_argument('--auto_deduplicate',
432+
action='store_true',
433+
help='Auto create symlinks for files mapped from a same source in manifests.')
434+
parser.add_argument(
435+
'--compresslevel', default='',
436+
help='Specify the numeric compress level in gzip mode; may be 0-9 or empty(6).')
400437
options = parser.parse_args()
401438

402439
# Parse modes arguments
@@ -443,12 +480,14 @@ def main():
443480
# Add objects to the tar file
444481
with TarFile(
445482
options.output,
446-
directory = helpers.GetFlagValue(options.directory),
483+
directory = helpers.GetFlagValue(options.directory[0]),
447484
compression = options.compression,
448485
compressor = options.compressor,
449486
default_mtime=default_mtime,
450487
create_parents=options.create_parents,
451-
allow_dups_from_deps=options.allow_dups_from_deps) as output:
488+
allow_dups_from_deps=options.allow_dups_from_deps,
489+
auto_deduplicate=options.auto_deduplicate,
490+
compresslevel = options.compresslevel) as output:
452491

453492
def file_attributes(filename):
454493
if filename.startswith('/'):
@@ -459,12 +498,18 @@ def file_attributes(filename):
459498
'names': names_map.get(filename, default_ownername),
460499
}
461500

462-
if options.manifest:
463-
with open(options.manifest, 'r') as manifest_fp:
501+
formatted_first_directory = output.directory
502+
manifest_list = zip(options.directory, options.manifest)
503+
if options.auto_deduplicate:
504+
manifest_list = list(manifest_list)[::-1]
505+
for directory, manifest_path in manifest_list:
506+
output.directory = (directory.strip('/') + '/') if directory.strip('/') else None
507+
with open(manifest_path, 'r') as manifest_fp:
464508
manifest_entries = manifest.read_entries_from(manifest_fp)
465509
for entry in manifest_entries:
466510
output.add_manifest_entry(entry, file_attributes)
467511

512+
output.directory = formatted_first_directory
468513
for tar in options.tar or []:
469514
output.add_tar(tar)
470515
for deb in options.deb or []:

pkg/private/tar/tar.bzl

Lines changed: 81 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,13 @@ SUPPORTED_TAR_COMPRESSIONS = (
4040
_DEFAULT_MTIME = -1
4141
_stamp_condition = Label("//pkg/private:private_stamp_detect")
4242

43+
# Carries the manifests a tar target was built from, so that a consumer can
# re-pack from the manifests instead of reading the already-built tar.
MappingManifestInfo = provider(
    "Path mapping to pack files",
    fields = {
        "manifest": "a list of (expanded package_dir, manifest file, file deps) tuples.",
    },
)
49+
4350
def _remap(remap_paths, path):
4451
"""If path starts with a key in remap_paths, rewrite it."""
4552
for prefix, replacement in remap_paths.items():
@@ -71,6 +78,7 @@ def _pkg_tar_impl(ctx):
7178
fail("Both package_dir and package_dir_file attributes were specified")
7279
args.add("--directory", "@" + ctx.file.package_dir_file.path)
7380
files.append(ctx.file.package_dir_file)
81+
package_dir_expanded = None
7482
else:
7583
package_dir_expanded = substitute_package_variables(ctx, ctx.attr.package_dir)
7684
args.add("--directory", package_dir_expanded or "/")
@@ -114,6 +122,10 @@ def _pkg_tar_impl(ctx):
114122
"--owner_names",
115123
"%s=%s" % (_quote(key), ctx.attr.ownernames[key]),
116124
)
125+
if ctx.attr.compresslevel:
126+
args.add("--compresslevel", ctx.attr.compresslevel)
127+
if ctx.attr.auto_deduplicate:
128+
args.add("--auto_deduplicate")
117129

118130
# Now we begin processing the files.
119131
path_mapper = None
@@ -151,8 +163,6 @@ def _pkg_tar_impl(ctx):
151163
add_empty_file(mapping_context, empty_file, ctx.label)
152164
for empty_dir in ctx.attr.empty_dirs or []:
153165
add_directory(mapping_context, empty_dir, ctx.label)
154-
for f in ctx.files.deps:
155-
args.add("--tar", f.path)
156166
for link in ctx.attr.symlinks:
157167
add_symlink(
158168
mapping_context,
@@ -170,6 +180,29 @@ def _pkg_tar_impl(ctx):
170180
write_manifest(ctx, manifest_file, mapping_context.content_map)
171181
args.add("--manifest", manifest_file.path)
172182

183+
does_merge_mappings = not ctx.attr.package_dir_file
184+
new_dir_prefix = package_dir_expanded + "/" if package_dir_expanded else ""
185+
manifest_list = [(package_dir_expanded, manifest_file, mapping_context.file_deps)]
186+
file_dep_set = {}
187+
for dep_i in ctx.attr.deps:
188+
if does_merge_mappings and (MappingManifestInfo in dep_i):
189+
for i_dir, i_manifest, i_file_deps in dep_i[MappingManifestInfo].manifest:
190+
i_dir = new_dir_prefix + (i_dir or "")
191+
args.add("--directory", i_dir)
192+
args.add("--manifest", i_manifest.path)
193+
files.append(i_manifest)
194+
for i in i_file_deps:
195+
file_dep_set[i] = 1
196+
manifest_list.append((i_dir, i_manifest, i_file_deps))
197+
else:
198+
for dep_file in dep_i.files.to_list():
199+
if dep_file.path.startswith("bazel-out/"):
200+
fail("Please avoid depending on generated .tar directly: " + dep_file.path)
201+
args.add("--tar", dep_file.path)
202+
files += dep_i.files.to_list()
203+
for i in mapping_context.file_deps:
204+
file_dep_set[i] = 1
205+
173206
args.set_param_file_format("flag_per_line")
174207
args.use_param_file("@%s", use_always = False)
175208

@@ -180,8 +213,8 @@ def _pkg_tar_impl(ctx):
180213
args.add("--allow_dups_from_deps")
181214

182215
inputs = depset(
183-
direct = ctx.files.deps + files,
184-
transitive = mapping_context.file_deps,
216+
direct = files,
217+
transitive = list(file_dep_set.keys()),
185218
)
186219

187220
ctx.actions.run(
@@ -212,7 +245,11 @@ def _pkg_tar_impl(ctx):
212245
OutputGroupInfo(
213246
manifest = [manifest_file],
214247
),
215-
]
248+
] + ([
249+
MappingManifestInfo(
250+
manifest = manifest_list,
251+
),
252+
] if does_merge_mappings else [])
216253

217254
# A rule for creating a tar file, see README.md
218255
pkg_tar_impl = rule(
@@ -256,6 +293,10 @@ pkg_tar_impl = rule(
256293
"extension": attr.string(default = "tar"),
257294
"symlinks": attr.string_dict(),
258295
"empty_files": attr.string_list(),
296+
"auto_deduplicate": attr.bool(
297+
doc = """Auto create symlinks for files mapped from a same source in manifests.""",
298+
default = False,
299+
),
259300
"include_runfiles": attr.bool(
260301
doc = ("""Include runfiles for executables. These appear as they would in bazel-bin."""
261302
+ """For example: 'path/to/myprog.runfiles/path/to/my_data.txt'."""),
@@ -272,6 +313,10 @@ pkg_tar_impl = rule(
272313
),
273314
"create_parents": attr.bool(default = True),
274315
"allow_duplicates_from_deps": attr.bool(default = False),
316+
"compresslevel": attr.string(
317+
doc = """Specify the numeric compress level in gzip mode; may be 0-9 or empty (6).""",
318+
default = "",
319+
),
275320

276321
# Common attributes
277322
"out": attr.output(mandatory = True),
@@ -342,3 +387,34 @@ def pkg_tar(name, **kwargs):
342387
}),
343388
**kwargs
344389
)
390+
391+
def _pkg_tar_group_impl(ctx):
    """Implementation of pkg_tar_group.

    Collects the output files of every src and concatenates the manifest
    tuples of the srcs that carry MappingManifestInfo.

    Fails when the group mixes srcs that contribute manifests with srcs that
    do not. The check is done per src rather than by comparing the number of
    manifests with the number of files: a single src may contribute several
    manifests, which could otherwise hide a src that contributes none.

    Args:
        ctx: the rule context.

    Returns:
        A list with DefaultInfo (all grouped files) and MappingManifestInfo
        (all collected manifest tuples; empty when no src provides any).
    """
    manifest_list = []
    output_files = []
    has_src_without_manifest = False
    for src in ctx.attr.srcs:
        src_manifests = src[MappingManifestInfo].manifest if MappingManifestInfo in src else []
        if src_manifests:
            manifest_list += src_manifests
        else:
            has_src_without_manifest = True
        output_files += src.files.to_list()
    if manifest_list and has_src_without_manifest:
        fail("Can not merge generated tar files and source ones; please split into different groups.")
    return [
        DefaultInfo(
            files = depset(output_files),
        ),
        MappingManifestInfo(
            manifest = manifest_list,
        ),
    ]
408+
409+
pkg_tar_group = rule(
410+
doc = """Expose a group of source tar files.""",
411+
implementation = _pkg_tar_group_impl,
412+
attrs = {
413+
"srcs": attr.label_list(
414+
doc = """Tar files generated by pkg_tar().""",
415+
mandatory = True,
416+
allow_files = tar_filetype,
417+
),
418+
},
419+
provides = [MappingManifestInfo],
420+
)

pkg/private/tar/tar_writer.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,8 @@ def __init__(self,
4949
create_parents=False,
5050
allow_dups_from_deps=True,
5151
default_mtime=None,
52-
preserve_tar_mtimes=True):
52+
preserve_tar_mtimes=True,
53+
compresslevel=None):
5354
"""TarFileWriter wraps tarfile.open().
5455
5556
Args:
@@ -86,10 +87,11 @@ def __init__(self,
8687
else:
8788
mode = 'w:'
8889
if compression in ['tgz', 'gz']:
90+
compresslevel = int(compresslevel) if compresslevel or compresslevel == 0 else 6
8991
# The Tarfile class doesn't allow us to specify gzip's mtime attribute.
9092
# Instead, we manually reimplement gzopen from tarfile.py and set mtime.
9193
self.fileobj = gzip.GzipFile(
92-
filename=name, mode='w', compresslevel=6, mtime=self.default_mtime)
94+
filename=name, mode='w', compresslevel=compresslevel, mtime=self.default_mtime)
9395
self.compressor_proc = None
9496
if self.compressor_cmd:
9597
mode = 'w|'

pkg/tar.bzl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
# limitations under the License.
1414
"""Forwarder for pkg_tar."""
1515

16-
load("//pkg/private/tar:tar.bzl", _pkg_tar = "pkg_tar")
16+
load("//pkg/private/tar:tar.bzl", _pkg_tar = "pkg_tar", _pkg_tar_group = "pkg_tar_group")
1717

1818
pkg_tar = _pkg_tar
19+
pkg_tar_group = _pkg_tar_group

0 commit comments

Comments
 (0)