diff --git a/pkg/private/tar/build_tar.py b/pkg/private/tar/build_tar.py index ce80a9fe..ce531649 100644 --- a/pkg/private/tar/build_tar.py +++ b/pkg/private/tar/build_tar.py @@ -42,7 +42,8 @@ class TarFile(object): class DebError(Exception): pass - def __init__(self, output, directory, compression, compressor, create_parents, allow_dups_from_deps, default_mtime): + def __init__(self, output, directory, compression, compressor, create_parents, + allow_dups_from_deps, auto_deduplicate, default_mtime, compresslevel=None): # Directory prefix on all output paths d = directory.strip('/') self.directory = (d + '/') if d else None @@ -52,6 +53,8 @@ def __init__(self, output, directory, compression, compressor, create_parents, a self.default_mtime = default_mtime self.create_parents = create_parents self.allow_dups_from_deps = allow_dups_from_deps + self.compresslevel = compresslevel + self.src_to_first_dest_map = {} if auto_deduplicate else None def __enter__(self): self.tarfile = tar_writer.TarFileWriter( @@ -60,7 +63,8 @@ def __enter__(self): self.compressor, self.create_parents, self.allow_dups_from_deps, - default_mtime=self.default_mtime) + default_mtime=self.default_mtime, + compresslevel=self.compresslevel) return self def __exit__(self, t, v, traceback): @@ -98,6 +102,12 @@ def add_file(self, f, destfile, mode=None, ids=None, names=None): copied to `self.directory/destfile` in the layer. """ dest = self.normalize_path(destfile) + if self.src_to_first_dest_map is not None: + normalized_src = normpath(f) + relative_path_to_link_to = self.auto_deduplicate(normalized_src, dest) + if relative_path_to_link_to: + self.add_link(dest, relative_path_to_link_to, mode=mode, ids=ids, names=names) + return # If mode is unspecified, derive the mode from the file's mode. if mode is None: mode = 0o755 if os.access(f, os.X_OK) else 0o644 @@ -114,6 +124,23 @@ def add_file(self, f, destfile, mode=None, ids=None, names=None): uname=names[0], gname=names[1]) + def auto_deduplicate(self, src_file, dest_file): + """Detect whether to de-duplicate the destination file + + Returns: + The relative path to create a symlink to or None + """ + if self.src_to_first_dest_map is not None: + first_dest = self.src_to_first_dest_map.get(src_file) + if first_dest is None: + real_src_file = os.path.realpath(src_file) + first_dest = self.src_to_first_dest_map.setdefault(real_src_file, dest_file) + self.src_to_first_dest_map[src_file] = first_dest + if first_dest != dest_file: + return os.path.relpath(first_dest, os.path.dirname(dest_file)) + return None + + def add_empty_file(self, destfile, mode=None, @@ -269,13 +296,13 @@ def add_tree(self, tree_top, destpath, mode=None, ids=None, names=None): for dir in dirs: to_write[dest_dir + dir] = None for file in sorted(files): - content_path = os.path.abspath(os.path.join(root, file)) + content_path = os.path.join(root, file) if os.name == "nt": # "To specify an extended-length path, use the `\\?\` prefix. For # example, `\\?\D:\very long path`."[1] # # [1]: https://learn.microsoft.com/en-us/windows/win32/fileio/maximum-file-path-limitation - to_write[dest_dir + file] = "\\\\?\\" + content_path + to_write[dest_dir + file] = "\\\\?\\" + os.path.abspath(content_path) else: to_write[dest_dir + file] = content_path @@ -297,6 +324,10 @@ def add_tree(self, tree_top, destpath, mode=None, ids=None, names=None): f_mode = 0o755 if os.access(content_path, os.X_OK) else 0o644 else: f_mode = mode + relative_path_to_link_to = self.auto_deduplicate(content_path, dest) + if relative_path_to_link_to: + self.add_link(dest, relative_path_to_link_to, mode=f_mode, ids=ids, names=names) + continue self.tarfile.add_file( path, file_content=content_path, @@ -345,7 +376,7 @@ def main(): fromfile_prefix_chars='@') parser.add_argument('--output', required=True, help='The output file, mandatory.') - parser.add_argument('--manifest', + parser.add_argument('--manifest', action='append', help='manifest of contents to add to the layer.') parser.add_argument('--mode', help='Force the mode on the added files (in octal).') @@ -359,7 +390,7 @@ def main(): parser.add_argument('--deb', action='append', help='A debian package to add to the layer') parser.add_argument( - '--directory', + '--directory', action='append', help='Directory in which to store the file inside the layer') compression = parser.add_mutually_exclusive_group() @@ -397,6 +428,12 @@ def main(): parser.add_argument('--allow_dups_from_deps', action='store_true', help='') + parser.add_argument('--auto_deduplicate', + action='store_true', + help='Auto create symlinks for files mapped from a same source in manifests.') + parser.add_argument( + '--compresslevel', default='', + help='Specify the numeric compress level in gzip mode; may be 0-9 or empty(6).') options = parser.parse_args() # Parse modes arguments @@ -443,12 +480,14 @@ def main(): # Add objects to the tar file with TarFile( options.output, - directory = helpers.GetFlagValue(options.directory), + directory = helpers.GetFlagValue(options.directory[0]), compression = options.compression, compressor = options.compressor, default_mtime=default_mtime, create_parents=options.create_parents, - allow_dups_from_deps=options.allow_dups_from_deps) as output: + allow_dups_from_deps=options.allow_dups_from_deps, + auto_deduplicate=options.auto_deduplicate, + compresslevel = options.compresslevel) as output: def file_attributes(filename): if filename.startswith('/'): @@ -459,12 +498,18 @@ def file_attributes(filename): 'names': names_map.get(filename, default_ownername), } - if options.manifest: - with open(options.manifest, 'r') as manifest_fp: + formatted_first_directory = output.directory + manifest_list = zip(options.directory, options.manifest) + if options.auto_deduplicate: + manifest_list = list(manifest_list)[::-1] + for directory, manifest_path in manifest_list: + output.directory = (directory.strip('/') + '/') if directory.strip('/') else None + with open(manifest_path, 'r') as manifest_fp: manifest_entries = manifest.read_entries_from(manifest_fp) for entry in manifest_entries: output.add_manifest_entry(entry, file_attributes) + output.directory = formatted_first_directory for tar in options.tar or []: output.add_tar(tar) for deb in options.deb or []: diff --git a/pkg/private/tar/tar.bzl b/pkg/private/tar/tar.bzl index 120820a8..30528317 100644 --- a/pkg/private/tar/tar.bzl +++ b/pkg/private/tar/tar.bzl @@ -40,6 +40,13 @@ SUPPORTED_TAR_COMPRESSIONS = ( _DEFAULT_MTIME = -1 _stamp_condition = Label("//pkg/private:private_stamp_detect") +MappingManifestInfo = provider( + "Path mapping to pack files", + fields = { + "manifest": "a list of (expanded package_dir, manfiest files, deps).", + }, +) + def _remap(remap_paths, path): """If path starts with a key in remap_paths, rewrite it.""" for prefix, replacement in remap_paths.items(): @@ -71,6 +78,7 @@ def _pkg_tar_impl(ctx): fail("Both package_dir and package_dir_file attributes were specified") args.add("--directory", "@" + ctx.file.package_dir_file.path) files.append(ctx.file.package_dir_file) + package_dir_expanded = None else: package_dir_expanded = substitute_package_variables(ctx, ctx.attr.package_dir) args.add("--directory", package_dir_expanded or "/") @@ -114,6 +122,10 @@ def _pkg_tar_impl(ctx): "--owner_names", "%s=%s" % (_quote(key), ctx.attr.ownernames[key]), ) + if ctx.attr.compresslevel: + args.add("--compresslevel", ctx.attr.compresslevel) + if ctx.attr.auto_deduplicate: + args.add("--auto_deduplicate") # Now we begin processing the files. path_mapper = None @@ -151,8 +163,6 @@ def _pkg_tar_impl(ctx): add_empty_file(mapping_context, empty_file, ctx.label) for empty_dir in ctx.attr.empty_dirs or []: add_directory(mapping_context, empty_dir, ctx.label) - for f in ctx.files.deps: - args.add("--tar", f.path) for link in ctx.attr.symlinks: add_symlink( mapping_context, @@ -170,6 +180,29 @@ def _pkg_tar_impl(ctx): write_manifest(ctx, manifest_file, mapping_context.content_map) args.add("--manifest", manifest_file.path) + does_merge_mappings = not ctx.attr.package_dir_file + new_dir_prefix = package_dir_expanded + "/" if package_dir_expanded else "" + manifest_list = [(package_dir_expanded, manifest_file, mapping_context.file_deps)] + file_dep_set = {} + for dep_i in ctx.attr.deps: + if does_merge_mappings and (MappingManifestInfo in dep_i): + for i_dir, i_manifest, i_file_deps in dep_i[MappingManifestInfo].manifest: + i_dir = new_dir_prefix + (i_dir or "") + args.add("--directory", i_dir) + args.add("--manifest", i_manifest.path) + files.append(i_manifest) + for i in i_file_deps: + file_dep_set[i] = 1 + manifest_list.append((i_dir, i_manifest, i_file_deps)) + else: + for dep_file in dep_i.files.to_list(): + if dep_file.path.startswith("bazel-out/"): + fail("Please avoid depending on generated .tar directly: " + dep_file.path) + args.add("--tar", dep_file.path) + files += dep_i.files.to_list() + for i in mapping_context.file_deps: + file_dep_set[i] = 1 + args.set_param_file_format("flag_per_line") args.use_param_file("@%s", use_always = False) @@ -180,8 +213,8 @@ def _pkg_tar_impl(ctx): args.add("--allow_dups_from_deps") inputs = depset( - direct = ctx.files.deps + files, - transitive = mapping_context.file_deps, + direct = files, + transitive = list(file_dep_set.keys()), ) ctx.actions.run( @@ -212,7 +245,11 @@ def _pkg_tar_impl(ctx): OutputGroupInfo( manifest = [manifest_file], ), - ] + ] + ([ + MappingManifestInfo( + manifest = manifest_list, + ), + ] if does_merge_mappings else []) # A rule for creating a tar file, see README.md pkg_tar_impl = rule( @@ -256,6 +293,10 @@ pkg_tar_impl = rule( "extension": attr.string(default = "tar"), "symlinks": attr.string_dict(), "empty_files": attr.string_list(), + "auto_deduplicate": attr.bool( + doc = """Auto create symlinks for files mapped from a same source in manifests.""", + default = False, + ), "include_runfiles": attr.bool( doc = ("""Include runfiles for executables. These appear as they would in bazel-bin.""" + """For example: 'path/to/myprog.runfiles/path/to/my_data.txt'."""), @@ -272,6 +313,10 @@ pkg_tar_impl = rule( ), "create_parents": attr.bool(default = True), "allow_duplicates_from_deps": attr.bool(default = False), + "compresslevel": attr.string( + doc = """Specify the numeric compress level in gzip mode; may be 0-9 or empty (6).""", + default = "", + ), # Common attributes "out": attr.output(mandatory = True), @@ -342,3 +387,34 @@ def pkg_tar(name, **kwargs): }), **kwargs ) + +def _pkg_tar_group_impl(ctx): + manifest_list = [] + output_files = [] + for i in ctx.attr.srcs: + if MappingManifestInfo in i: + manifest_list += i[MappingManifestInfo].manifest + output_files += i.files.to_list() + if manifest_list and len(manifest_list) < len(output_files): + fail("Can not merge generated tar files and source ones; please split into different groups.") + return [ + DefaultInfo( + files = depset(output_files), + ), + MappingManifestInfo( + manifest = manifest_list, + ), + ] + +pkg_tar_group = rule( + doc = """Expose a group of source tar files.""", + implementation = _pkg_tar_group_impl, + attrs = { + "srcs": attr.label_list( + doc = """Tar files generated by pkg_tar().""", + mandatory = True, + allow_files = tar_filetype, + ), + }, + provides = [MappingManifestInfo], +) diff --git a/pkg/private/tar/tar_writer.py b/pkg/private/tar/tar_writer.py index cee232da..3e740c3e 100644 --- a/pkg/private/tar/tar_writer.py +++ b/pkg/private/tar/tar_writer.py @@ -49,7 +49,8 @@ def __init__(self, create_parents=False, allow_dups_from_deps=True, default_mtime=None, - preserve_tar_mtimes=True): + preserve_tar_mtimes=True, + compresslevel=None): """TarFileWriter wraps tarfile.open(). Args: @@ -86,10 +87,11 @@ def __init__(self, else: mode = 'w:' if compression in ['tgz', 'gz']: + compresslevel = int(compresslevel) if compresslevel or compresslevel == 0 else 6 # The Tarfile class doesn't allow us to specify gzip's mtime attribute. # Instead, we manually reimplement gzopen from tarfile.py and set mtime. self.fileobj = gzip.GzipFile( - filename=name, mode='w', compresslevel=6, mtime=self.default_mtime) + filename=name, mode='w', compresslevel=compresslevel, mtime=self.default_mtime) self.compressor_proc = None if self.compressor_cmd: mode = 'w|' diff --git a/pkg/tar.bzl b/pkg/tar.bzl index 697f95a7..e586b824 100644 --- a/pkg/tar.bzl +++ b/pkg/tar.bzl @@ -13,6 +13,7 @@ # limitations under the License. """Forwarder for pkg_tar.""" -load("//pkg/private/tar:tar.bzl", _pkg_tar = "pkg_tar") +load("//pkg/private/tar:tar.bzl", _pkg_tar = "pkg_tar", _pkg_tar_group = "pkg_tar_group") pkg_tar = _pkg_tar +pkg_tar_group = _pkg_tar_group