From 0bf992ee0595727b4a2c563fd5023287727dd8b3 Mon Sep 17 00:00:00 2001 From: Kelvin Zhang Date: Mon, 5 Jun 2023 09:58:16 -0700 Subject: [PATCH 1/4] Use python based unzip function for portability Bug: 283033491 Test: th (cherry picked from https://android-review.googlesource.com/q/commit:7c9205b008002c41880241e866ad4b5438112439) Merged-In: Ief86b55c1d4a14220a0fb593c583a721d59cf86c Change-Id: Ief86b55c1d4a14220a0fb593c583a721d59cf86c --- tools/releasetools/common.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/tools/releasetools/common.py b/tools/releasetools/common.py index b50caaa95e..420be97efd 100644 --- a/tools/releasetools/common.py +++ b/tools/releasetools/common.py @@ -2112,20 +2112,19 @@ def UnzipToDir(filename, dirname, patterns=None): archvie. Non-matching patterns will be filtered out. If there's no match after the filtering, no file will be unzipped. """ - cmd = ["unzip", "-o", "-q", filename, "-d", dirname] - if patterns is not None: + with zipfile.ZipFile(filename, allowZip64=True, mode="r") as input_zip: # Filter out non-matching patterns. unzip will complain otherwise. - with zipfile.ZipFile(filename, allowZip64=True) as input_zip: + if patterns is not None: names = input_zip.namelist() - filtered = [ - pattern for pattern in patterns if fnmatch.filter(names, pattern)] + filtered = [name for name in names if any( + [fnmatch.fnmatch(name, p) for p in patterns])] - # There isn't any matching files. Don't unzip anything. - if not filtered: - return - cmd.extend(filtered) - - RunAndCheckOutput(cmd) + # There isn't any matching files. Don't unzip anything. 
+ if not filtered: + return + input_zip.extractall(dirname, filtered) + else: + input_zip.extractall(dirname) def UnzipTemp(filename, patterns=None): From 3c739f18475a9d31be299f3016dddc6a5ac02e16 Mon Sep 17 00:00:00 2001 From: Kelvin Zhang Date: Wed, 14 Jun 2023 12:53:29 -0700 Subject: [PATCH 2/4] Fix python3.11's support for zip64 Bug: 283033491 Test: check_target_files_signatures -v -l (cherry picked from https://android-review.googlesource.com/q/commit:38d0c373ac9e0f00c6e677c41bbc85f0e364ba02) Merged-In: I9c1a5346e3a5f3920242dc9a5268d999f50a4937 Change-Id: I9c1a5346e3a5f3920242dc9a5268d999f50a4937 --- tools/releasetools/common.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/tools/releasetools/common.py b/tools/releasetools/common.py index 420be97efd..f2e77756b0 100644 --- a/tools/releasetools/common.py +++ b/tools/releasetools/common.py @@ -2114,17 +2114,28 @@ def UnzipToDir(filename, dirname, patterns=None): """ with zipfile.ZipFile(filename, allowZip64=True, mode="r") as input_zip: # Filter out non-matching patterns. unzip will complain otherwise. + entries = input_zip.infolist() + # b/283033491 + # Per https://en.wikipedia.org/wiki/ZIP_(file_format)#Central_directory_file_header + # In zip64 mode, central directory record's header_offset field might be + # set to 0xFFFFFFFF if header offset is > 2^32. In this case, the extra + # fields will contain an 8 byte little endian integer at offset 20 + # to indicate the actual local header offset. + # As of python3.11, python does not handle zip64 central directories + # correctly, so we will manually do the parsing here. 
+ for entry in entries: + if entry.header_offset == 0xFFFFFFFF and len(entry.extra) >= 28: + entry.header_offset = int.from_bytes(entry.extra[20:28], "little") if patterns is not None: - names = input_zip.namelist() - filtered = [name for name in names if any( - [fnmatch.fnmatch(name, p) for p in patterns])] + filtered = [info for info in entries if any( + [fnmatch.fnmatch(info.filename, p) for p in patterns])] # There isn't any matching files. Don't unzip anything. if not filtered: return input_zip.extractall(dirname, filtered) else: - input_zip.extractall(dirname) + input_zip.extractall(dirname, entries) def UnzipTemp(filename, patterns=None): From e66c973a36339484e8fa59c37c9005badca62a5d Mon Sep 17 00:00:00 2001 From: Kelvin Zhang Date: Sat, 17 Jun 2023 09:18:15 -0700 Subject: [PATCH 3/4] Handle zip64 extra fields better Test: check_target_files_signatures Bug: 283033491 (cherry picked from https://android-review.googlesource.com/q/commit:1e774245a4dd2763545827d65462e5c115eecb63) Merged-In: I7da89f8389c09cc99201cff342483c158bd7e9c1 Change-Id: I7da89f8389c09cc99201cff342483c158bd7e9c1 --- .../check_target_files_signatures.py | 5 +++-- tools/releasetools/common.py | 18 ++++++++++++++++-- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/tools/releasetools/check_target_files_signatures.py b/tools/releasetools/check_target_files_signatures.py index d935607e43..a7b35230ea 100755 --- a/tools/releasetools/check_target_files_signatures.py +++ b/tools/releasetools/check_target_files_signatures.py @@ -241,7 +241,8 @@ class APK(object): # Signer (minSdkVersion=24, maxSdkVersion=32) certificate SHA-1 digest: 19da94896ce4078c38ca695701f1dec741ec6d67 # ... 
certs_info = {} - certificate_regex = re.compile(r"(Signer (?:#[0-9]+|\(.*\))) (certificate .*):(.*)") + certificate_regex = re.compile( + r"(Signer (?:#[0-9]+|\(.*\))) (certificate .*):(.*)") for line in output.splitlines(): m = certificate_regex.match(line) if not m: @@ -312,7 +313,7 @@ class TargetFiles(object): # This is the list of wildcards of files we extract from |filename|. apk_extensions = ['*.apk', '*.apex'] - with zipfile.ZipFile(filename) as input_zip: + with zipfile.ZipFile(filename, "r") as input_zip: self.certmap, compressed_extension = common.ReadApkCerts(input_zip) if compressed_extension: apk_extensions.append('*.apk' + compressed_extension) diff --git a/tools/releasetools/common.py b/tools/releasetools/common.py index f2e77756b0..477bf4ea9d 100644 --- a/tools/releasetools/common.py +++ b/tools/releasetools/common.py @@ -2123,9 +2123,23 @@ def UnzipToDir(filename, dirname, patterns=None): # to indicate the actual local header offset. # As of python3.11, python does not handle zip64 central directories # correctly, so we will manually do the parsing here. + + # ZIP64 central directory extra field has two required fields: + # 2 bytes header ID and 2 bytes size field. Thes two require fields have + # a total size of 4 bytes. Then it has three other 8 bytes field, followed + # by a 4 byte disk number field. The last disk number field is not required + # to be present, but if it is present, the total size of extra field will be + # divisible by 8(because 2+2+4+8*n is always going to be multiple of 8) + # Most extra fields are optional, but when they appear, their must appear + # in the order defined by zip64 spec. Since file header offset is the 2nd + # to last field in zip64 spec, it will only be at last 8 bytes or last 12-4 + # bytes, depending on whether disk number is present. 
+ for entry in entries: - if entry.header_offset == 0xFFFFFFFF and len(entry.extra) >= 28: - entry.header_offset = int.from_bytes(entry.extra[20:28], "little") + if entry.header_offset == 0xFFFFFFFF: + if len(entry.extra) % 8 == 0: + entry.header_offset = int.from_bytes(entry.extra[-12:-4], "little") + else: + entry.header_offset = int.from_bytes(entry.extra[-8:], "little") if patterns is not None: filtered = [info for info in entries if any( [fnmatch.fnmatch(info.filename, p) for p in patterns])] From 2b2de0375faf9e23094f6238c38f9a49906b1882 Mon Sep 17 00:00:00 2001 From: Kelvin Zhang Date: Wed, 21 Jun 2023 13:06:59 -0700 Subject: [PATCH 4/4] Handle symlinks when extracting zipfiles python3.11's zipfile implementation does not handle symlinks. This causes important symlinks in ramdisk to be broken, and later causing a boot failure. Test: unzip a target files with symlinks, make sure symlinks are created Bug: 287896098 Merged-In: I7da89f8389c09cc99201cff342483c158bd7e9c1 Change-Id: Ia7d6ac8ffb03807680a36ff648aa11afafb7f481 --- tools/releasetools/common.py | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/tools/releasetools/common.py b/tools/releasetools/common.py index 477bf4ea9d..abedecfa76 100644 --- a/tools/releasetools/common.py +++ b/tools/releasetools/common.py @@ -35,6 +35,7 @@ import shlex import shutil import subprocess import sys +import stat import tempfile import threading import time @@ -2102,6 +2103,26 @@ def Gunzip(in_filename, out_filename): shutil.copyfileobj(in_file, out_file) +def UnzipSingleFile(input_zip: zipfile.ZipFile, info: zipfile.ZipInfo, dirname: str): + # According to https://stackoverflow.com/questions/434641/how-do-i-set-permissions-attributes-on-a-file-in-a-zip-file-using-pythons-zip/6297838#6297838 + # higher bits of |external_attr| are unix file permission and types + unix_filetype = info.external_attr >> 16 + + def CheckMask(a, mask): + return (a & mask) == mask + + def IsSymlink(a): + 
return CheckMask(a, stat.S_IFLNK) + # python3.11 zipfile implementation doesn't handle symlink correctly + if not IsSymlink(unix_filetype): + return input_zip.extract(info, dirname) + if dirname is None: + dirname = os.getcwd() + target = os.path.join(dirname, info.filename) + os.makedirs(os.path.dirname(target), exist_ok=True) + os.symlink(input_zip.read(info).decode(), target) + + def UnzipToDir(filename, dirname, patterns=None): """Unzips the archive to the given directory. @@ -2147,9 +2168,11 @@ def UnzipToDir(filename, dirname, patterns=None): # There isn't any matching files. Don't unzip anything. if not filtered: return - input_zip.extractall(dirname, filtered) + for info in filtered: + UnzipSingleFile(input_zip, info, dirname) else: - input_zip.extractall(dirname, entries) + for info in entries: + UnzipSingleFile(input_zip, info, dirname) def UnzipTemp(filename, patterns=None):