From 437e9470c0730019ff7ed02cccc678c723fc1bc7 Mon Sep 17 00:00:00 2001 From: Anas Sulaiman Date: Fri, 9 Feb 2024 21:06:49 +0000 Subject: [PATCH] Fix non-deterministic output for merge_zips A map iteration was causing a non-deterministic order in output entries. The default behaviour is to preserve the same order of the input zips, which is necessary to ensure a deterministic output for deterministic inputs. Bug: b/322788229 Test: Ran a couple of builds and confirmed no cache misses from the output of merge_zips. Change-Id: I3217e6887ab108d213a290b59da5b33d51b8241f --- cmd/merge_zips/merge_zips.go | 10 ++++++---- cmd/merge_zips/merge_zips_test.go | 31 ++++++++++++++++++++++++++++++- 2 files changed, 36 insertions(+), 5 deletions(-) diff --git a/cmd/merge_zips/merge_zips.go b/cmd/merge_zips/merge_zips.go index 1aa6f6f39..2c5718063 100644 --- a/cmd/merge_zips/merge_zips.go +++ b/cmd/merge_zips/merge_zips.go @@ -342,7 +342,8 @@ func (oz *OutputZip) writeEntries(entries []string) error { func (oz *OutputZip) getUninitializedPythonPackages(inputZips []InputZip) ([]string, error) { // the runfiles packages needs to be populated with "__init__.py". // the runfiles dirs have been treated as packages. - allPackages := make(map[string]bool) + var allPackages []string // Using a slice to preserve input order. + seenPkgs := make(map[string]bool) initedPackages := make(map[string]bool) getPackage := func(path string) string { ret := filepath.Dir(path) @@ -369,16 +370,17 @@ func (oz *OutputZip) getUninitializedPythonPackages(inputZips []InputZip) ([]str initedPackages[pyPkg] = true } for pyPkg != "" { - if _, found := allPackages[pyPkg]; found { + if _, found := seenPkgs[pyPkg]; found { break } - allPackages[pyPkg] = true + seenPkgs[pyPkg] = true + allPackages = append(allPackages, pyPkg) pyPkg = getPackage(pyPkg) } } } noInitPackages := make([]string, 0) - for pyPkg := range allPackages { + for _, pyPkg := range allPackages { if _, found := initedPackages[pyPkg]; !found { noInitPackages = append(noInitPackages, pyPkg) } diff --git a/cmd/merge_zips/merge_zips_test.go b/cmd/merge_zips/merge_zips_test.go index 64b08d018..17228c4e0 100644 --- a/cmd/merge_zips/merge_zips_test.go +++ b/cmd/merge_zips/merge_zips_test.go @@ -103,6 +103,7 @@ func TestMergeZips(t *testing.T) { stripFiles []string stripDirs []string jar bool + par bool sort bool ignoreDuplicates bool stripDirEntries bool @@ -265,6 +266,34 @@ func TestMergeZips(t *testing.T) { }, out: []testZipEntry{withoutTimestamp, a}, }, + { + name: "emulate par", + in: [][]testZipEntry{ + { + testZipEntry{name: "3/main.py"}, + testZipEntry{name: "c/main.py"}, + testZipEntry{name: "a/main.py"}, + testZipEntry{name: "2/main.py"}, + testZipEntry{name: "b/main.py"}, + testZipEntry{name: "1/main.py"}, + }, + }, + out: []testZipEntry{ + testZipEntry{name: "3/__init__.py", mode: 0700, timestamp: jar.DefaultTime}, + testZipEntry{name: "c/__init__.py", mode: 0700, timestamp: jar.DefaultTime}, + testZipEntry{name: "a/__init__.py", mode: 0700, timestamp: jar.DefaultTime}, + testZipEntry{name: "2/__init__.py", mode: 0700, timestamp: jar.DefaultTime}, + testZipEntry{name: "b/__init__.py", mode: 0700, timestamp: jar.DefaultTime}, + testZipEntry{name: "1/__init__.py", mode: 0700, timestamp: jar.DefaultTime}, + testZipEntry{name: "3/main.py", timestamp: jar.DefaultTime}, + testZipEntry{name: "c/main.py", timestamp: jar.DefaultTime}, + testZipEntry{name: "a/main.py", timestamp: jar.DefaultTime}, + testZipEntry{name: "2/main.py", timestamp: jar.DefaultTime}, + testZipEntry{name: "b/main.py", timestamp: jar.DefaultTime}, + testZipEntry{name: "1/main.py", timestamp: jar.DefaultTime}, + }, + par: true, + }, } for _, test := range testCases { @@ -280,7 +309,7 @@ func TestMergeZips(t *testing.T) { writer := zip.NewWriter(out) err := mergeZips(inputZips, writer, "", "", - test.sort, test.jar, false, test.stripDirEntries, test.ignoreDuplicates, + test.sort, test.jar, test.par, test.stripDirEntries, test.ignoreDuplicates, test.stripFiles, test.stripDirs, test.zipsToNotStrip) closeErr := writer.Close()