Handle aquery build statements in a goroutine

Creation of build statements is largely parallelizable because each
action is independent apart from updates/reads to
depsetHashToArtifactPathsCache. Locally resulted in build statements
taking ~.45 seconds on staging mode to ~.02 seconds

Test: CI
Change-Id: Iab00c8394a9eab17353f71230885ff0870e17f24
This commit is contained in:
Liz Kammer
2023-02-10 17:17:28 -05:00
parent 00629db646
commit a4655a96c0
3 changed files with 88 additions and 49 deletions

View File

@@ -188,7 +188,7 @@ type BazelContext interface {
OutputBase() string OutputBase() string
// Returns build statements which should get registered to reflect Bazel's outputs. // Returns build statements which should get registered to reflect Bazel's outputs.
BuildStatementsToRegister() []bazel.BuildStatement BuildStatementsToRegister() []*bazel.BuildStatement
// Returns the depsets defined in Bazel's aquery response. // Returns the depsets defined in Bazel's aquery response.
AqueryDepsets() []bazel.AqueryDepset AqueryDepsets() []bazel.AqueryDepset
@@ -222,7 +222,7 @@ type mixedBuildBazelContext struct {
results map[cqueryKey]string // Results of cquery requests after Bazel invocations results map[cqueryKey]string // Results of cquery requests after Bazel invocations
// Build statements which should get registered to reflect Bazel's outputs. // Build statements which should get registered to reflect Bazel's outputs.
buildStatements []bazel.BuildStatement buildStatements []*bazel.BuildStatement
// Depsets which should be used for Bazel's build statements. // Depsets which should be used for Bazel's build statements.
depsets []bazel.AqueryDepset depsets []bazel.AqueryDepset
@@ -314,8 +314,8 @@ func (m MockBazelContext) IsModuleNameAllowed(_ string) bool {
func (m MockBazelContext) OutputBase() string { return m.OutputBaseDir } func (m MockBazelContext) OutputBase() string { return m.OutputBaseDir }
func (m MockBazelContext) BuildStatementsToRegister() []bazel.BuildStatement { func (m MockBazelContext) BuildStatementsToRegister() []*bazel.BuildStatement {
return []bazel.BuildStatement{} return []*bazel.BuildStatement{}
} }
func (m MockBazelContext) AqueryDepsets() []bazel.AqueryDepset { func (m MockBazelContext) AqueryDepsets() []bazel.AqueryDepset {
@@ -434,8 +434,8 @@ func (n noopBazelContext) IsModuleNameAllowed(_ string) bool {
return false return false
} }
func (m noopBazelContext) BuildStatementsToRegister() []bazel.BuildStatement { func (m noopBazelContext) BuildStatementsToRegister() []*bazel.BuildStatement {
return []bazel.BuildStatement{} return []*bazel.BuildStatement{}
} }
func (m noopBazelContext) AqueryDepsets() []bazel.AqueryDepset { func (m noopBazelContext) AqueryDepsets() []bazel.AqueryDepset {
@@ -1128,7 +1128,7 @@ func (context *mixedBuildBazelContext) generateBazelSymlinks(config Config, ctx
return err return err
} }
func (context *mixedBuildBazelContext) BuildStatementsToRegister() []bazel.BuildStatement { func (context *mixedBuildBazelContext) BuildStatementsToRegister() []*bazel.BuildStatement {
return context.buildStatements return context.buildStatements
} }
@@ -1196,6 +1196,11 @@ func (c *bazelSingleton) GenerateBuildActions(ctx SingletonContext) {
executionRoot := path.Join(ctx.Config().BazelContext.OutputBase(), "execroot", "__main__") executionRoot := path.Join(ctx.Config().BazelContext.OutputBase(), "execroot", "__main__")
bazelOutDir := path.Join(executionRoot, "bazel-out") bazelOutDir := path.Join(executionRoot, "bazel-out")
for index, buildStatement := range ctx.Config().BazelContext.BuildStatementsToRegister() { for index, buildStatement := range ctx.Config().BazelContext.BuildStatementsToRegister() {
// nil build statements are a valid case where we do not create an action because it is
// unnecessary or handled by other processing
if buildStatement == nil {
continue
}
if len(buildStatement.Command) > 0 { if len(buildStatement.Command) > 0 {
rule := NewRuleBuilder(pctx, ctx) rule := NewRuleBuilder(pctx, ctx)
createCommand(rule.Command(), buildStatement, executionRoot, bazelOutDir, ctx) createCommand(rule.Command(), buildStatement, executionRoot, bazelOutDir, ctx)
@@ -1240,7 +1245,7 @@ func (c *bazelSingleton) GenerateBuildActions(ctx SingletonContext) {
} }
// Register bazel-owned build statements (obtained from the aquery invocation). // Register bazel-owned build statements (obtained from the aquery invocation).
func createCommand(cmd *RuleBuilderCommand, buildStatement bazel.BuildStatement, executionRoot string, bazelOutDir string, ctx BuilderContext) { func createCommand(cmd *RuleBuilderCommand, buildStatement *bazel.BuildStatement, executionRoot string, bazelOutDir string, ctx BuilderContext) {
// executionRoot is the action cwd. // executionRoot is the action cwd.
cmd.Text(fmt.Sprintf("cd '%s' &&", executionRoot)) cmd.Text(fmt.Sprintf("cd '%s' &&", executionRoot))

View File

@@ -22,6 +22,7 @@ import (
"reflect" "reflect"
"sort" "sort"
"strings" "strings"
"sync"
analysis_v2_proto "prebuilts/bazel/common/proto/analysis_v2" analysis_v2_proto "prebuilts/bazel/common/proto/analysis_v2"
@@ -130,7 +131,7 @@ type aqueryArtifactHandler struct {
// depsetIdToArtifactIdsCache is a memoization of depset flattening, because flattening // depsetIdToArtifactIdsCache is a memoization of depset flattening, because flattening
// may be an expensive operation. // may be an expensive operation.
depsetHashToArtifactPathsCache map[string][]string depsetHashToArtifactPathsCache sync.Map
// Maps artifact ids to fully expanded paths. // Maps artifact ids to fully expanded paths.
artifactIdToPath map[artifactId]string artifactIdToPath map[artifactId]string
} }
@@ -192,7 +193,7 @@ func newAqueryHandler(aqueryResult *analysis_v2_proto.ActionGraphContainer) (*aq
aqueryHandler := aqueryArtifactHandler{ aqueryHandler := aqueryArtifactHandler{
depsetIdToAqueryDepset: map[depsetId]AqueryDepset{}, depsetIdToAqueryDepset: map[depsetId]AqueryDepset{},
depsetHashToAqueryDepset: map[string]AqueryDepset{}, depsetHashToAqueryDepset: map[string]AqueryDepset{},
depsetHashToArtifactPathsCache: map[string][]string{}, depsetHashToArtifactPathsCache: sync.Map{},
emptyDepsetIds: make(map[depsetId]struct{}, 0), emptyDepsetIds: make(map[depsetId]struct{}, 0),
artifactIdToPath: artifactIdToPath, artifactIdToPath: artifactIdToPath,
} }
@@ -297,8 +298,8 @@ func (a *aqueryArtifactHandler) getInputPaths(depsetIds []uint32) ([]string, err
} }
func (a *aqueryArtifactHandler) artifactPathsFromDepsetHash(depsetHash string) ([]string, error) { func (a *aqueryArtifactHandler) artifactPathsFromDepsetHash(depsetHash string) ([]string, error) {
if result, exists := a.depsetHashToArtifactPathsCache[depsetHash]; exists { if result, exists := a.depsetHashToArtifactPathsCache.Load(depsetHash); exists {
return result, nil return result.([]string), nil
} }
if depset, exists := a.depsetHashToAqueryDepset[depsetHash]; exists { if depset, exists := a.depsetHashToAqueryDepset[depsetHash]; exists {
result := depset.DirectArtifacts result := depset.DirectArtifacts
@@ -309,7 +310,7 @@ func (a *aqueryArtifactHandler) artifactPathsFromDepsetHash(depsetHash string) (
} }
result = append(result, childArtifactIds...) result = append(result, childArtifactIds...)
} }
a.depsetHashToArtifactPathsCache[depsetHash] = result a.depsetHashToArtifactPathsCache.Store(depsetHash, result)
return result, nil return result, nil
} else { } else {
return nil, fmt.Errorf("undefined input depset hash %s", depsetHash) return nil, fmt.Errorf("undefined input depset hash %s", depsetHash)
@@ -321,7 +322,7 @@ func (a *aqueryArtifactHandler) artifactPathsFromDepsetHash(depsetHash string) (
// action graph, as described by the given action graph json proto. // action graph, as described by the given action graph json proto.
// BuildStatements are one-to-one with actions in the given action graph, and AqueryDepsets // BuildStatements are one-to-one with actions in the given action graph, and AqueryDepsets
// are one-to-one with Bazel's depSetOfFiles objects. // are one-to-one with Bazel's depSetOfFiles objects.
func AqueryBuildStatements(aqueryJsonProto []byte, eventHandler *metrics.EventHandler) ([]BuildStatement, []AqueryDepset, error) { func AqueryBuildStatements(aqueryJsonProto []byte, eventHandler *metrics.EventHandler) ([]*BuildStatement, []AqueryDepset, error) {
aqueryProto := &analysis_v2_proto.ActionGraphContainer{} aqueryProto := &analysis_v2_proto.ActionGraphContainer{}
err := proto.Unmarshal(aqueryJsonProto, aqueryProto) err := proto.Unmarshal(aqueryJsonProto, aqueryProto)
if err != nil { if err != nil {
@@ -338,21 +339,35 @@ func AqueryBuildStatements(aqueryJsonProto []byte, eventHandler *metrics.EventHa
} }
} }
buildStatements := make([]BuildStatement, 0, len(aqueryProto.Actions)) // allocate both length and capacity so each goroutine can write to an index independently without
// any need for synchronization for slice access.
buildStatements := make([]*BuildStatement, len(aqueryProto.Actions))
{ {
eventHandler.Begin("build_statements") eventHandler.Begin("build_statements")
defer eventHandler.End("build_statements") defer eventHandler.End("build_statements")
var buildStatement *BuildStatement wg := sync.WaitGroup{}
for _, actionEntry := range aqueryProto.Actions { var errOnce sync.Once
buildStatement, err = aqueryHandler.actionToBuildStatement(actionEntry)
if err != nil { for i, actionEntry := range aqueryProto.Actions {
return nil, nil, err wg.Add(1)
} go func(i int, actionEntry *analysis_v2_proto.Action) {
if buildStatement == nil { buildStatement, aErr := aqueryHandler.actionToBuildStatement(actionEntry)
continue if aErr != nil {
} errOnce.Do(func() {
buildStatements = append(buildStatements, *buildStatement) err = aErr
})
} else {
// set build statement at an index rather than appending such that each goroutine does not
// impact other goroutines
buildStatements[i] = buildStatement
}
wg.Done()
}(i, actionEntry)
} }
wg.Wait()
}
if err != nil {
return nil, nil, err
} }
depsetsByHash := map[string]AqueryDepset{} depsetsByHash := map[string]AqueryDepset{}
@@ -379,7 +394,13 @@ func AqueryBuildStatements(aqueryJsonProto []byte, eventHandler *metrics.EventHa
// output). Note they are not sorted by their original IDs nor their Bazel ordering, // output). Note they are not sorted by their original IDs nor their Bazel ordering,
// as Bazel gives nondeterministic ordering / identifiers in aquery responses. // as Bazel gives nondeterministic ordering / identifiers in aquery responses.
sort.Slice(buildStatements, func(i, j int) bool { sort.Slice(buildStatements, func(i, j int) bool {
// For build statements, compare output lists. In Bazel, each output file // Sort all nil statements to the end of the slice
if buildStatements[i] == nil {
return false
} else if buildStatements[j] == nil {
return true
}
//For build statements, compare output lists. In Bazel, each output file
// may only have one action which generates it, so this will provide // may only have one action which generates it, so this will provide
// a deterministic ordering. // a deterministic ordering.
outputs_i := buildStatements[i].OutputPaths outputs_i := buildStatements[i].OutputPaths

View File

@@ -139,10 +139,10 @@ func TestAqueryMultiArchGenrule(t *testing.T) {
return return
} }
actualbuildStatements, actualDepsets, _ := AqueryBuildStatements(data, &metrics.EventHandler{}) actualbuildStatements, actualDepsets, _ := AqueryBuildStatements(data, &metrics.EventHandler{})
var expectedBuildStatements []BuildStatement var expectedBuildStatements []*BuildStatement
for _, arch := range []string{"arm", "arm64", "x86", "x86_64"} { for _, arch := range []string{"arm", "arm64", "x86", "x86_64"} {
expectedBuildStatements = append(expectedBuildStatements, expectedBuildStatements = append(expectedBuildStatements,
BuildStatement{ &BuildStatement{
Command: fmt.Sprintf( Command: fmt.Sprintf(
"/bin/bash -c 'source ../bazel_tools/tools/genrule/genrule-setup.sh; ../sourceroot/bionic/libc/tools/gensyscalls.py %s ../sourceroot/bionic/libc/SYSCALLS.TXT > bazel-out/sourceroot/k8-fastbuild/bin/bionic/libc/syscalls-%s.S'", "/bin/bash -c 'source ../bazel_tools/tools/genrule/genrule-setup.sh; ../sourceroot/bionic/libc/tools/gensyscalls.py %s ../sourceroot/bionic/libc/SYSCALLS.TXT > bazel-out/sourceroot/k8-fastbuild/bin/bionic/libc/syscalls-%s.S'",
arch, arch), arch, arch),
@@ -487,8 +487,8 @@ func TestTransitiveInputDepsets(t *testing.T) {
} }
actualbuildStatements, actualDepsets, _ := AqueryBuildStatements(data, &metrics.EventHandler{}) actualbuildStatements, actualDepsets, _ := AqueryBuildStatements(data, &metrics.EventHandler{})
expectedBuildStatements := []BuildStatement{ expectedBuildStatements := []*BuildStatement{
{ &BuildStatement{
Command: "/bin/bash -c 'touch bazel-out/sourceroot/k8-fastbuild/bin/testpkg/test_out'", Command: "/bin/bash -c 'touch bazel-out/sourceroot/k8-fastbuild/bin/testpkg/test_out'",
OutputPaths: []string{"bazel-out/sourceroot/k8-fastbuild/bin/testpkg/test_out"}, OutputPaths: []string{"bazel-out/sourceroot/k8-fastbuild/bin/testpkg/test_out"},
Mnemonic: "Action", Mnemonic: "Action",
@@ -545,8 +545,8 @@ func TestSymlinkTree(t *testing.T) {
if err != nil { if err != nil {
t.Errorf("Unexpected error %q", err) t.Errorf("Unexpected error %q", err)
} }
assertBuildStatements(t, []BuildStatement{ assertBuildStatements(t, []*BuildStatement{
{ &BuildStatement{
Command: "", Command: "",
OutputPaths: []string{"foo.runfiles/MANIFEST"}, OutputPaths: []string{"foo.runfiles/MANIFEST"},
Mnemonic: "SymlinkTree", Mnemonic: "SymlinkTree",
@@ -615,7 +615,7 @@ func TestBazelOutRemovalFromInputDepsets(t *testing.T) {
t.Errorf("dependency ../dep2 expected but not found") t.Errorf("dependency ../dep2 expected but not found")
} }
expectedBuildStatement := BuildStatement{ expectedBuildStatement := &BuildStatement{
Command: "bogus command", Command: "bogus command",
OutputPaths: []string{"output"}, OutputPaths: []string{"output"},
Mnemonic: "x", Mnemonic: "x",
@@ -692,7 +692,7 @@ func TestBazelOutRemovalFromTransitiveInputDepsets(t *testing.T) {
return return
} }
expectedBuildStatement := BuildStatement{ expectedBuildStatement := &BuildStatement{
Command: "bogus command", Command: "bogus command",
OutputPaths: []string{"output"}, OutputPaths: []string{"output"},
Mnemonic: "x", Mnemonic: "x",
@@ -757,8 +757,8 @@ func TestMiddlemenAction(t *testing.T) {
if err != nil { if err != nil {
t.Errorf("Unexpected error %q", err) t.Errorf("Unexpected error %q", err)
} }
if expected := 1; len(actualBuildStatements) != expected { if expected := 2; len(actualBuildStatements) != expected {
t.Fatalf("Expected %d build statements, got %d", expected, len(actualBuildStatements)) t.Fatalf("Expected %d build statements, got %d %#v", expected, len(actualBuildStatements), actualBuildStatements)
} }
expectedDepsetFiles := [][]string{ expectedDepsetFiles := [][]string{
@@ -783,6 +783,11 @@ func TestMiddlemenAction(t *testing.T) {
if !reflect.DeepEqual(actualFlattenedInputs, expectedFlattenedInputs) { if !reflect.DeepEqual(actualFlattenedInputs, expectedFlattenedInputs) {
t.Errorf("Expected flattened inputs %v, but got %v", expectedFlattenedInputs, actualFlattenedInputs) t.Errorf("Expected flattened inputs %v, but got %v", expectedFlattenedInputs, actualFlattenedInputs)
} }
bs = actualBuildStatements[1]
if bs != nil {
t.Errorf("Expected nil action for skipped")
}
} }
// Returns the contents of given depsets in concatenated post order. // Returns the contents of given depsets in concatenated post order.
@@ -856,8 +861,8 @@ func TestSimpleSymlink(t *testing.T) {
t.Errorf("Unexpected error %q", err) t.Errorf("Unexpected error %q", err)
} }
expectedBuildStatements := []BuildStatement{ expectedBuildStatements := []*BuildStatement{
{ &BuildStatement{
Command: "mkdir -p one/symlink_subdir && " + Command: "mkdir -p one/symlink_subdir && " +
"rm -f one/symlink_subdir/symlink && " + "rm -f one/symlink_subdir/symlink && " +
"ln -sf $PWD/one/file_subdir/file one/symlink_subdir/symlink", "ln -sf $PWD/one/file_subdir/file one/symlink_subdir/symlink",
@@ -904,8 +909,8 @@ func TestSymlinkQuotesPaths(t *testing.T) {
t.Errorf("Unexpected error %q", err) t.Errorf("Unexpected error %q", err)
} }
expectedBuildStatements := []BuildStatement{ expectedBuildStatements := []*BuildStatement{
{ &BuildStatement{
Command: "mkdir -p 'one/symlink subdir' && " + Command: "mkdir -p 'one/symlink subdir' && " +
"rm -f 'one/symlink subdir/symlink' && " + "rm -f 'one/symlink subdir/symlink' && " +
"ln -sf $PWD/'one/file subdir/file' 'one/symlink subdir/symlink'", "ln -sf $PWD/'one/file subdir/file' 'one/symlink subdir/symlink'",
@@ -1014,8 +1019,8 @@ func TestTemplateExpandActionSubstitutions(t *testing.T) {
t.Errorf("Unexpected error %q", err) t.Errorf("Unexpected error %q", err)
} }
expectedBuildStatements := []BuildStatement{ expectedBuildStatements := []*BuildStatement{
{ &BuildStatement{
Command: "/bin/bash -c 'echo \"Test template substitutions: abcd, python3\" | sed \"s/\\\\\\\\n/\\\\n/g\" > template_file && " + Command: "/bin/bash -c 'echo \"Test template substitutions: abcd, python3\" | sed \"s/\\\\\\\\n/\\\\n/g\" > template_file && " +
"chmod a+x template_file'", "chmod a+x template_file'",
OutputPaths: []string{"template_file"}, OutputPaths: []string{"template_file"},
@@ -1084,8 +1089,8 @@ func TestFileWrite(t *testing.T) {
if err != nil { if err != nil {
t.Errorf("Unexpected error %q", err) t.Errorf("Unexpected error %q", err)
} }
assertBuildStatements(t, []BuildStatement{ assertBuildStatements(t, []*BuildStatement{
{ &BuildStatement{
OutputPaths: []string{"foo.manifest"}, OutputPaths: []string{"foo.manifest"},
Mnemonic: "FileWrite", Mnemonic: "FileWrite",
FileContents: "file data\n", FileContents: "file data\n",
@@ -1122,8 +1127,8 @@ func TestSourceSymlinkManifest(t *testing.T) {
if err != nil { if err != nil {
t.Errorf("Unexpected error %q", err) t.Errorf("Unexpected error %q", err)
} }
assertBuildStatements(t, []BuildStatement{ assertBuildStatements(t, []*BuildStatement{
{ &BuildStatement{
OutputPaths: []string{"foo.manifest"}, OutputPaths: []string{"foo.manifest"},
Mnemonic: "SourceSymlinkManifest", Mnemonic: "SourceSymlinkManifest",
SymlinkPaths: []string{}, SymlinkPaths: []string{},
@@ -1142,7 +1147,7 @@ func assertError(t *testing.T, err error, expected string) {
// Asserts that the given actual build statements match the given expected build statements. // Asserts that the given actual build statements match the given expected build statements.
// Build statement equivalence is determined using buildStatementEquals. // Build statement equivalence is determined using buildStatementEquals.
func assertBuildStatements(t *testing.T, expected []BuildStatement, actual []BuildStatement) { func assertBuildStatements(t *testing.T, expected []*BuildStatement, actual []*BuildStatement) {
t.Helper() t.Helper()
if len(expected) != len(actual) { if len(expected) != len(actual) {
t.Errorf("expected %d build statements, but got %d,\n expected: %#v,\n actual: %#v", t.Errorf("expected %d build statements, but got %d,\n expected: %#v,\n actual: %#v",
@@ -1150,8 +1155,13 @@ func assertBuildStatements(t *testing.T, expected []BuildStatement, actual []Bui
return return
} }
type compareFn = func(i int, j int) bool type compareFn = func(i int, j int) bool
byCommand := func(slice []BuildStatement) compareFn { byCommand := func(slice []*BuildStatement) compareFn {
return func(i int, j int) bool { return func(i int, j int) bool {
if slice[i] == nil {
return false
} else if slice[j] == nil {
return false
}
return slice[i].Command < slice[j].Command return slice[i].Command < slice[j].Command
} }
} }
@@ -1167,7 +1177,10 @@ func assertBuildStatements(t *testing.T, expected []BuildStatement, actual []Bui
} }
} }
func buildStatementEquals(first BuildStatement, second BuildStatement) string { func buildStatementEquals(first *BuildStatement, second *BuildStatement) string {
if (first == nil) != (second == nil) {
return "Nil"
}
if first.Mnemonic != second.Mnemonic { if first.Mnemonic != second.Mnemonic {
return "Mnemonic" return "Mnemonic"
} }