reduce forest generation to be incremental

Previously, symlink forest generation involved removing the entire
symlink forest and recreating it from scratch. With this change,
a) symlinks which need not change are untouched,
b) symlinks pointing to the wrong location are fixed, and
c) symlinks which should no longer exist are removed.

On AOSP on my local machine, this reduces the symlink forest generation
step from 2.5s to 1.1s clean, and 0.6s when a single file is added to
a source directory.

Bug: 257528847
Test: m bp2build, touch `fakefile` under the forest, remove a file
from the source tree, rerun m bp2build. Manually verify the new forest
does not retain the link to the deleted source file, and that fakefile
no longer exists in the forest.

Change-Id: I481371ae487e9419af6a3a4370c552578b07d650
This commit is contained in:
Usta (Tsering) Shrestha
2022-11-08 18:31:14 -05:00
parent 1bb18bb2ac
commit c4c07b12b6
3 changed files with 130 additions and 121 deletions

View File

@@ -15,9 +15,7 @@
package bp2build package bp2build
import ( import (
"errors"
"fmt" "fmt"
"io/fs"
"io/ioutil" "io/ioutil"
"os" "os"
"path/filepath" "path/filepath"
@@ -53,59 +51,6 @@ type symlinkForestContext struct {
symlinkCount atomic.Uint64 symlinkCount atomic.Uint64
} }
// A simple thread pool to limit concurrency on system calls.
// Necessary because Go spawns a new OS-level thread for each blocking system
// call. This means that if syscalls are too slow and there are too many of
// them, the hard limit on OS-level threads can be exhausted.
type syscallPool struct {
shutdownCh []chan<- struct{}
workCh chan syscall
}
type syscall struct {
work func()
done chan<- struct{}
}
func createSyscallPool(count int) *syscallPool {
result := &syscallPool{
shutdownCh: make([]chan<- struct{}, count),
workCh: make(chan syscall),
}
for i := 0; i < count; i++ {
shutdownCh := make(chan struct{})
result.shutdownCh[i] = shutdownCh
go result.worker(shutdownCh)
}
return result
}
func (p *syscallPool) do(work func()) {
doneCh := make(chan struct{})
p.workCh <- syscall{work, doneCh}
<-doneCh
}
func (p *syscallPool) shutdown() {
for _, ch := range p.shutdownCh {
ch <- struct{}{} // Blocks until the value is received
}
}
func (p *syscallPool) worker(shutdownCh <-chan struct{}) {
for {
select {
case <-shutdownCh:
return
case work := <-p.workCh:
work.work()
work.done <- struct{}{}
}
}
}
// Ensures that the node for the given path exists in the tree and returns it. // Ensures that the node for the given path exists in the tree and returns it.
func ensureNodeExists(root *instructionsNode, path string) *instructionsNode { func ensureNodeExists(root *instructionsNode, path string) *instructionsNode {
if path == "" { if path == "" {
@@ -217,12 +162,35 @@ func readdirToMap(dir string) map[string]os.FileInfo {
} }
// Creates a symbolic link at dst pointing to src // Creates a symbolic link at dst pointing to src
func symlinkIntoForest(topdir, dst, src string) { func symlinkIntoForest(topdir, dst, src string) uint64 {
err := os.Symlink(shared.JoinPath(topdir, src), shared.JoinPath(topdir, dst)) srcPath := shared.JoinPath(topdir, src)
if err != nil { dstPath := shared.JoinPath(topdir, dst)
// Check if a symlink already exists.
if dstInfo, err := os.Lstat(dstPath); err != nil {
if !os.IsNotExist(err) {
fmt.Fprintf(os.Stderr, "Failed to lstat '%s': %s", dst, err)
os.Exit(1)
}
} else {
if dstInfo.Mode()&os.ModeSymlink != 0 {
// Assume that the link's target is correct, i.e. no manual tampering.
// E.g. OUT_DIR could have been previously used with a different source tree check-out!
return 0
} else {
if err := os.RemoveAll(dstPath); err != nil {
fmt.Fprintf(os.Stderr, "Failed to remove '%s': %s", dst, err)
os.Exit(1)
}
}
}
// Create symlink.
if err := os.Symlink(srcPath, dstPath); err != nil {
fmt.Fprintf(os.Stderr, "Cannot create symlink at '%s' pointing to '%s': %s", dst, src, err) fmt.Fprintf(os.Stderr, "Cannot create symlink at '%s' pointing to '%s': %s", dst, src, err)
os.Exit(1) os.Exit(1)
} }
return 1
} }
func isDir(path string, fi os.FileInfo) bool { func isDir(path string, fi os.FileInfo) bool {
@@ -253,8 +221,9 @@ func plantSymlinkForestRecursive(context *symlinkForestContext, instructions *in
defer context.wg.Done() defer context.wg.Done()
if instructions != nil && instructions.excluded { if instructions != nil && instructions.excluded {
// This directory is not needed, bail out // Excluded paths are skipped at the level of the non-excluded parent.
return fmt.Fprintf(os.Stderr, "may not specify a root-level exclude directory '%s'", srcDir)
os.Exit(1)
} }
// We don't add buildFilesDir here because the bp2build files marker files is // We don't add buildFilesDir here because the bp2build files marker files is
@@ -272,6 +241,12 @@ func plantSymlinkForestRecursive(context *symlinkForestContext, instructions *in
renamingBuildFile = true renamingBuildFile = true
srcDirMap["BUILD.bazel"] = srcDirMap["BUILD"] srcDirMap["BUILD.bazel"] = srcDirMap["BUILD"]
delete(srcDirMap, "BUILD") delete(srcDirMap, "BUILD")
if instructions != nil {
if _, ok := instructions.children["BUILD"]; ok {
instructions.children["BUILD.bazel"] = instructions.children["BUILD"]
delete(instructions.children, "BUILD")
}
}
} }
} }
} }
@@ -288,17 +263,41 @@ func plantSymlinkForestRecursive(context *symlinkForestContext, instructions *in
// Tests read the error messages generated, so ensure their order is deterministic // Tests read the error messages generated, so ensure their order is deterministic
sort.Strings(allEntries) sort.Strings(allEntries)
err := os.MkdirAll(shared.JoinPath(context.topdir, forestDir), 0777) fullForestPath := shared.JoinPath(context.topdir, forestDir)
if err != nil { createForestDir := false
fmt.Fprintf(os.Stderr, "Cannot mkdir '%s': %s\n", forestDir, err) if fi, err := os.Lstat(fullForestPath); err != nil {
os.Exit(1) if os.IsNotExist(err) {
createForestDir = true
} else {
fmt.Fprintf(os.Stderr, "Could not read info for '%s': %s\n", forestDir, err)
}
} else if fi.Mode()&os.ModeDir == 0 {
if err := os.RemoveAll(fullForestPath); err != nil {
fmt.Fprintf(os.Stderr, "Failed to remove '%s': %s", forestDir, err)
os.Exit(1)
}
createForestDir = true
} }
context.mkdirCount.Add(1) if createForestDir {
if err := os.MkdirAll(fullForestPath, 0777); err != nil {
fmt.Fprintf(os.Stderr, "Could not mkdir '%s': %s\n", forestDir, err)
os.Exit(1)
}
context.mkdirCount.Add(1)
}
// Start with a list of items that already exist in the forest, and remove
// each element as it is processed in allEntries. Any remaining items in
// forestMapForDeletion must be removed. (This handles files which were
// removed since the previous forest generation).
forestMapForDeletion := readdirToMap(shared.JoinPath(context.topdir, forestDir))
for _, f := range allEntries { for _, f := range allEntries {
if f[0] == '.' { if f[0] == '.' {
continue // Ignore dotfiles continue // Ignore dotfiles
} }
delete(forestMapForDeletion, f)
// todo add deletionCount metric
// The full paths of children in the input trees and in the output tree // The full paths of children in the input trees and in the output tree
forestChild := shared.JoinPath(forestDir, f) forestChild := shared.JoinPath(forestDir, f)
@@ -309,13 +308,9 @@ func plantSymlinkForestRecursive(context *symlinkForestContext, instructions *in
buildFilesChild := shared.JoinPath(buildFilesDir, f) buildFilesChild := shared.JoinPath(buildFilesDir, f)
// Descend in the instruction tree if it exists // Descend in the instruction tree if it exists
var instructionsChild *instructionsNode = nil var instructionsChild *instructionsNode
if instructions != nil { if instructions != nil {
if f == "BUILD.bazel" && renamingBuildFile { instructionsChild = instructions.children[f]
instructionsChild = instructions.children["BUILD"]
} else {
instructionsChild = instructions.children[f]
}
} }
srcChildEntry, sExists := srcDirMap[f] srcChildEntry, sExists := srcDirMap[f]
@@ -323,8 +318,7 @@ func plantSymlinkForestRecursive(context *symlinkForestContext, instructions *in
if instructionsChild != nil && instructionsChild.excluded { if instructionsChild != nil && instructionsChild.excluded {
if bExists { if bExists {
symlinkIntoForest(context.topdir, forestChild, buildFilesChild) context.symlinkCount.Add(symlinkIntoForest(context.topdir, forestChild, buildFilesChild))
context.symlinkCount.Add(1)
} }
continue continue
} }
@@ -340,8 +334,7 @@ func plantSymlinkForestRecursive(context *symlinkForestContext, instructions *in
go plantSymlinkForestRecursive(context, instructionsChild, forestChild, buildFilesChild, srcChild) go plantSymlinkForestRecursive(context, instructionsChild, forestChild, buildFilesChild, srcChild)
} else { } else {
// Not in the source tree, symlink BUILD file // Not in the source tree, symlink BUILD file
symlinkIntoForest(context.topdir, forestChild, buildFilesChild) context.symlinkCount.Add(symlinkIntoForest(context.topdir, forestChild, buildFilesChild))
context.symlinkCount.Add(1)
} }
} else if !bExists { } else if !bExists {
if sDir && instructionsChild != nil { if sDir && instructionsChild != nil {
@@ -351,8 +344,7 @@ func plantSymlinkForestRecursive(context *symlinkForestContext, instructions *in
go plantSymlinkForestRecursive(context, instructionsChild, forestChild, buildFilesChild, srcChild) go plantSymlinkForestRecursive(context, instructionsChild, forestChild, buildFilesChild, srcChild)
} else { } else {
// Not in the build file tree, symlink source tree, carry on // Not in the build file tree, symlink source tree, carry on
symlinkIntoForest(context.topdir, forestChild, srcChild) context.symlinkCount.Add(symlinkIntoForest(context.topdir, forestChild, srcChild))
context.symlinkCount.Add(1)
} }
} else if sDir && bDir { } else if sDir && bDir {
// Both are directories. Descend. // Both are directories. Descend.
@@ -365,8 +357,7 @@ func plantSymlinkForestRecursive(context *symlinkForestContext, instructions *in
// The Android.bp file that codegen used to produce `buildFilesChild` is // The Android.bp file that codegen used to produce `buildFilesChild` is
// already a dependency, we can ignore `buildFilesChild`. // already a dependency, we can ignore `buildFilesChild`.
context.depCh <- srcChild context.depCh <- srcChild
err = mergeBuildFiles(shared.JoinPath(context.topdir, forestChild), srcBuildFile, generatedBuildFile, context.verbose) if err := mergeBuildFiles(shared.JoinPath(context.topdir, forestChild), srcBuildFile, generatedBuildFile, context.verbose); err != nil {
if err != nil {
fmt.Fprintf(os.Stderr, "Error merging %s and %s: %s", fmt.Fprintf(os.Stderr, "Error merging %s and %s: %s",
srcBuildFile, generatedBuildFile, err) srcBuildFile, generatedBuildFile, err)
os.Exit(1) os.Exit(1)
@@ -379,51 +370,27 @@ func plantSymlinkForestRecursive(context *symlinkForestContext, instructions *in
os.Exit(1) os.Exit(1)
} }
} }
}
func removeParallelRecursive(pool *syscallPool, path string, fi os.FileInfo, wg *sync.WaitGroup) { // Remove all files in the forest that exist in neither the source
defer wg.Done() // tree nor the build files tree. (This handles files which were removed
// since the previous forest generation).
if fi.IsDir() { for f := range forestMapForDeletion {
children := readdirToMap(path) var instructionsChild *instructionsNode
childrenWg := &sync.WaitGroup{} if instructions != nil {
childrenWg.Add(len(children)) instructionsChild = instructions.children[f]
for child, childFi := range children {
go removeParallelRecursive(pool, shared.JoinPath(path, child), childFi, childrenWg)
} }
childrenWg.Wait() if instructionsChild != nil && instructionsChild.excluded {
} // This directory may be excluded because bazel writes to it under the
// forest root. Thus this path is intentionally left alone.
pool.do(func() { continue
if err := os.Remove(path); err != nil { }
fmt.Fprintf(os.Stderr, "Cannot unlink '%s': %s\n", path, err) forestChild := shared.JoinPath(context.topdir, forestDir, f)
if err := os.RemoveAll(forestChild); err != nil {
fmt.Fprintf(os.Stderr, "Failed to remove '%s/%s': %s", forestDir, f, err)
os.Exit(1) os.Exit(1)
} }
})
}
func removeParallel(path string) {
fi, err := os.Lstat(path)
if err != nil {
if errors.Is(err, fs.ErrNotExist) {
return
}
fmt.Fprintf(os.Stderr, "Cannot lstat '%s': %s\n", path, err)
os.Exit(1)
} }
wg := &sync.WaitGroup{}
wg.Add(1)
// Random guess as to the best number of syscalls to run in parallel
pool := createSyscallPool(100)
removeParallelRecursive(pool, path, fi, wg)
pool.shutdown()
wg.Wait()
} }
// PlantSymlinkForest Creates a symlink forest by merging the directory tree at "buildFiles" and // PlantSymlinkForest Creates a symlink forest by merging the directory tree at "buildFiles" and
@@ -439,8 +406,6 @@ func PlantSymlinkForest(verbose bool, topdir string, forest string, buildFiles s
symlinkCount: atomic.Uint64{}, symlinkCount: atomic.Uint64{},
} }
removeParallel(shared.JoinPath(topdir, forest))
instructions := instructionsFromExcludePathList(exclude) instructions := instructionsFromExcludePathList(exclude)
go func() { go func() {
context.wg.Add(1) context.wg.Add(1)

View File

@@ -447,6 +447,7 @@ func bazelArtifacts() []string {
"bazel-genfiles", "bazel-genfiles",
"bazel-out", "bazel-out",
"bazel-testlogs", "bazel-testlogs",
"bazel-workspace",
"bazel-" + filepath.Base(topDir), "bazel-" + filepath.Base(topDir),
} }
} }

View File

@@ -140,7 +140,7 @@ EOF
# NOTE: We don't actually use the extra BUILD file for anything here # NOTE: We don't actually use the extra BUILD file for anything here
run_bazel build --config=android --config=bp2build --config=ci //foo/... run_bazel build --config=android --config=bp2build --config=ci //foo/...
local the_answer_file="$(find -L bazel-out -name the_answer.txt)" local -r the_answer_file="$(find -L bazel-out -name the_answer.txt)"
if [[ ! -f "${the_answer_file}" ]]; then if [[ ! -f "${the_answer_file}" ]]; then
fail "Expected the_answer.txt to be generated, but was missing" fail "Expected the_answer.txt to be generated, but was missing"
fi fi
@@ -156,6 +156,49 @@ function test_bp2build_generates_all_buildfiles {
eval "${_save_trap}" eval "${_save_trap}"
} }
function test_bp2build_symlinks_files {
setup
mkdir -p foo
touch foo/BLANK1
touch foo/BLANK2
touch foo/F2D
touch foo/BUILD
run_soong bp2build
if [[ -e "./out/soong/workspace/foo/BUILD" ]]; then
fail "./out/soong/workspace/foo/BUILD should be omitted"
fi
for file in BLANK1 BLANK2 F2D
do
if [[ ! -L "./out/soong/workspace/foo/$file" ]]; then
fail "./out/soong/workspace/foo/$file should exist"
fi
done
local -r BLANK1_BEFORE=$(stat -c %y "./out/soong/workspace/foo/BLANK1")
rm foo/BLANK2
rm foo/F2D
mkdir foo/F2D
touch foo/F2D/BUILD
run_soong bp2build
if [[ -e "./out/soong/workspace/foo/BUILD" ]]; then
fail "./out/soong/workspace/foo/BUILD should be omitted"
fi
local -r BLANK1_AFTER=$(stat -c %y "./out/soong/workspace/foo/BLANK1")
if [[ "$BLANK1_AFTER" != "$BLANK1_BEFORE" ]]; then
fail "./out/soong/workspace/foo/BLANK1 should be untouched"
fi
if [[ -e "./out/soong/workspace/foo/BLANK2" ]]; then
fail "./out/soong/workspace/foo/BLANK2 should be removed"
fi
if [[ -L "./out/soong/workspace/foo/F2D" ]] || [[ ! -d "./out/soong/workspace/foo/F2D" ]]; then
fail "./out/soong/workspace/foo/F2D should be a dir"
fi
}
function test_cc_correctness { function test_cc_correctness {
setup setup