reduce forest generation to be incremental

Previously, symlink forest generation involved removing the entire
symlink forest and recreating it from scratch. With this change,
a) symlinks which need not change are untouched,
b) symlinks pointing to the wrong location are fixed, and
c) symlinks which should no longer exist are removed.

On AOSP on my local machine, this reduces the symlink forest generation
step from 2.5s to 1.1s clean, and 0.6s when a single file is added to
a source directory.

Bug: 257528847
Test: m bp2build, touch `fakefile` under the forest, remove a file
from the source tree, rerun m bp2build. Manually verify the new forest
does not retain the link to the deleted source file, and that fakefile
no longer exists in the forest.

Change-Id: I481371ae487e9419af6a3a4370c552578b07d650
This commit is contained in:
Usta (Tsering) Shrestha
2022-11-08 18:31:14 -05:00
parent 1bb18bb2ac
commit c4c07b12b6
3 changed files with 130 additions and 121 deletions

View File

@@ -15,9 +15,7 @@
package bp2build
import (
"errors"
"fmt"
"io/fs"
"io/ioutil"
"os"
"path/filepath"
@@ -53,59 +51,6 @@ type symlinkForestContext struct {
symlinkCount atomic.Uint64
}
// A simple thread pool to limit concurrency on system calls.
// Necessary because Go spawns a new OS-level thread for each blocking system
// call. This means that if syscalls are too slow and there are too many of
// them, the hard limit on OS-level threads can be exhausted.
type syscallPool struct {
shutdownCh []chan<- struct{}
workCh chan syscall
}
type syscall struct {
work func()
done chan<- struct{}
}
func createSyscallPool(count int) *syscallPool {
result := &syscallPool{
shutdownCh: make([]chan<- struct{}, count),
workCh: make(chan syscall),
}
for i := 0; i < count; i++ {
shutdownCh := make(chan struct{})
result.shutdownCh[i] = shutdownCh
go result.worker(shutdownCh)
}
return result
}
func (p *syscallPool) do(work func()) {
doneCh := make(chan struct{})
p.workCh <- syscall{work, doneCh}
<-doneCh
}
func (p *syscallPool) shutdown() {
for _, ch := range p.shutdownCh {
ch <- struct{}{} // Blocks until the value is received
}
}
func (p *syscallPool) worker(shutdownCh <-chan struct{}) {
for {
select {
case <-shutdownCh:
return
case work := <-p.workCh:
work.work()
work.done <- struct{}{}
}
}
}
// Ensures that the node for the given path exists in the tree and returns it.
func ensureNodeExists(root *instructionsNode, path string) *instructionsNode {
if path == "" {
@@ -217,12 +162,35 @@ func readdirToMap(dir string) map[string]os.FileInfo {
}
// Creates a symbolic link at dst pointing to src
func symlinkIntoForest(topdir, dst, src string) {
err := os.Symlink(shared.JoinPath(topdir, src), shared.JoinPath(topdir, dst))
if err != nil {
func symlinkIntoForest(topdir, dst, src string) uint64 {
srcPath := shared.JoinPath(topdir, src)
dstPath := shared.JoinPath(topdir, dst)
// Check if a symlink already exists.
if dstInfo, err := os.Lstat(dstPath); err != nil {
if !os.IsNotExist(err) {
fmt.Fprintf(os.Stderr, "Failed to lstat '%s': %s", dst, err)
os.Exit(1)
}
} else {
if dstInfo.Mode()&os.ModeSymlink != 0 {
// Assume that the link's target is correct, i.e. no manual tampering.
// E.g. OUT_DIR could have been previously used with a different source tree check-out!
return 0
} else {
if err := os.RemoveAll(dstPath); err != nil {
fmt.Fprintf(os.Stderr, "Failed to remove '%s': %s", dst, err)
os.Exit(1)
}
}
}
// Create symlink.
if err := os.Symlink(srcPath, dstPath); err != nil {
fmt.Fprintf(os.Stderr, "Cannot create symlink at '%s' pointing to '%s': %s", dst, src, err)
os.Exit(1)
}
return 1
}
func isDir(path string, fi os.FileInfo) bool {
@@ -253,8 +221,9 @@ func plantSymlinkForestRecursive(context *symlinkForestContext, instructions *in
defer context.wg.Done()
if instructions != nil && instructions.excluded {
// This directory is not needed, bail out
return
// Excluded paths are skipped at the level of the non-excluded parent.
fmt.Fprintf(os.Stderr, "may not specify a root-level exclude directory '%s'", srcDir)
os.Exit(1)
}
// We don't add buildFilesDir here because the bp2build files marker files is
@@ -272,6 +241,12 @@ func plantSymlinkForestRecursive(context *symlinkForestContext, instructions *in
renamingBuildFile = true
srcDirMap["BUILD.bazel"] = srcDirMap["BUILD"]
delete(srcDirMap, "BUILD")
if instructions != nil {
if _, ok := instructions.children["BUILD"]; ok {
instructions.children["BUILD.bazel"] = instructions.children["BUILD"]
delete(instructions.children, "BUILD")
}
}
}
}
}
@@ -288,17 +263,41 @@ func plantSymlinkForestRecursive(context *symlinkForestContext, instructions *in
// Tests read the error messages generated, so ensure their order is deterministic
sort.Strings(allEntries)
err := os.MkdirAll(shared.JoinPath(context.topdir, forestDir), 0777)
if err != nil {
fmt.Fprintf(os.Stderr, "Cannot mkdir '%s': %s\n", forestDir, err)
os.Exit(1)
fullForestPath := shared.JoinPath(context.topdir, forestDir)
createForestDir := false
if fi, err := os.Lstat(fullForestPath); err != nil {
if os.IsNotExist(err) {
createForestDir = true
} else {
fmt.Fprintf(os.Stderr, "Could not read info for '%s': %s\n", forestDir, err)
}
} else if fi.Mode()&os.ModeDir == 0 {
if err := os.RemoveAll(fullForestPath); err != nil {
fmt.Fprintf(os.Stderr, "Failed to remove '%s': %s", forestDir, err)
os.Exit(1)
}
createForestDir = true
}
context.mkdirCount.Add(1)
if createForestDir {
if err := os.MkdirAll(fullForestPath, 0777); err != nil {
fmt.Fprintf(os.Stderr, "Could not mkdir '%s': %s\n", forestDir, err)
os.Exit(1)
}
context.mkdirCount.Add(1)
}
// Start with a list of items that already exist in the forest, and remove
// each element as it is processed in allEntries. Any remaining items in
// forestMapForDeletion must be removed. (This handles files which were
// removed since the previous forest generation).
forestMapForDeletion := readdirToMap(shared.JoinPath(context.topdir, forestDir))
for _, f := range allEntries {
if f[0] == '.' {
continue // Ignore dotfiles
}
delete(forestMapForDeletion, f)
// todo add deletionCount metric
// The full paths of children in the input trees and in the output tree
forestChild := shared.JoinPath(forestDir, f)
@@ -309,13 +308,9 @@ func plantSymlinkForestRecursive(context *symlinkForestContext, instructions *in
buildFilesChild := shared.JoinPath(buildFilesDir, f)
// Descend in the instruction tree if it exists
var instructionsChild *instructionsNode = nil
var instructionsChild *instructionsNode
if instructions != nil {
if f == "BUILD.bazel" && renamingBuildFile {
instructionsChild = instructions.children["BUILD"]
} else {
instructionsChild = instructions.children[f]
}
instructionsChild = instructions.children[f]
}
srcChildEntry, sExists := srcDirMap[f]
@@ -323,8 +318,7 @@ func plantSymlinkForestRecursive(context *symlinkForestContext, instructions *in
if instructionsChild != nil && instructionsChild.excluded {
if bExists {
symlinkIntoForest(context.topdir, forestChild, buildFilesChild)
context.symlinkCount.Add(1)
context.symlinkCount.Add(symlinkIntoForest(context.topdir, forestChild, buildFilesChild))
}
continue
}
@@ -340,8 +334,7 @@ func plantSymlinkForestRecursive(context *symlinkForestContext, instructions *in
go plantSymlinkForestRecursive(context, instructionsChild, forestChild, buildFilesChild, srcChild)
} else {
// Not in the source tree, symlink BUILD file
symlinkIntoForest(context.topdir, forestChild, buildFilesChild)
context.symlinkCount.Add(1)
context.symlinkCount.Add(symlinkIntoForest(context.topdir, forestChild, buildFilesChild))
}
} else if !bExists {
if sDir && instructionsChild != nil {
@@ -351,8 +344,7 @@ func plantSymlinkForestRecursive(context *symlinkForestContext, instructions *in
go plantSymlinkForestRecursive(context, instructionsChild, forestChild, buildFilesChild, srcChild)
} else {
// Not in the build file tree, symlink source tree, carry on
symlinkIntoForest(context.topdir, forestChild, srcChild)
context.symlinkCount.Add(1)
context.symlinkCount.Add(symlinkIntoForest(context.topdir, forestChild, srcChild))
}
} else if sDir && bDir {
// Both are directories. Descend.
@@ -365,8 +357,7 @@ func plantSymlinkForestRecursive(context *symlinkForestContext, instructions *in
// The Android.bp file that codegen used to produce `buildFilesChild` is
// already a dependency, we can ignore `buildFilesChild`.
context.depCh <- srcChild
err = mergeBuildFiles(shared.JoinPath(context.topdir, forestChild), srcBuildFile, generatedBuildFile, context.verbose)
if err != nil {
if err := mergeBuildFiles(shared.JoinPath(context.topdir, forestChild), srcBuildFile, generatedBuildFile, context.verbose); err != nil {
fmt.Fprintf(os.Stderr, "Error merging %s and %s: %s",
srcBuildFile, generatedBuildFile, err)
os.Exit(1)
@@ -379,51 +370,27 @@ func plantSymlinkForestRecursive(context *symlinkForestContext, instructions *in
os.Exit(1)
}
}
}
func removeParallelRecursive(pool *syscallPool, path string, fi os.FileInfo, wg *sync.WaitGroup) {
defer wg.Done()
if fi.IsDir() {
children := readdirToMap(path)
childrenWg := &sync.WaitGroup{}
childrenWg.Add(len(children))
for child, childFi := range children {
go removeParallelRecursive(pool, shared.JoinPath(path, child), childFi, childrenWg)
// Remove all files in the forest that exist in neither the source
// tree nor the build files tree. (This handles files which were removed
// since the previous forest generation).
for f := range forestMapForDeletion {
var instructionsChild *instructionsNode
if instructions != nil {
instructionsChild = instructions.children[f]
}
childrenWg.Wait()
}
pool.do(func() {
if err := os.Remove(path); err != nil {
fmt.Fprintf(os.Stderr, "Cannot unlink '%s': %s\n", path, err)
if instructionsChild != nil && instructionsChild.excluded {
// This directory may be excluded because bazel writes to it under the
// forest root. Thus this path is intentionally left alone.
continue
}
forestChild := shared.JoinPath(context.topdir, forestDir, f)
if err := os.RemoveAll(forestChild); err != nil {
fmt.Fprintf(os.Stderr, "Failed to remove '%s/%s': %s", forestDir, f, err)
os.Exit(1)
}
})
}
func removeParallel(path string) {
fi, err := os.Lstat(path)
if err != nil {
if errors.Is(err, fs.ErrNotExist) {
return
}
fmt.Fprintf(os.Stderr, "Cannot lstat '%s': %s\n", path, err)
os.Exit(1)
}
wg := &sync.WaitGroup{}
wg.Add(1)
// Random guess as to the best number of syscalls to run in parallel
pool := createSyscallPool(100)
removeParallelRecursive(pool, path, fi, wg)
pool.shutdown()
wg.Wait()
}
// PlantSymlinkForest Creates a symlink forest by merging the directory tree at "buildFiles" and
@@ -439,8 +406,6 @@ func PlantSymlinkForest(verbose bool, topdir string, forest string, buildFiles s
symlinkCount: atomic.Uint64{},
}
removeParallel(shared.JoinPath(topdir, forest))
instructions := instructionsFromExcludePathList(exclude)
go func() {
context.wg.Add(1)

View File

@@ -447,6 +447,7 @@ func bazelArtifacts() []string {
"bazel-genfiles",
"bazel-out",
"bazel-testlogs",
"bazel-workspace",
"bazel-" + filepath.Base(topDir),
}
}

View File

@@ -140,7 +140,7 @@ EOF
# NOTE: We don't actually use the extra BUILD file for anything here
run_bazel build --config=android --config=bp2build --config=ci //foo/...
local the_answer_file="$(find -L bazel-out -name the_answer.txt)"
local -r the_answer_file="$(find -L bazel-out -name the_answer.txt)"
if [[ ! -f "${the_answer_file}" ]]; then
fail "Expected the_answer.txt to be generated, but was missing"
fi
@@ -156,6 +156,49 @@ function test_bp2build_generates_all_buildfiles {
eval "${_save_trap}"
}
function test_bp2build_symlinks_files {
setup
mkdir -p foo
touch foo/BLANK1
touch foo/BLANK2
touch foo/F2D
touch foo/BUILD
run_soong bp2build
if [[ -e "./out/soong/workspace/foo/BUILD" ]]; then
fail "./out/soong/workspace/foo/BUILD should be omitted"
fi
for file in BLANK1 BLANK2 F2D
do
if [[ ! -L "./out/soong/workspace/foo/$file" ]]; then
fail "./out/soong/workspace/foo/$file should exist"
fi
done
local -r BLANK1_BEFORE=$(stat -c %y "./out/soong/workspace/foo/BLANK1")
rm foo/BLANK2
rm foo/F2D
mkdir foo/F2D
touch foo/F2D/BUILD
run_soong bp2build
if [[ -e "./out/soong/workspace/foo/BUILD" ]]; then
fail "./out/soong/workspace/foo/BUILD should be omitted"
fi
local -r BLANK1_AFTER=$(stat -c %y "./out/soong/workspace/foo/BLANK1")
if [[ "$BLANK1_AFTER" != "$BLANK1_BEFORE" ]]; then
fail "./out/soong/workspace/foo/BLANK1 should be untouched"
fi
if [[ -e "./out/soong/workspace/foo/BLANK2" ]]; then
fail "./out/soong/workspace/foo/BLANK2 should be removed"
fi
if [[ -L "./out/soong/workspace/foo/F2D" ]] || [[ ! -d "./out/soong/workspace/foo/F2D" ]]; then
fail "./out/soong/workspace/foo/F2D should be a dir"
fi
}
function test_cc_correctness {
setup