Merge "Rewrite the build system benchmarks to be much simpler and not require bazel." into main am: ba541c7e06

Original change: https://android-review.googlesource.com/c/platform/build/+/2881635

Change-Id: Ia7894da9465154e2053d6b57683e0de27afad042
Signed-off-by: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>
This commit is contained in:
Treehugger Robot
2023-12-19 17:10:16 +00:00
committed by Automerger Merge Worker
4 changed files with 817 additions and 0 deletions

550
tools/perf/benchmarks Executable file
View File

@@ -0,0 +1,550 @@
#!/usr/bin/env python3
# Copyright (C) 2023 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
if __name__ == "__main__":
sys.dont_write_bytecode = True
import argparse
import dataclasses
import datetime
import json
import os
import pathlib
import shutil
import subprocess
import time
import pretty
import utils
class FatalError(Exception):
    """Signals that the run must stop; carries no message of its own."""

    def __init__(self):
        # Takes no arguments on purpose: callers print the diagnostic
        # themselves before raising.
        super().__init__()
class OptionsError(Exception):
    """Signals a problem with a user-supplied option.

    The human-readable explanation is available as the `message` attribute.
    """

    def __init__(self, message):
        super().__init__(message)
        self.message = message
@dataclasses.dataclass(frozen=True)
class Lunch:
    """A lunch combination: product, release and build variant."""

    # Value for TARGET_PRODUCT.
    target_product: str

    # Value for TARGET_RELEASE.
    target_release: str

    # Value for TARGET_BUILD_VARIANT.
    target_build_variant: str

    def ToDict(self):
        """Return the combination as a dict keyed by the TARGET_* names."""
        names = ("TARGET_PRODUCT", "TARGET_RELEASE", "TARGET_BUILD_VARIANT")
        values = (self.target_product, self.target_release, self.target_build_variant)
        return dict(zip(names, values))

    def Combine(self):
        """Return the combination as a single "product-release-variant" string."""
        return "{}-{}-{}".format(
            self.target_product,
            self.target_release,
            self.target_build_variant,
        )
@dataclasses.dataclass(frozen=True)
class Change:
    """A modification made to the tree, together with the way to revert it."""

    # Text printed in the log when the change is made.
    label: str

    # Function that modifies the source tree.
    change: callable

    # Function that puts the source tree back in its previous condition,
    # touching as little as possible.
    undo: callable
@dataclasses.dataclass(frozen=True)
class Benchmark:
    """One thing that gets measured."""

    # Short identifier, used to select the benchmark on the command line.
    id: str

    # Title shown in reports.
    title: str

    # Source tree modification whose rebuild cost is measured.
    change: Change

    # Modules passed to the soong command line.
    modules: list[str]

    # How many stabilizing builds to run before the change is made.
    preroll: int

    # How many stabilizing builds to run after the measured build.
    postroll: int
@dataclasses.dataclass(frozen=True)
class FileSnapshot:
    """The contents of one file, remembered so they can be written back."""

    # Path of the file the snapshot belongs to.
    filename: str

    # Text held for that file.
    contents: str

    def write(self):
        """Overwrite `filename` with the stored contents."""
        with open(self.filename, mode="w") as target:
            target.write(self.contents)
def Snapshot(filename):
    """Read `filename` and return a FileSnapshot holding its current text."""
    with open(filename) as source:
        current = source.read()
    return FileSnapshot(filename, current)
def Clean():
    """Return a Change that deletes the "out" directory; its undo does nothing."""
    def remove_out():
        # Nothing to delete on a tree that has never been built.
        if not os.path.exists("out"):
            return
        shutil.rmtree("out")

    return Change(label="Remove out", change=remove_out, undo=lambda: None)
def NoChange():
    """Return a Change whose change and undo steps both leave the tree alone."""
    def do_nothing():
        return None

    return Change(label="No change", change=do_nothing, undo=do_nothing)
def Modify(filename, contents, before=None):
    """Create a Change that inserts `contents` into `filename`.

    The text goes immediately before the last occurrence of `before`, or at
    the end of the file when `before` is not given. The file is read when this
    function is called; the returned Change writes the modified text and its
    undo writes the text read here.

    Reports an error and raises FatalError if `before` is given but doesn't
    appear in the file.
    """
    original = Snapshot(filename)
    text = original.contents

    if not before:
        split_at = len(text)
    else:
        split_at = text.rfind(before)
        if split_at < 0:
            report_error(f"{filename}: Unable to find string '{before}' for modify operation.")
            raise FatalError()

    patched = FileSnapshot(filename, text[:split_at] + contents + text[split_at:])
    return Change(
        label="Modify " + filename,
        change=patched.write,
        undo=original.write,
    )
class BenchmarkReport():
    """Information about a run of the benchmark."""

    # Lunch combo the benchmark ran with.
    lunch: "Lunch"

    # The benchmark object.
    benchmark: "Benchmark"

    # Which iteration of the benchmark this report describes.
    iteration: int

    # Path to the log directory, relative to the root of the reports directory.
    log_dir: str

    # Durations of the prerolls in nanoseconds.
    preroll_duration_ns: list[int]

    # Duration of the measured portion of the benchmark in nanoseconds.
    # Initialized to -1.
    duration_ns: int

    # Durations of the postrolls in nanoseconds.
    postroll_duration_ns: list[int]

    # Whether the benchmark made it all the way through the postrolls.
    complete: bool

    def __init__(self, lunch, benchmark, iteration, log_dir):
        """Create an empty report; the timing fields are filled in later."""
        self.lunch = lunch
        self.benchmark = benchmark
        self.iteration = iteration
        self.log_dir = log_dir
        self.preroll_duration_ns = []
        self.duration_ns = -1
        self.postroll_duration_ns = []
        self.complete = False

    def ToDict(self):
        """Return the report as a dict of plain values, as used for the JSON summary."""
        return {
            "lunch": self.lunch.ToDict(),
            "id": self.benchmark.id,
            "title": self.benchmark.title,
            "modules": self.benchmark.modules,
            "change": self.benchmark.change.label,
            "iteration": self.iteration,
            "log_dir": self.log_dir,
            "preroll_duration_ns": self.preroll_duration_ns,
            "duration_ns": self.duration_ns,
            "postroll_duration_ns": self.postroll_duration_ns,
            "complete": self.complete,
        }
class Runner():
    """Runs the benchmarks."""

    def __init__(self, options):
        """Remember the options; reports accumulate as benchmarks are run."""
        self._options = options
        self._reports = []
        self._complete = False

    def Run(self):
        """Run all of the user-selected benchmarks.

        The summary is written even if a build fails or an exception escapes.
        """
        # Clean out the log dir or create it if necessary
        prepare_log_dir(self._options.LogDir())

        try:
            for lunch in self._options.Lunches():
                print(lunch)
                for benchmark in self._options.Benchmarks():
                    for iteration in range(self._options.Iterations()):
                        self._run_benchmark(lunch, benchmark, iteration)
            self._complete = True
        finally:
            self._write_summary()

    def _run_benchmark(self, lunch, benchmark, iteration):
        """Run a single benchmark.

        Sequence: preroll builds, apply the change, measured build, postroll
        builds, undo the change. The change is undone even when a build fails.
        """
        benchmark_log_subdir = self._log_dir(lunch, benchmark, iteration)
        benchmark_log_dir = self._options.LogDir().joinpath(benchmark_log_subdir)

        sys.stderr.write(f"STARTING BENCHMARK: {benchmark.id}\n")
        sys.stderr.write(f" lunch: {lunch.Combine()}\n")
        sys.stderr.write(f" iteration: {iteration}\n")
        sys.stderr.write(f" benchmark_log_dir: {benchmark_log_dir}\n")

        # Register the report up front so a partial run still shows up in the summary.
        report = BenchmarkReport(lunch, benchmark, iteration, benchmark_log_subdir)
        self._reports.append(report)

        # Preroll builds
        for i in range(benchmark.preroll):
            ns = self._run_build(lunch, benchmark_log_dir.joinpath(f"pre_{i}"), benchmark.modules)
            report.preroll_duration_ns.append(ns)

        sys.stderr.write(f"PERFORMING CHANGE: {benchmark.change.label}\n")
        if not self._options.DryRun():
            benchmark.change.change()
        try:
            # Measured build
            ns = self._run_build(lunch, benchmark_log_dir.joinpath("measured"), benchmark.modules)
            report.duration_ns = ns

            # Postroll builds. The count comes from `postroll`, not `preroll`.
            for i in range(benchmark.postroll):
                ns = self._run_build(lunch, benchmark_log_dir.joinpath(f"post_{i}"),
                                     benchmark.modules)
                report.postroll_duration_ns.append(ns)

            # Only reached when every postroll build succeeded.
            report.complete = True
        finally:
            # Always undo, even if we crashed or the build failed and we stopped.
            sys.stderr.write(f"UNDOING CHANGE: {benchmark.change.label}\n")
            if not self._options.DryRun():
                benchmark.change.undo()

        self._write_summary()
        sys.stderr.write(f"FINISHED BENCHMARK: {benchmark.id}\n")

    def _log_dir(self, lunch, benchmark, iteration):
        """Construct the log directory for a benchmark run, relative to the log root."""
        path = f"{lunch.Combine()}/{benchmark.id}"
        # Zero pad to the correct length for correct alpha sorting
        path += ("/%0" + str(len(str(self._options.Iterations()))) + "d") % iteration
        return path

    def _run_build(self, lunch, build_log_dir, modules):
        """Build the modules and return the elapsed time in nanoseconds.

        In dry-run mode no build command is executed. `build_log_dir` is not
        used yet; copying log files is still a TODO. Raises FatalError if the
        build fails.
        """
        sys.stderr.write(f"STARTING BUILD {modules}\n")

        before_ns = time.perf_counter_ns()
        if not self._options.DryRun():
            cmd = [
                "build/soong/soong_ui.bash",
                "--build-mode",
                "--all-modules",
                f"--dir={self._options.root}",
            ] + modules
            # The lunch combo is passed to the build through the environment.
            env = dict(os.environ)
            env["TARGET_PRODUCT"] = lunch.target_product
            env["TARGET_RELEASE"] = lunch.target_release
            env["TARGET_BUILD_VARIANT"] = lunch.target_build_variant
            returncode = subprocess.call(cmd, env=env)
            if returncode != 0:
                report_error(f"Build failed: {' '.join(cmd)}")
                raise FatalError()

        after_ns = time.perf_counter_ns()

        # TODO: Copy some log files.

        sys.stderr.write(f"FINISHED BUILD {modules}\n")

        return after_ns - before_ns

    def _write_summary(self):
        """Write summary.json into the log directory."""
        # Write the results, even if the build failed or we crashed, including
        # whether we finished all of the benchmarks.
        data = {
            "start_time": self._options.Timestamp().isoformat(),
            "branch": self._options.Branch(),
            "tag": self._options.Tag(),
            "benchmarks": [report.ToDict() for report in self._reports],
            "complete": self._complete,
        }
        with open(self._options.LogDir().joinpath("summary.json"), "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2, sort_keys=True)
def benchmark_table(benchmarks):
    """Return table rows (header first) listing each benchmark's id, title and modules."""
    header = ("ID", "DESCRIPTION", "REBUILD")
    rows = [header]
    for bm in benchmarks:
        rows.append((bm.id, bm.title, " ".join(bm.modules)))
    return rows
def prepare_log_dir(directory):
    """Make sure `directory` exists and is empty.

    Creates the directory (and any missing parents) if it doesn't exist,
    otherwise removes everything inside it. Reports an error and raises
    FatalError if the path exists but isn't a directory.
    """
    if not os.path.exists(directory):
        # Create it
        os.makedirs(directory)
        return

    # If it exists and isn't a directory, fail.
    if not os.path.isdir(directory):
        report_error(f"Log directory already exists but isn't a directory: {directory}")
        raise FatalError()

    # Make sure the directory is empty. Do this rather than deleting it to handle
    # symlinks cleanly.
    for filename in os.listdir(directory):
        entry = os.path.join(directory, filename)
        # os.path.isdir() follows symlinks but shutil.rmtree() refuses to act on
        # one, so a link to a directory is unlinked like a file. Its target is
        # left untouched.
        if os.path.isdir(entry) and not os.path.islink(entry):
            shutil.rmtree(entry)
        else:
            os.unlink(entry)
class Options():
    """Command line options and the settings derived from them.

    Constructing an instance changes the working directory to the root of the
    tree, loads the benchmark definitions and parses sys.argv. Raises
    FatalError if the root can't be found or any option is invalid.
    """

    def __init__(self):
        self._had_error = False

        # Wall time clock when we started
        self._timestamp = datetime.datetime.now(datetime.timezone.utc)

        # Move to the root of the tree right away. Everything must happen from there.
        self.root = utils.get_root()
        if not self.root:
            report_error("Unable to find root of tree from cwd.")
            raise FatalError()
        os.chdir(self.root)

        # Initialize the Benchmarks. Note that this pre-loads all of the files, etc.
        # Doing all that here forces us to fail fast if one of them can't load a required
        # file, at the cost of a small startup speed. Don't make this do something slow
        # like scan the whole tree.
        self._init_benchmarks()

        # Argument parsing
        epilog = f"""
benchmarks:
{pretty.FormatTable(benchmark_table(self._benchmarks), prefix=" ")}
"""

        parser = argparse.ArgumentParser(
                prog="benchmarks",
                allow_abbrev=False, # Don't let people write unsupportable scripts.
                formatter_class=argparse.RawDescriptionHelpFormatter,
                epilog=epilog,
                description="Run build system performance benchmarks.")
        self.parser = parser

        parser.add_argument("--log-dir",
                            help="Directory for logs. Default is $TOP/../benchmarks/.")
        parser.add_argument("--dated-logs", action="store_true",
                            help="Append timestamp to log dir.")
        parser.add_argument("-n", action="store_true", dest="dry_run",
                            help="Dry run. Don't run the build commands but do everything else.")
        parser.add_argument("--tag",
                            help="Variant of the run, for when there are multiple perf runs.")
        parser.add_argument("--lunch", nargs="*",
                            help="Lunch combos to test")
        parser.add_argument("--iterations", type=int, default=1,
                            help="Number of iterations of each test to run.")
        parser.add_argument("--branch", type=str,
                            help="Specify branch. Otherwise a guess will be made based on repo.")
        parser.add_argument("--benchmark", nargs="*", default=[b.id for b in self._benchmarks],
                            metavar="BENCHMARKS",
                            help="Benchmarks to run. Default suite will be run if omitted.")

        self._args = parser.parse_args()

        # Each assignment below replaces the same-named method on this instance
        # with its result, so the order matters: _log_dir() reads the branch
        # string stored by the first line.
        self._branch = self._branch()
        self._log_dir = self._log_dir()
        self._lunches = self._lunches()

        # Validate the benchmark ids
        all_ids = [benchmark.id for benchmark in self._benchmarks]
        bad_ids = [id for id in self._args.benchmark if id not in all_ids]
        if bad_ids:
            for id in bad_ids:
                self._error(f"Invalid benchmark: {id}")

        if self._had_error:
            raise FatalError()

    def Timestamp(self):
        """Return the UTC time at which this object was created."""
        return self._timestamp

    def _branch(self):
        """Return the branch, either from the command line or by guessing from repo.

        Raises FatalError if the branch has to be guessed and git can't provide it.
        """
        if self._args.branch:
            return self._args.branch
        manifests_dir = os.path.join(self.root, ".repo", "manifests")
        try:
            # Run git directly with cwd= rather than building a shell string,
            # so a root path containing spaces or shell metacharacters works.
            branch = subprocess.check_output(
                    ["git", "rev-parse", "--abbrev-ref", "--symbolic-full-name", "@{u}"],
                    cwd=manifests_dir, encoding="utf-8")
        except (subprocess.CalledProcessError, OSError) as ex:
            # OSError covers a missing manifests directory or git executable.
            report_error("Can't get branch from .repo dir. Specify --branch argument")
            report_error(str(ex))
            raise FatalError()
        return branch.strip().split("/")[-1]

    def Branch(self):
        """Return the branch name for this run."""
        return self._branch

    def _log_dir(self):
        "The log directory to use, based on the current options"
        if self._args.log_dir:
            d = pathlib.Path(self._args.log_dir).resolve().absolute()
        else:
            d = self.root.joinpath("..", utils.DEFAULT_REPORT_DIR)
        if self._args.dated_logs:
            d = d.joinpath(self._timestamp.strftime('%Y-%m-%d'))
        d = d.joinpath(self._branch)
        if self._args.tag:
            d = d.joinpath(self._args.tag)
        return d.resolve().absolute()

    def LogDir(self):
        """Return the absolute log directory for this run."""
        return self._log_dir

    def Benchmarks(self):
        """Return the selected benchmarks, in the order they are defined."""
        return [b for b in self._benchmarks if b.id in self._args.benchmark]

    def Tag(self):
        """Return the --tag value, or None if it wasn't given."""
        return self._args.tag

    def DryRun(self):
        """Return whether -n (dry run) was given."""
        return self._args.dry_run

    def _lunches(self):
        """Return the Lunch objects to test.

        Uses --lunch if given, otherwise the TARGET_* environment variables.
        Problems are recorded with _error() rather than raised.
        """
        def parse_lunch(lunch):
            parts = lunch.split("-")
            if len(parts) != 3:
                raise OptionsError(f"Invalid lunch combo: {lunch}")
            return Lunch(parts[0], parts[1], parts[2])

        # If they gave lunch targets on the command line use that
        if self._args.lunch:
            result = []
            # Split into Lunch objects
            for lunch in self._args.lunch:
                try:
                    result.append(parse_lunch(lunch))
                except OptionsError as ex:
                    self._error(ex.message)
            return result

        # Use whats in the environment
        product = os.getenv("TARGET_PRODUCT")
        release = os.getenv("TARGET_RELEASE")
        variant = os.getenv("TARGET_BUILD_VARIANT")
        if (not product) or (not release) or (not variant):
            # If they didn't give us anything, fail rather than guessing. There's no good
            # default for AOSP.
            self._error("No lunch combo specified. Either pass --lunch argument or run lunch.")
            return []
        return [Lunch(product, release, variant),]

    def Lunches(self):
        """Return the lunch combos to test."""
        return self._lunches

    def Iterations(self):
        """Return the --iterations value."""
        return self._args.iterations

    def _init_benchmarks(self):
        """Initialize the list of benchmarks."""
        # Assumes that we've already chdired to the root of the tree.
        self._benchmarks = [
            Benchmark(id="full",
                title="Full build",
                change=Clean(),
                modules=["droid"],
                preroll=0,
                postroll=3
            ),
            Benchmark(id="nochange",
                title="No change",
                change=NoChange(),
                modules=["droid"],
                preroll=2,
                postroll=3
            ),
            Benchmark(id="modify_bp",
                title="Modify Android.bp",
                change=Modify("bionic/libc/Android.bp", "// Comment"),
                modules=["droid"],
                preroll=1,
                postroll=3
            ),
        ]

    def _error(self, message):
        """Report `message` and remember that option processing failed."""
        report_error(message)
        self._had_error = True
def report_error(message):
    """Write `message` to stderr as a single line prefixed with "error: "."""
    line = "error: {}\n".format(message)
    sys.stderr.write(line)
def main(argv):
    """Parse the options and run the selected benchmarks.

    `argv` is accepted for symmetry with the entry point below but is not
    read; Options parses sys.argv itself. Exits with status 1 when a
    FatalError stops the run, so callers and scripts can detect the failure.
    """
    try:
        options = Options()
        runner = Runner(options)
        runner.Run()
    except FatalError:
        # The specific error has already been reported where it was raised.
        sys.stderr.write("FAILED\n")
        sys.exit(1)


if __name__ == "__main__":
    main(sys.argv)

185
tools/perf/format_benchmarks Executable file
View File

@@ -0,0 +1,185 @@
#!/usr/bin/env python3
# Copyright (C) 2023 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
if __name__ == "__main__":
sys.dont_write_bytecode = True
import argparse
import dataclasses
import datetime
import json
import os
import pathlib
import statistics
import zoneinfo
import pretty
import utils
# TODO:
# - Flag if the last postroll build was more than 15 seconds or something. That's
# an indicator that something is amiss.
# - Add a mode to print all of the values for multi-iteration runs
# - Add a flag to reorder the tags
# - Add a flag to reorder the headers in order to show grouping more clearly.
def FindSummaries(args):
    """Return the summary.json paths selected by `args`.

    With no args, searches the default report directory next to the root of
    the tree (returning [] when no root is found). Otherwise each arg is
    either a file, which is used as given, or a directory, which is searched
    recursively; the combined list is returned sorted. Anything else prints a
    message and exits with status 1.
    """
    def summaries_under(directory):
        matches = pathlib.Path(directory).glob("**/summary.json")
        return [str(match.resolve()) for match in matches]

    if not args:
        # If they didn't give an argument, use the default dir
        root = utils.get_root()
        if not root:
            return []
        return summaries_under(root.joinpath("..", utils.DEFAULT_REPORT_DIR))

    found = []
    for arg in args:
        if os.path.isfile(arg):
            # A file is taken as-is
            found.append(arg)
        elif os.path.isdir(arg):
            # A directory contributes every summary below it
            found.extend(summaries_under(arg))
        else:
            sys.stderr.write(f"Invalid summary argument: {arg}\n")
            sys.exit(1)
    return sorted(found)
def LoadSummary(filename):
    """Parse the JSON file `filename` and return the decoded value."""
    with open(filename) as stream:
        summary = json.load(stream)
    return summary
# Columns:
# Date
# Branch
# Tag
# --
# Lunch
# Rows:
# Benchmark
@dataclasses.dataclass(frozen=True)
class Key():
    """Placeholder dataclass; it declares no fields."""
class Column():
    """Placeholder class; it has no state or behavior."""

    def __init__(self):
        """Create an instance; nothing is initialized."""
def lunch_str(d):
    """Format a lunch dict as "product-release-variant".

    `d` must have the keys TARGET_PRODUCT, TARGET_RELEASE and
    TARGET_BUILD_VARIANT.
    """
    template = "{TARGET_PRODUCT}-{TARGET_RELEASE}-{TARGET_BUILD_VARIANT}"
    return template.format_map(d)
def group_by(l, key):
    """Group the items of `l` by `key(item)`.

    Returns a list of (key, items) tuples. Groups appear in the order their
    key was first seen, and items keep their original order within a group.
    """
    buckets = {}
    for element in l:
        k = key(element)
        if k not in buckets:
            buckets[k] = []
        buckets[k].append(element)
    return list(buckets.items())
class Table:
    """Cells addressed by (column key, row key), printed as a text table.

    Columns and rows are printed in the order their keys were first set.
    """

    def __init__(self):
        self._data = {}
        self._rows = []
        self._cols = []

    def Set(self, column_key, row_key, data):
        """Store `data` in the cell at (`column_key`, `row_key`)."""
        self._data[(column_key, row_key)] = data
        if column_key not in self._cols:
            self._cols.append(column_key)
        if row_key not in self._rows:
            self._rows.append(row_key)

    def Write(self, out):
        """Format the table and write it to `out`."""
        # Expand the column items: one header line per position in the column
        # keys, leaving out positions where every column has the same value.
        header = [
            [""] + list(level)
            for level in zip(*self._cols)
            if level.count(level[0]) != len(level)
        ]
        table = header + [pretty.SEPARATOR] if header else []

        # Populate the data; cells that were never set are left blank.
        for row_key in self._rows:
            cells = [str(self._data.get((col_key, row_key), "")) for col_key in self._cols]
            table.append([str(row_key)] + cells)

        out.write(pretty.FormatTable(table))
def format_duration_sec(ns):
    """Format a duration given in nanoseconds, rounded to whole seconds.

    Hours are shown only when non-zero; minutes are shown when either hours
    or minutes are non-zero. Each number is padded to two characters.
    """
    total_sec = round(ns / 1000000000)
    hours, remainder = divmod(total_sec, 60 * 60)
    minutes, seconds = divmod(remainder, 60)

    pieces = []
    if hours > 0:
        pieces.append(f"{hours:2d}h ")
    if hours > 0 or minutes > 0:
        pieces.append(f"{minutes:2d}m ")
    pieces.append(f"{seconds:2d}s")
    return "".join(pieces)
def main(argv):
    """Print a table of median benchmark durations from summary.json files.

    Columns are keyed by (date, branch, tag, lunch combo) and rows by
    benchmark title. `argv` is not read; argparse parses sys.argv itself.
    """
    parser = argparse.ArgumentParser(
            prog="format_benchmarks",
            allow_abbrev=False, # Don't let people write unsupportable scripts.
            description="Print analysis tables for benchmarks")

    parser.add_argument("summaries", nargs="*",
                        help="A summary.json file or a directory in which to look for summaries.")

    args = parser.parse_args()

    # Load the summaries
    summaries = [(s, LoadSummary(s)) for s in FindSummaries(args.summaries)]

    # Convert to MTV time
    for filename, s in summaries:
        dt = datetime.datetime.fromisoformat(s["start_time"])
        dt = dt.astimezone(zoneinfo.ZoneInfo("America/Los_Angeles"))
        s["datetime"] = dt
        s["date"] = datetime.date(dt.year, dt.month, dt.day)

    # Sort the summaries. The tag is null in the JSON when the run had no --tag;
    # map it to "" so it can be compared with string tags.
    summaries.sort(key=lambda s: (s[1]["date"], s[1]["branch"], s[1]["tag"] or ""))

    # group the benchmarks by column and iteration
    def bm_key(b):
        return (
            lunch_str(b["lunch"]),
        )
    for filename, summary in summaries:
        summary["columns"] = [(key, group_by(bms, lambda b: b["id"])) for key, bms
                              in group_by(summary["benchmarks"], bm_key)]

    # Build the table
    table = Table()
    for filename, summary in summaries:
        for key, column in summary["columns"]:
            for id, cell in column:
                # One cell holds every iteration of a benchmark; show the median.
                duration_ns = statistics.median([b["duration_ns"] for b in cell])
                # strftime needs % directives; "YYYY-MM-DD" would be emitted
                # literally and make runs from different dates share a column.
                table.Set(tuple([summary["date"].strftime("%Y-%m-%d"),
                                 summary["branch"],
                                 summary["tag"]]
                                + list(key)),
                          cell[0]["title"], format_duration_sec(duration_ns))

    table.Write(sys.stdout)


if __name__ == "__main__":
    main(sys.argv)

52
tools/perf/pretty.py Normal file
View File

@@ -0,0 +1,52 @@
# Copyright (C) 2023 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Formatting utilities
class Sentinel():
    """Type of the marker objects that may appear in place of a table row."""


# Put this in the table data where a horizontal rule should be drawn.
SEPARATOR = Sentinel()


def FormatTable(data, prefix=""):
    """Pretty print a table and return it as a string.

    `data` is a sequence of rows, each a sequence of strings (falsy cells are
    printed empty), optionally interleaved with SEPARATOR. The first column is
    left-aligned and the others right-aligned. Every line starts with `prefix`
    and every cell is followed by the column separator.
    """
    if not data:
        return ""

    # Column widths come from the real rows only.
    content_rows = [r for r in data if not isinstance(r, Sentinel)]
    widths = [max(len(c) if c else 0 for c in column) for column in zip(*content_rows)]

    colsep = " "
    lines = []
    for row in data:
        if row == SEPARATOR:
            body = "".join("-" * w + colsep for w in widths)
        else:
            pieces = []
            for i, raw in enumerate(row):
                cell = raw if raw else ""
                padding = " " * (widths[i] - len(cell))
                aligned = cell + padding if i == 0 else padding + cell
                pieces.append(aligned + colsep)
            body = "".join(pieces)
        lines.append(prefix + body + "\n")
    return "".join(lines)

30
tools/perf/utils.py Normal file
View File

@@ -0,0 +1,30 @@
# Copyright (C) 2023 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import pathlib
DEFAULT_REPORT_DIR = "benchmarks"
def get_root():
    """Return the root of the source tree as a pathlib.Path, or None.

    Uses $ANDROID_BUILD_TOP when it is set. Otherwise walks up from the
    current directory looking for build/soong/soong_ui.bash, and returns None
    if no directory up to and including the filesystem root contains it.
    """
    top_dir = os.environ.get("ANDROID_BUILD_TOP")
    if top_dir:
        return pathlib.Path(top_dir).resolve()

    cwd = pathlib.Path.cwd()
    # `parents` ends at the filesystem root, so the root itself is checked too
    # and the walk terminates whatever the platform's root looks like.
    for d in (cwd, *cwd.parents):
        if d.joinpath("build", "soong", "soong_ui.bash").exists():
            return d.resolve().absolute()
    return None