From bd13e63e997dcb070c344dd6381ea3f6a76f6ede Mon Sep 17 00:00:00 2001 From: Devin Moore Date: Mon, 15 May 2023 18:09:23 +0000 Subject: [PATCH] Add new script to analyze static/shared library usage Parses module-info.json, gathers stats on how many times each library is included shared or statically. Can print a list of libraries that would be a candidate for changing from static to shared or visa versa. Test: m Bug: 280829178 Change-Id: I4bbffbd673ab2e08c69d0ab6e68402be77c9ffbc --- tools/find_static_candidates.py | 232 ++++++++++++++++++++++++++++++++ 1 file changed, 232 insertions(+) create mode 100644 tools/find_static_candidates.py diff --git a/tools/find_static_candidates.py b/tools/find_static_candidates.py new file mode 100644 index 0000000000..7511b36ab2 --- /dev/null +++ b/tools/find_static_candidates.py @@ -0,0 +1,232 @@ +#!/usr/bin/env python3 + +"""Tool to find static libraries that maybe should be shared libraries and shared libraries that maybe should be static libraries. + +This tool only looks at the module-info.json for the current target. + +Example of "class" types for each of the modules in module-info.json + "EXECUTABLES": 2307, + "ETC": 9094, + "NATIVE_TESTS": 10461, + "APPS": 2885, + "JAVA_LIBRARIES": 5205, + "EXECUTABLES/JAVA_LIBRARIES": 119, + "FAKE": 553, + "SHARED_LIBRARIES/STATIC_LIBRARIES": 7591, + "STATIC_LIBRARIES": 11535, + "SHARED_LIBRARIES": 10852, + "HEADER_LIBRARIES": 1897, + "DYLIB_LIBRARIES": 1262, + "RLIB_LIBRARIES": 3413, + "ROBOLECTRIC": 39, + "PACKAGING": 5, + "PROC_MACRO_LIBRARIES": 36, + "RENDERSCRIPT_BITCODE": 17, + "DYLIB_LIBRARIES/RLIB_LIBRARIES": 8, + "ETC/FAKE": 1 + +None of the "SHARED_LIBRARIES/STATIC_LIBRARIES" are double counted in the +modules with one class +RLIB/ + +All of these classes have shared_libs and/or static_libs + "EXECUTABLES", + "SHARED_LIBRARIES", + "STATIC_LIBRARIES", + "SHARED_LIBRARIES/STATIC_LIBRARIES", # cc_library + "HEADER_LIBRARIES", + "NATIVE_TESTS", # test modules + "DYLIB_LIBRARIES", # rust + "RLIB_LIBRARIES", # rust + "ETC", # rust_bindgen +""" + +from collections import defaultdict + +import json, os, argparse + +ANDROID_PRODUCT_OUT = os.environ.get("ANDROID_PRODUCT_OUT") +# If a shared library is used less than MAX_SHARED_INCLUSIONS times in a target, +# then it will likely save memory by changing it to a static library +# This move will also use less storage +MAX_SHARED_INCLUSIONS = 2 +# If a static library is used more than MAX_STATIC_INCLUSIONS times in a target, +# then it will likely save memory by changing it to a shared library +# This move will also likely use less storage +MIN_STATIC_INCLUSIONS = 3 + + +def parse_args(): + parser = argparse.ArgumentParser( + description=( + "Parse module-info.jso and display information about static and" + " shared library dependencies." + ) + ) + parser.add_argument( + "--module", dest="module", help="Print the info for the module." + ) + parser.add_argument( + "--shared", + dest="print_shared", + action=argparse.BooleanOptionalAction, + help=( + "Print the list of libraries that are shared_libs for fewer than {}" + " modules.".format(MAX_SHARED_INCLUSIONS) + ), + ) + parser.add_argument( + "--static", + dest="print_static", + action=argparse.BooleanOptionalAction, + help=( + "Print the list of libraries that are static_libs for more than {}" + " modules.".format(MIN_STATIC_INCLUSIONS) + ), + ) + parser.add_argument( + "--recursive", + dest="recursive", + action=argparse.BooleanOptionalAction, + default=True, + help=( + "Gather all dependencies of EXECUTABLES recursvily before calculating" + " the stats. This eliminates duplicates from multiple libraries" + " including the same dependencies in a single binary." + ), + ) + parser.add_argument( + "--both", + dest="both", + action=argparse.BooleanOptionalAction, + default=False, + help=( + "Print a list of libraries that are including libraries as both" + " static and shared" + ), + ) + return parser.parse_args() + + +class TransitiveHelper: + + def __init__(self): + # keep a list of already expanded libraries so we don't end up in a cycle + self.visited = defaultdict(lambda: defaultdict(set)) + + # module is an object from the module-info dictionary + # module_info is the dictionary from module-info.json + # modify the module's shared_libs and static_libs with all of the transient + # dependencies required from all of the explicit dependencies + def flattenDeps(self, module, module_info): + libs_snapshot = dict(shared_libs = set(module["shared_libs"]), static_libs = set(module["static_libs"])) + + for lib_class in ["shared_libs", "static_libs"]: + for lib in libs_snapshot[lib_class]: + if not lib or lib not in module_info: + continue + if lib in self.visited: + module[lib_class].update(self.visited[lib][lib_class]) + else: + res = self.flattenDeps(module_info[lib], module_info) + module[lib_class].update(res[lib_class]) + self.visited[lib][lib_class].update(res[lib_class]) + + return module + +def main(): + module_info = json.load(open(ANDROID_PRODUCT_OUT + "/module-info.json")) + # turn all of the static_libs and shared_libs lists into sets to make them + # easier to update + for _, module in module_info.items(): + module["shared_libs"] = set(module["shared_libs"]) + module["static_libs"] = set(module["static_libs"]) + + args = parse_args() + + if args.module: + if args.module not in module_info: + print("Module {} does not exist".format(args.module)) + exit(1) + + includedStatically = defaultdict(set) + includedSharedly = defaultdict(set) + includedBothly = defaultdict(set) + transitive = TransitiveHelper() + for name, module in module_info.items(): + if args.recursive: + # in this recursive mode we only want to see what is included by the executables + if "EXECUTABLES" not in module["class"]: + continue + module = transitive.flattenDeps(module, module_info) + # filter out fuzzers by their dependency on clang + if "libclang_rt.fuzzer" in module["static_libs"]: + continue + else: + if "NATIVE_TESTS" in module["class"]: + # We don't care about how tests are including libraries + continue + + # count all of the shared and static libs included in this module + for lib in module["shared_libs"]: + includedSharedly[lib].add(name) + for lib in module["static_libs"]: + includedStatically[lib].add(name) + + intersection = set(module["shared_libs"]).intersection( + module["static_libs"] + ) + if intersection: + includedBothly[name] = intersection + + if args.print_shared: + print( + "Shared libraries that are included by fewer than {} modules on a" + " device:".format(MAX_SHARED_INCLUSIONS) + ) + for name, libs in includedSharedly.items(): + if len(libs) < MAX_SHARED_INCLUSIONS: + print("{}: {} included by: {}".format(name, len(libs), libs)) + + if args.print_static: + print( + "Libraries that are included statically by more than {} modules on a" + " device:".format(MIN_STATIC_INCLUSIONS) + ) + for name, libs in includedStatically.items(): + if len(libs) > MIN_STATIC_INCLUSIONS: + print("{}: {} included by: {}".format(name, len(libs), libs)) + + if args.both: + allIncludedBothly = set() + for name, libs in includedBothly.items(): + allIncludedBothly.update(libs) + + print( + "List of libraries used both statically and shared in the same" + " processes:\n {}\n\n".format("\n".join(sorted(allIncludedBothly))) + ) + print( + "List of libraries used both statically and shared in any processes:\n {}".format("\n".join(sorted(includedStatically.keys() & includedSharedly.keys())))) + + if args.module: + print(json.dumps(module_info[args.module], default=list, indent=2)) + print( + "{} is included in shared_libs {} times by these modules: {}".format( + args.module, len(includedSharedly[args.module]), + includedSharedly[args.module] + ) + ) + print( + "{} is included in static_libs {} times by these modules: {}".format( + args.module, len(includedStatically[args.module]), + includedStatically[args.module] + ) + ) + print("Shared libs included by this module that are used in fewer than {} processes:\n{}".format( + MAX_SHARED_INCLUSIONS, [x for x in module_info[args.module]["shared_libs"] if len(includedSharedly[x]) < MAX_SHARED_INCLUSIONS])) + + + +if __name__ == "__main__": + main()