From 2f5f16d574d522bc5f4f81ee7e1a9cc981340a23 Mon Sep 17 00:00:00 2001 From: Yi Kong Date: Wed, 23 Sep 2020 00:54:50 +0800 Subject: [PATCH] Tweak ThinLTO inlining heuristics in the absence of a PGO profile We previously disabled inlining and unrolling completely during ThinLTO in the absence of a PGO profile. For global ThinLTO, we want to better balance binary size and performance. We evaluated a number of combinations of heuristics with global ThinLTO configuration: binary size change no LTO baseline no inline, no unroll -0.54% no inline, unroll -0.50% import-instr-limit=5, unroll +0.02% import-instr-limit=10, unroll +0.13% Loop unrolling does not contribute much to the binary size, therefore it is re-enabled. import-instr-limit=5 balances the binary size savings from ThinLTO and size increase due to additional optimisation. Bug: 78485207 Bug: 169004486 Test: TreeHugger Change-Id: I1c21153605e2ae42daa8857b06e27c081ee8ad85 --- cc/lto.go | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/cc/lto.go b/cc/lto.go index e03433731..d1903b893 100644 --- a/cc/lto.go +++ b/cc/lto.go @@ -117,12 +117,11 @@ func (lto *lto) flags(ctx BaseModuleContext, flags Flags) Flags { flags.Local.LdFlags = append(flags.Local.LdFlags, cachePolicyFormat+policy) } - // If the module does not have a profile, be conservative and do not inline - // or unroll loops during LTO, in order to prevent significant size bloat. + // If the module does not have a profile, be conservative and limit cross TU inline + // limit to 5 LLVM IR instructions, to balance binary size increase and performance. if !ctx.isPgoCompile() { flags.Local.LdFlags = append(flags.Local.LdFlags, - "-Wl,-plugin-opt,-inline-threshold=0", - "-Wl,-plugin-opt,-unroll-threshold=0") + "-Wl,-plugin-opt,-import-instr-limit=5") } } return flags