Split large files for BBOTA v3.
For BBOTA v3, we need to stash source blocks to support the resumable-update feature. However, as file sizes grow and the cache partition shrinks, the source blocks that represent a single file can be too large to be stashed as a whole. The CL in [1] solves the issue by replacing the diff command with a "new" command. However, that may increase the generated package size substantially (e.g. from ~100MB to ~400MB). With this CL, if a file spans too many blocks, we split it into smaller pieces by generating multiple commands. For the same case as above, this reduces the package size to ~150MB. One potential downside is that after splitting, files like .jar, .apk and .zip can no longer use imgdiff, so we may lose the potential benefit of using imgdiff for patch size reduction. [1] commit 82c47981bd. Bug: 22430577. Change-Id: Iee1ad6543f3d40368e079e418cc31728e1ab3f48 (cherry picked from commit 9a5caf2c30)
This commit is contained in:
		| @@ -297,7 +297,6 @@ class BlockImageDiff(object): | ||||
|     out = [] | ||||
|  | ||||
|     total = 0 | ||||
|     performs_read = False | ||||
|  | ||||
|     stashes = {} | ||||
|     stashed_blocks = 0 | ||||
| @@ -409,7 +408,6 @@ class BlockImageDiff(object): | ||||
|         out.append("%s %s\n" % (xf.style, xf.tgt_ranges.to_string_raw())) | ||||
|         total += tgt_size | ||||
|       elif xf.style == "move": | ||||
|         performs_read = True | ||||
|         assert xf.tgt_ranges | ||||
|         assert xf.src_ranges.size() == tgt_size | ||||
|         if xf.src_ranges != xf.tgt_ranges: | ||||
| @@ -434,7 +432,6 @@ class BlockImageDiff(object): | ||||
|                 xf.tgt_ranges.to_string_raw(), src_str)) | ||||
|           total += tgt_size | ||||
|       elif xf.style in ("bsdiff", "imgdiff"): | ||||
|         performs_read = True | ||||
|         assert xf.tgt_ranges | ||||
|         assert xf.src_ranges | ||||
|         if self.version == 1: | ||||
| @@ -539,6 +536,7 @@ class BlockImageDiff(object): | ||||
|     max_allowed = cache_size * stash_threshold / self.tgt.blocksize | ||||
|  | ||||
|     stashed_blocks = 0 | ||||
|     new_blocks = 0 | ||||
|  | ||||
|     # Now go through all the commands. Compute the required stash size on the | ||||
|     # fly. If a command requires excess stash than available, it deletes the | ||||
| @@ -554,8 +552,7 @@ class BlockImageDiff(object): | ||||
|           # that will use this stash and replace the command with "new". | ||||
|           use_cmd = stashes[idx][2] | ||||
|           replaced_cmds.append(use_cmd) | ||||
|           print("  %s replaced due to an explicit stash of %d blocks." % ( | ||||
|               use_cmd, sr.size())) | ||||
|           print("%10d  %9s  %s" % (sr.size(), "explicit", use_cmd)) | ||||
|         else: | ||||
|           stashed_blocks += sr.size() | ||||
|  | ||||
| @@ -570,8 +567,7 @@ class BlockImageDiff(object): | ||||
|         if xf.src_ranges.overlaps(xf.tgt_ranges): | ||||
|           if stashed_blocks + xf.src_ranges.size() > max_allowed: | ||||
|             replaced_cmds.append(xf) | ||||
|             print("  %s replaced due to an implicit stash of %d blocks." % ( | ||||
|                 xf, xf.src_ranges.size())) | ||||
|             print("%10d  %9s  %s" % (xf.src_ranges.size(), "implicit", xf)) | ||||
|  | ||||
|       # Replace the commands in replaced_cmds with "new"s. | ||||
|       for cmd in replaced_cmds: | ||||
| @@ -581,9 +577,13 @@ class BlockImageDiff(object): | ||||
|           def_cmd = stashes[idx][1] | ||||
|           assert (idx, sr) in def_cmd.stash_before | ||||
|           def_cmd.stash_before.remove((idx, sr)) | ||||
|           new_blocks += sr.size() | ||||
|  | ||||
|         cmd.ConvertToNew() | ||||
|  | ||||
|     print("  Total %d blocks are packed as new blocks due to insufficient " | ||||
|           "cache size." % (new_blocks,)) | ||||
|  | ||||
|   def ComputePatches(self, prefix): | ||||
|     print("Reticulating splines...") | ||||
|     diff_q = [] | ||||
| @@ -939,6 +939,57 @@ class BlockImageDiff(object): | ||||
|           a.goes_after[b] = size | ||||
|  | ||||
|   def FindTransfers(self): | ||||
|     """Parse the file_map to generate all the transfers.""" | ||||
|  | ||||
|     def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id, | ||||
|                     split=False): | ||||
|       """Wrapper function for adding a Transfer(). | ||||
|  | ||||
|       For BBOTA v3, we need to stash source blocks for resumable feature. | ||||
|       However, with the growth of file size and the shrink of the cache | ||||
|       partition source blocks are too large to be stashed. If a file occupies | ||||
|       too many blocks (greater than MAX_BLOCKS_PER_DIFF_TRANSFER), we split it | ||||
|       into smaller pieces by getting multiple Transfer()s. | ||||
|  | ||||
|       The downside is that after splitting, we can no longer use imgdiff but | ||||
|       only bsdiff.""" | ||||
|  | ||||
|       MAX_BLOCKS_PER_DIFF_TRANSFER = 1024 | ||||
|  | ||||
|       # We care about diff transfers only. | ||||
|       if style != "diff" or not split: | ||||
|         Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id) | ||||
|         return | ||||
|  | ||||
|       # Change nothing for small files. | ||||
|       if (tgt_ranges.size() <= MAX_BLOCKS_PER_DIFF_TRANSFER and | ||||
|           src_ranges.size() <= MAX_BLOCKS_PER_DIFF_TRANSFER): | ||||
|         Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id) | ||||
|         return | ||||
|  | ||||
|       pieces = 0 | ||||
|       while (tgt_ranges.size() > MAX_BLOCKS_PER_DIFF_TRANSFER and | ||||
|              src_ranges.size() > MAX_BLOCKS_PER_DIFF_TRANSFER): | ||||
|         tgt_split_name = "%s-%d" % (tgt_name, pieces) | ||||
|         src_split_name = "%s-%d" % (src_name, pieces) | ||||
|         tgt_first = tgt_ranges.first(MAX_BLOCKS_PER_DIFF_TRANSFER) | ||||
|         src_first = src_ranges.first(MAX_BLOCKS_PER_DIFF_TRANSFER) | ||||
|         Transfer(tgt_split_name, src_split_name, tgt_first, src_first, style, | ||||
|                  by_id) | ||||
|  | ||||
|         tgt_ranges = tgt_ranges.subtract(tgt_first) | ||||
|         src_ranges = src_ranges.subtract(src_first) | ||||
|         pieces += 1 | ||||
|  | ||||
|       # Handle remaining blocks. | ||||
|       if tgt_ranges.size() or src_ranges.size(): | ||||
|         # Must be both non-empty. | ||||
|         assert tgt_ranges.size() and src_ranges.size() | ||||
|         tgt_split_name = "%s-%d" % (tgt_name, pieces) | ||||
|         src_split_name = "%s-%d" % (src_name, pieces) | ||||
|         Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges, style, | ||||
|                  by_id) | ||||
|  | ||||
|     empty = RangeSet() | ||||
|     for tgt_fn, tgt_ranges in self.tgt.file_map.items(): | ||||
|       if tgt_fn == "__ZERO": | ||||
| @@ -946,28 +997,28 @@ class BlockImageDiff(object): | ||||
|         # in any file and that are filled with zeros.  We have a | ||||
|         # special transfer style for zero blocks. | ||||
|         src_ranges = self.src.file_map.get("__ZERO", empty) | ||||
|         Transfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges, | ||||
|         AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges, | ||||
|                     "zero", self.transfers) | ||||
|         continue | ||||
|  | ||||
|       elif tgt_fn == "__COPY": | ||||
|         # "__COPY" domain includes all the blocks not contained in any | ||||
|         # file and that need to be copied unconditionally to the target. | ||||
|         Transfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers) | ||||
|         AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers) | ||||
|         continue | ||||
|  | ||||
|       elif tgt_fn in self.src.file_map: | ||||
|         # Look for an exact pathname match in the source. | ||||
|         Transfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn], | ||||
|                  "diff", self.transfers) | ||||
|         AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn], | ||||
|                     "diff", self.transfers, self.version >= 3) | ||||
|         continue | ||||
|  | ||||
|       b = os.path.basename(tgt_fn) | ||||
|       if b in self.src_basenames: | ||||
|         # Look for an exact basename match in the source. | ||||
|         src_fn = self.src_basenames[b] | ||||
|         Transfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn], | ||||
|                  "diff", self.transfers) | ||||
|         AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn], | ||||
|                     "diff", self.transfers, self.version >= 3) | ||||
|         continue | ||||
|  | ||||
|       b = re.sub("[0-9]+", "#", b) | ||||
| @@ -977,11 +1028,11 @@ class BlockImageDiff(object): | ||||
|         # for .so files that contain version numbers in the filename | ||||
|         # that get bumped.) | ||||
|         src_fn = self.src_numpatterns[b] | ||||
|         Transfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn], | ||||
|                  "diff", self.transfers) | ||||
|         AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn], | ||||
|                     "diff", self.transfers, self.version >= 3) | ||||
|         continue | ||||
|  | ||||
|       Transfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers) | ||||
|       AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers) | ||||
|  | ||||
|   def AbbreviateSourceNames(self): | ||||
|     for k in self.src.file_map.keys(): | ||||
|   | ||||
| @@ -24,6 +24,7 @@ class RangeSet(object): | ||||
|   lots of runs.""" | ||||
|  | ||||
|   def __init__(self, data=None): | ||||
|     # TODO(tbao): monotonic is broken when passing in a tuple. | ||||
|     self.monotonic = False | ||||
|     if isinstance(data, str): | ||||
|       self._parse_internal(data) | ||||
| @@ -260,6 +261,38 @@ class RangeSet(object): | ||||
|       out = out.union(RangeSet(str(s1) + "-" + str(e1-1))) | ||||
|     return out | ||||
|  | ||||
|   def first(self, n): | ||||
|     """Return the RangeSet that contains at most the first 'n' integers. | ||||
|  | ||||
|     >>> RangeSet("0-9").first(1) | ||||
|     <RangeSet("0")> | ||||
|     >>> RangeSet("10-19").first(5) | ||||
|     <RangeSet("10-14")> | ||||
|     >>> RangeSet("10-19").first(15) | ||||
|     <RangeSet("10-19")> | ||||
|     >>> RangeSet("10-19 30-39").first(3) | ||||
|     <RangeSet("10-12")> | ||||
|     >>> RangeSet("10-19 30-39").first(15) | ||||
|     <RangeSet("10-19 30-34")> | ||||
|     >>> RangeSet("10-19 30-39").first(30) | ||||
|     <RangeSet("10-19 30-39")> | ||||
|     >>> RangeSet("0-9").first(0) | ||||
|     <RangeSet("")> | ||||
|     """ | ||||
|  | ||||
|     if self.size() <= n: | ||||
|       return self | ||||
|  | ||||
|     out = [] | ||||
|     for s, e in self: | ||||
|       if e - s >= n: | ||||
|         out += (s, s+n) | ||||
|         break | ||||
|       else: | ||||
|         out += (s, e) | ||||
|         n -= e - s | ||||
|     return RangeSet(data=out) | ||||
|  | ||||
|  | ||||
| if __name__ == "__main__": | ||||
|   import doctest | ||||
|   | ||||
		Reference in New Issue
	
	Block a user