soong_jar: Parallel compression

This compresses multiple files in parallel, and will split up larger
files (5MB+) into smaller chunks (1MB) to compress in parallel.

There is a small size overhead to recombine the chunks, but it's only a
few bytes per chunk, so for a 1MB chunk, it's minimal.

Rough numbers: with everything in the page cache, this can compress
~4GB (1000 files) down to 1GB in 6.5 seconds, compared with 120 seconds
for the non-parallel soong_jar and 150 seconds for zip.

Go's DEFLATE algorithm is still a bit worse than zip's -- about 3.5%
larger file sizes, but for most of our "dist" targets that is fine.

Change-Id: Ie4886c7d0f954ace46e599156e35fea7e74d6dd7
This commit is contained in:
Dan Willemsen
2016-08-04 15:43:03 -07:00
parent 3bf1a08505
commit 017d8937fa
4 changed files with 635 additions and 42 deletions

View File

@@ -15,6 +15,7 @@
package zip
import (
"errors"
"io"
)
@@ -68,3 +69,108 @@ func (w *Writer) CopyFrom(orig *File, newName string) error {
_, err = w.cw.Write(buf)
return err
}
// CreateCompressedHeader adds a file to the zip file using the provided
// FileHeader for the file metadata.
// It returns an io.WriteCloser to which the already compressed file
// contents should be written; this function installs no compressor of its
// own on the returned writer.
//
// The UncompressedSize64 and CRC32 entries in the FileHeader must be filled
// out already: the returned writer copies them into the data descriptor as
// given and only measures the compressed size itself.
//
// The file's contents must be written to the returned writer before the
// next call to Create, CreateHeader, CreateCompressedHeader, or Close. The
// provided FileHeader fh must not be modified after a call to
// CreateCompressedHeader.
//
// An error is returned if finishing the previous entry fails, if fh is the
// same pointer as the most recently added entry's FileHeader, or if writing
// the local file header fails.
func (w *Writer) CreateCompressedHeader(fh *FileHeader) (io.WriteCloser, error) {
	// Finish the previous entry first if it was left open.
	if prev := w.last; prev != nil && !prev.closed {
		if err := prev.close(); err != nil {
			return nil, err
		}
	}

	// Refuse to register the same FileHeader pointer twice in a row.
	if n := len(w.dir); n > 0 && w.dir[n-1].FileHeader == fh {
		// See https://golang.org/issue/11144 confusion.
		return nil, errors.New("archive/zip: invalid duplicate FileHeader")
	}

	fh.Flags |= 0x8 // we will write a data descriptor
	fh.ReaderVersion = zipVersion20
	fh.CreatorVersion = fh.CreatorVersion&0xff00 | zipVersion20 // preserve compatibility byte

	// Record the entry's offset before its local header is emitted.
	entry := &header{
		FileHeader: fh,
		offset:     uint64(w.cw.count),
	}
	w.dir = append(w.dir, entry)

	cfw := &compressedFileWriter{
		fileWriter{
			zipw:      w.cw,
			compCount: &countWriter{w: w.cw},
		},
	}
	cfw.header = entry

	if err := writeHeader(w.cw, fh); err != nil {
		return nil, err
	}

	// NOTE(review): w.last refers to the embedded fileWriter, so an entry
	// that the caller never Closes would be finished through
	// fileWriter.close rather than compressedFileWriter.Close — confirm
	// that path is safe for a writer created here, or require callers to
	// Close explicitly.
	w.last = &cfw.fileWriter
	return cfw, nil
}
// compressedFileWriter is the io.WriteCloser handed out by
// CreateCompressedHeader. It embeds fileWriter to reuse its bookkeeping
// fields (zipw, compCount, header, closed) and defines its own Write and
// Close methods.
type compressedFileWriter struct {
fileWriter
}
// Write forwards p to the entry's compCount writer and returns that
// writer's result. Once the entry has been closed it writes nothing and
// returns an error instead.
func (w *compressedFileWriter) Write(p []byte) (int, error) {
	if !w.closed {
		return w.compCount.Write(p)
	}
	return 0, errors.New("zip: write to closed file")
}
// Close finishes the current entry: it stores the number of bytes counted
// by compCount as the compressed size, fills in the 32-bit size fields (or
// the uint32max placeholders when the header reports ZIP64), and writes the
// data descriptor to zipw. CRC32 and UncompressedSize64 are used exactly as
// the caller supplied them in the FileHeader.
//
// It returns an error if the entry was already closed, or the error from
// writing the data descriptor.
func (w *compressedFileWriter) Close() error {
	if w.closed {
		return errors.New("zip: file closed twice")
	}
	w.closed = true

	// update FileHeader
	hdr := w.header.FileHeader
	hdr.CompressedSize64 = uint64(w.compCount.count)

	if !hdr.isZip64() {
		hdr.CompressedSize = uint32(hdr.CompressedSize64)
		hdr.UncompressedSize = uint32(hdr.UncompressedSize64)
	} else {
		hdr.CompressedSize = uint32max
		hdr.UncompressedSize = uint32max
		hdr.ReaderVersion = zipVersion45 // requires 4.5 - File uses ZIP64 format extensions
	}

	// Write data descriptor. This is more complicated than one would
	// think, see e.g. comments in zipfile.c:putextended() and
	// http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588.
	// The approach here is to write 8 byte sizes if needed without
	// adding a zip64 extra in the local header (too late anyway).
	descLen := dataDescriptorLen
	if hdr.isZip64() {
		descLen = dataDescriptor64Len
	}
	desc := make([]byte, descLen)

	out := writeBuf(desc)
	out.uint32(dataDescriptorSignature) // de-facto standard, required by OS X
	out.uint32(hdr.CRC32)
	if !hdr.isZip64() {
		out.uint32(hdr.CompressedSize)
		out.uint32(hdr.UncompressedSize)
	} else {
		out.uint64(hdr.CompressedSize64)
		out.uint64(hdr.UncompressedSize64)
	}

	_, err := w.zipw.Write(desc)
	return err
}