1// Copyright 2017 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package main 16 17import ( 18 "errors" 19 "flag" 20 "fmt" 21 "hash/crc32" 22 "io" 23 "io/ioutil" 24 "log" 25 "os" 26 "path/filepath" 27 "sort" 28 29 "github.com/google/blueprint/pathtools" 30 31 "android/soong/jar" 32 "android/soong/third_party/zip" 33 soongZip "android/soong/zip" 34) 35 36// Input zip: we can open it, close it, and obtain an array of entries 37type InputZip interface { 38 Name() string 39 Open() error 40 Close() error 41 Entries() []*zip.File 42 IsOpen() bool 43} 44 45// An entry that can be written to the output zip 46type ZipEntryContents interface { 47 String() string 48 IsDir() bool 49 CRC32() uint32 50 Size() uint64 51 WriteToZip(dest string, zw *zip.Writer) error 52} 53 54// a ZipEntryFromZip is a ZipEntryContents that pulls its content from another zip 55// identified by the input zip and the index of the entry in its entries array 56type ZipEntryFromZip struct { 57 inputZip InputZip 58 index int 59 name string 60 isDir bool 61 crc32 uint32 62 size uint64 63} 64 65func NewZipEntryFromZip(inputZip InputZip, entryIndex int) *ZipEntryFromZip { 66 fi := inputZip.Entries()[entryIndex] 67 newEntry := ZipEntryFromZip{inputZip: inputZip, 68 index: entryIndex, 69 name: fi.Name, 70 isDir: fi.FileInfo().IsDir(), 71 crc32: fi.CRC32, 72 size: fi.UncompressedSize64, 73 } 74 return &newEntry 75} 76 77func (ze ZipEntryFromZip) String() string { 78 return fmt.Sprintf("%s!%s", ze.inputZip.Name(), ze.name) 79} 80 81func (ze ZipEntryFromZip) IsDir() bool { 82 return ze.isDir 83} 84 85func (ze ZipEntryFromZip) CRC32() uint32 { 86 return ze.crc32 87} 88 89func (ze ZipEntryFromZip) Size() uint64 { 90 return ze.size 91} 92 93func (ze ZipEntryFromZip) WriteToZip(dest string, zw *zip.Writer) error { 94 if err := ze.inputZip.Open(); err != nil { 95 return err 96 } 97 return zw.CopyFrom(ze.inputZip.Entries()[ze.index], dest) 98} 99 100// a ZipEntryFromBuffer is a ZipEntryContents that pulls its content from a []byte 101type ZipEntryFromBuffer struct { 102 fh *zip.FileHeader 103 content []byte 104} 105 106func (be ZipEntryFromBuffer) String() string { 107 return "internal buffer" 108} 109 110func (be ZipEntryFromBuffer) IsDir() bool { 111 return be.fh.FileInfo().IsDir() 112} 113 114func (be ZipEntryFromBuffer) CRC32() uint32 { 115 return crc32.ChecksumIEEE(be.content) 116} 117 118func (be ZipEntryFromBuffer) Size() uint64 { 119 return uint64(len(be.content)) 120} 121 122func (be ZipEntryFromBuffer) WriteToZip(dest string, zw *zip.Writer) error { 123 w, err := zw.CreateHeader(be.fh) 124 if err != nil { 125 return err 126 } 127 128 if !be.IsDir() { 129 _, err = w.Write(be.content) 130 if err != nil { 131 return err 132 } 133 } 134 135 return nil 136} 137 138// Processing state. 139type OutputZip struct { 140 outputWriter *zip.Writer 141 stripDirEntries bool 142 emulateJar bool 143 sortEntries bool 144 ignoreDuplicates bool 145 excludeDirs []string 146 excludeFiles []string 147 sourceByDest map[string]ZipEntryContents 148} 149 150func NewOutputZip(outputWriter *zip.Writer, sortEntries, emulateJar, stripDirEntries, ignoreDuplicates bool) *OutputZip { 151 return &OutputZip{ 152 outputWriter: outputWriter, 153 stripDirEntries: stripDirEntries, 154 emulateJar: emulateJar, 155 sortEntries: sortEntries, 156 sourceByDest: make(map[string]ZipEntryContents, 0), 157 ignoreDuplicates: ignoreDuplicates, 158 } 159} 160 161func (oz *OutputZip) setExcludeDirs(excludeDirs []string) { 162 oz.excludeDirs = make([]string, len(excludeDirs)) 163 for i, dir := range excludeDirs { 164 oz.excludeDirs[i] = filepath.Clean(dir) 165 } 166} 167 168func (oz *OutputZip) setExcludeFiles(excludeFiles []string) { 169 oz.excludeFiles = excludeFiles 170} 171 172// Adds an entry with given name whose source is given ZipEntryContents. Returns old ZipEntryContents 173// if entry with given name already exists. 174func (oz *OutputZip) addZipEntry(name string, source ZipEntryContents) (ZipEntryContents, error) { 175 if existingSource, exists := oz.sourceByDest[name]; exists { 176 return existingSource, nil 177 } 178 oz.sourceByDest[name] = source 179 // Delay writing an entry if entries need to be rearranged. 180 if oz.emulateJar || oz.sortEntries { 181 return nil, nil 182 } 183 return nil, source.WriteToZip(name, oz.outputWriter) 184} 185 186// Adds an entry for the manifest (META-INF/MANIFEST.MF from the given file 187func (oz *OutputZip) addManifest(manifestPath string) error { 188 if !oz.stripDirEntries { 189 if _, err := oz.addZipEntry(jar.MetaDir, ZipEntryFromBuffer{jar.MetaDirFileHeader(), nil}); err != nil { 190 return err 191 } 192 } 193 contents, err := ioutil.ReadFile(manifestPath) 194 if err == nil { 195 fh, buf, err := jar.ManifestFileContents(contents) 196 if err == nil { 197 _, err = oz.addZipEntry(jar.ManifestFile, ZipEntryFromBuffer{fh, buf}) 198 } 199 } 200 return err 201} 202 203// Adds an entry with given name and contents read from given file 204func (oz *OutputZip) addZipEntryFromFile(name string, path string) error { 205 buf, err := ioutil.ReadFile(path) 206 if err == nil { 207 fh := &zip.FileHeader{ 208 Name: name, 209 Method: zip.Store, 210 UncompressedSize64: uint64(len(buf)), 211 } 212 fh.SetMode(0700) 213 fh.SetModTime(jar.DefaultTime) 214 _, err = oz.addZipEntry(name, ZipEntryFromBuffer{fh, buf}) 215 } 216 return err 217} 218 219func (oz *OutputZip) addEmptyEntry(entry string) error { 220 var emptyBuf []byte 221 fh := &zip.FileHeader{ 222 Name: entry, 223 Method: zip.Store, 224 UncompressedSize64: uint64(len(emptyBuf)), 225 } 226 fh.SetMode(0700) 227 fh.SetModTime(jar.DefaultTime) 228 _, err := oz.addZipEntry(entry, ZipEntryFromBuffer{fh, emptyBuf}) 229 return err 230} 231 232// Returns true if given entry is to be excluded 233func (oz *OutputZip) isEntryExcluded(name string) bool { 234 for _, dir := range oz.excludeDirs { 235 dir = filepath.Clean(dir) 236 patterns := []string{ 237 dir + "/", // the directory itself 238 dir + "/**/*", // files recursively in the directory 239 dir + "/**/*/", // directories recursively in the directory 240 } 241 242 for _, pattern := range patterns { 243 match, err := pathtools.Match(pattern, name) 244 if err != nil { 245 panic(fmt.Errorf("%s: %s", err.Error(), pattern)) 246 } 247 if match { 248 if oz.emulateJar { 249 // When merging jar files, don't strip META-INF/MANIFEST.MF even if stripping META-INF is 250 // requested. 251 // TODO(ccross): which files does this affect? 252 if name != jar.MetaDir && name != jar.ManifestFile { 253 return true 254 } 255 } 256 return true 257 } 258 } 259 } 260 261 for _, pattern := range oz.excludeFiles { 262 match, err := pathtools.Match(pattern, name) 263 if err != nil { 264 panic(fmt.Errorf("%s: %s", err.Error(), pattern)) 265 } 266 if match { 267 return true 268 } 269 } 270 return false 271} 272 273// Creates a zip entry whose contents is an entry from the given input zip. 274func (oz *OutputZip) copyEntry(inputZip InputZip, index int) error { 275 entry := NewZipEntryFromZip(inputZip, index) 276 if oz.stripDirEntries && entry.IsDir() { 277 return nil 278 } 279 existingEntry, err := oz.addZipEntry(entry.name, entry) 280 if err != nil { 281 return err 282 } 283 if existingEntry == nil { 284 return nil 285 } 286 287 // File types should match 288 if existingEntry.IsDir() != entry.IsDir() { 289 return fmt.Errorf("Directory/file mismatch at %v from %v and %v\n", 290 entry.name, existingEntry, entry) 291 } 292 293 if oz.ignoreDuplicates || 294 // Skip manifest and module info files that are not from the first input file 295 (oz.emulateJar && entry.name == jar.ManifestFile || entry.name == jar.ModuleInfoClass) || 296 // Identical entries 297 (existingEntry.CRC32() == entry.CRC32() && existingEntry.Size() == entry.Size()) || 298 // Directory entries 299 entry.IsDir() { 300 return nil 301 } 302 303 return fmt.Errorf("Duplicate path %v found in %v and %v\n", entry.name, existingEntry, inputZip.Name()) 304} 305 306func (oz *OutputZip) entriesArray() []string { 307 entries := make([]string, len(oz.sourceByDest)) 308 i := 0 309 for entry := range oz.sourceByDest { 310 entries[i] = entry 311 i++ 312 } 313 return entries 314} 315 316func (oz *OutputZip) jarSorted() []string { 317 entries := oz.entriesArray() 318 sort.SliceStable(entries, func(i, j int) bool { return jar.EntryNamesLess(entries[i], entries[j]) }) 319 return entries 320} 321 322func (oz *OutputZip) alphanumericSorted() []string { 323 entries := oz.entriesArray() 324 sort.Strings(entries) 325 return entries 326} 327 328func (oz *OutputZip) writeEntries(entries []string) error { 329 for _, entry := range entries { 330 source, _ := oz.sourceByDest[entry] 331 if err := source.WriteToZip(entry, oz.outputWriter); err != nil { 332 return err 333 } 334 } 335 return nil 336} 337 338func (oz *OutputZip) getUninitializedPythonPackages(inputZips []InputZip) ([]string, error) { 339 // the runfiles packages needs to be populated with "__init__.py". 340 // the runfiles dirs have been treated as packages. 341 allPackages := make(map[string]bool) 342 initedPackages := make(map[string]bool) 343 getPackage := func(path string) string { 344 ret := filepath.Dir(path) 345 // filepath.Dir("abc") -> "." and filepath.Dir("/abc") -> "/". 346 if ret == "." || ret == "/" { 347 return "" 348 } 349 return ret 350 } 351 352 // put existing __init__.py files to a set first. This set is used for preventing 353 // generated __init__.py files from overwriting existing ones. 354 for _, inputZip := range inputZips { 355 if err := inputZip.Open(); err != nil { 356 return nil, err 357 } 358 for _, file := range inputZip.Entries() { 359 pyPkg := getPackage(file.Name) 360 if filepath.Base(file.Name) == "__init__.py" { 361 if _, found := initedPackages[pyPkg]; found { 362 panic(fmt.Errorf("found __init__.py path duplicates during pars merging: %q", file.Name)) 363 } 364 initedPackages[pyPkg] = true 365 } 366 for pyPkg != "" { 367 if _, found := allPackages[pyPkg]; found { 368 break 369 } 370 allPackages[pyPkg] = true 371 pyPkg = getPackage(pyPkg) 372 } 373 } 374 } 375 noInitPackages := make([]string, 0) 376 for pyPkg := range allPackages { 377 if _, found := initedPackages[pyPkg]; !found { 378 noInitPackages = append(noInitPackages, pyPkg) 379 } 380 } 381 return noInitPackages, nil 382} 383 384// An InputZip owned by the InputZipsManager. Opened ManagedInputZip's are chained in the open order. 385type ManagedInputZip struct { 386 owner *InputZipsManager 387 realInputZip InputZip 388 older *ManagedInputZip 389 newer *ManagedInputZip 390} 391 392// Maintains the array of ManagedInputZips, keeping track of open input ones. When an InputZip is opened, 393// may close some other InputZip to limit the number of open ones. 394type InputZipsManager struct { 395 inputZips []*ManagedInputZip 396 nOpenZips int 397 maxOpenZips int 398 openInputZips *ManagedInputZip 399} 400 401func (miz *ManagedInputZip) unlink() { 402 olderMiz := miz.older 403 newerMiz := miz.newer 404 if newerMiz.older != miz || olderMiz.newer != miz { 405 panic(fmt.Errorf("removing %p:%#v: broken list between %p:%#v and %p:%#v", 406 miz, miz, newerMiz, newerMiz, olderMiz, olderMiz)) 407 } 408 olderMiz.newer = newerMiz 409 newerMiz.older = olderMiz 410 miz.newer = nil 411 miz.older = nil 412} 413 414func (miz *ManagedInputZip) link(olderMiz *ManagedInputZip) { 415 if olderMiz.newer != nil || olderMiz.older != nil { 416 panic(fmt.Errorf("inputZip is already open")) 417 } 418 oldOlderMiz := miz.older 419 if oldOlderMiz.newer != miz { 420 panic(fmt.Errorf("broken list between %p:%#v and %p:%#v", miz, miz, oldOlderMiz, oldOlderMiz)) 421 } 422 miz.older = olderMiz 423 olderMiz.older = oldOlderMiz 424 oldOlderMiz.newer = olderMiz 425 olderMiz.newer = miz 426} 427 428func NewInputZipsManager(nInputZips, maxOpenZips int) *InputZipsManager { 429 if maxOpenZips < 3 { 430 panic(fmt.Errorf("open zips limit should be above 3")) 431 } 432 // In the fake element .older points to the most recently opened InputZip, and .newer points to the oldest. 433 head := new(ManagedInputZip) 434 head.older = head 435 head.newer = head 436 return &InputZipsManager{ 437 inputZips: make([]*ManagedInputZip, 0, nInputZips), 438 maxOpenZips: maxOpenZips, 439 openInputZips: head, 440 } 441} 442 443// InputZip factory 444func (izm *InputZipsManager) Manage(inz InputZip) InputZip { 445 iz := &ManagedInputZip{owner: izm, realInputZip: inz} 446 izm.inputZips = append(izm.inputZips, iz) 447 return iz 448} 449 450// Opens or reopens ManagedInputZip. 451func (izm *InputZipsManager) reopen(miz *ManagedInputZip) error { 452 if miz.realInputZip.IsOpen() { 453 if miz != izm.openInputZips { 454 miz.unlink() 455 izm.openInputZips.link(miz) 456 } 457 return nil 458 } 459 if izm.nOpenZips >= izm.maxOpenZips { 460 if err := izm.close(izm.openInputZips.older); err != nil { 461 return err 462 } 463 } 464 if err := miz.realInputZip.Open(); err != nil { 465 return err 466 } 467 izm.openInputZips.link(miz) 468 izm.nOpenZips++ 469 return nil 470} 471 472func (izm *InputZipsManager) close(miz *ManagedInputZip) error { 473 if miz.IsOpen() { 474 err := miz.realInputZip.Close() 475 izm.nOpenZips-- 476 miz.unlink() 477 return err 478 } 479 return nil 480} 481 482// Checks that openInputZips deque is valid 483func (izm *InputZipsManager) checkOpenZipsDeque() { 484 nReallyOpen := 0 485 el := izm.openInputZips 486 for { 487 elNext := el.older 488 if elNext.newer != el { 489 panic(fmt.Errorf("Element:\n %p: %v\nNext:\n %p %v", el, el, elNext, elNext)) 490 } 491 if elNext == izm.openInputZips { 492 break 493 } 494 el = elNext 495 if !el.IsOpen() { 496 panic(fmt.Errorf("Found unopened element")) 497 } 498 nReallyOpen++ 499 if nReallyOpen > izm.nOpenZips { 500 panic(fmt.Errorf("found %d open zips, should be %d", nReallyOpen, izm.nOpenZips)) 501 } 502 } 503 if nReallyOpen > izm.nOpenZips { 504 panic(fmt.Errorf("found %d open zips, should be %d", nReallyOpen, izm.nOpenZips)) 505 } 506} 507 508func (miz *ManagedInputZip) Name() string { 509 return miz.realInputZip.Name() 510} 511 512func (miz *ManagedInputZip) Open() error { 513 return miz.owner.reopen(miz) 514} 515 516func (miz *ManagedInputZip) Close() error { 517 return miz.owner.close(miz) 518} 519 520func (miz *ManagedInputZip) IsOpen() bool { 521 return miz.realInputZip.IsOpen() 522} 523 524func (miz *ManagedInputZip) Entries() []*zip.File { 525 if !miz.IsOpen() { 526 panic(fmt.Errorf("%s: is not open", miz.Name())) 527 } 528 return miz.realInputZip.Entries() 529} 530 531// Actual processing. 532func mergeZips(inputZips []InputZip, writer *zip.Writer, manifest, pyMain string, 533 sortEntries, emulateJar, emulatePar, stripDirEntries, ignoreDuplicates bool, 534 excludeFiles, excludeDirs []string, zipsToNotStrip map[string]bool) error { 535 536 out := NewOutputZip(writer, sortEntries, emulateJar, stripDirEntries, ignoreDuplicates) 537 out.setExcludeFiles(excludeFiles) 538 out.setExcludeDirs(excludeDirs) 539 if manifest != "" { 540 if err := out.addManifest(manifest); err != nil { 541 return err 542 } 543 } 544 if pyMain != "" { 545 if err := out.addZipEntryFromFile("__main__.py", pyMain); err != nil { 546 return err 547 } 548 } 549 550 if emulatePar { 551 noInitPackages, err := out.getUninitializedPythonPackages(inputZips) 552 if err != nil { 553 return err 554 } 555 for _, uninitializedPyPackage := range noInitPackages { 556 if err = out.addEmptyEntry(filepath.Join(uninitializedPyPackage, "__init__.py")); err != nil { 557 return err 558 } 559 } 560 } 561 562 // Finally, add entries from all the input zips. 563 for _, inputZip := range inputZips { 564 _, copyFully := zipsToNotStrip[inputZip.Name()] 565 if err := inputZip.Open(); err != nil { 566 return err 567 } 568 569 for i, entry := range inputZip.Entries() { 570 if copyFully || !out.isEntryExcluded(entry.Name) { 571 if err := out.copyEntry(inputZip, i); err != nil { 572 return err 573 } 574 } 575 } 576 // Unless we need to rearrange the entries, the input zip can now be closed. 577 if !(emulateJar || sortEntries) { 578 if err := inputZip.Close(); err != nil { 579 return err 580 } 581 } 582 } 583 584 if emulateJar { 585 return out.writeEntries(out.jarSorted()) 586 } else if sortEntries { 587 return out.writeEntries(out.alphanumericSorted()) 588 } 589 return nil 590} 591 592// Process command line 593type fileList []string 594 595func (f *fileList) String() string { 596 return `""` 597} 598 599func (f *fileList) Set(name string) error { 600 *f = append(*f, filepath.Clean(name)) 601 602 return nil 603} 604 605type zipsToNotStripSet map[string]bool 606 607func (s zipsToNotStripSet) String() string { 608 return `""` 609} 610 611func (s zipsToNotStripSet) Set(path string) error { 612 s[path] = true 613 return nil 614} 615 616var ( 617 sortEntries = flag.Bool("s", false, "sort entries (defaults to the order from the input zip files)") 618 emulateJar = flag.Bool("j", false, "sort zip entries using jar ordering (META-INF first)") 619 emulatePar = flag.Bool("p", false, "merge zip entries based on par format") 620 excludeDirs fileList 621 excludeFiles fileList 622 zipsToNotStrip = make(zipsToNotStripSet) 623 stripDirEntries = flag.Bool("D", false, "strip directory entries from the output zip file") 624 manifest = flag.String("m", "", "manifest file to insert in jar") 625 pyMain = flag.String("pm", "", "__main__.py file to insert in par") 626 prefix = flag.String("prefix", "", "A file to prefix to the zip file") 627 ignoreDuplicates = flag.Bool("ignore-duplicates", false, "take each entry from the first zip it exists in and don't warn") 628) 629 630func init() { 631 flag.Var(&excludeDirs, "stripDir", "directories to be excluded from the output zip, accepts wildcards") 632 flag.Var(&excludeFiles, "stripFile", "files to be excluded from the output zip, accepts wildcards") 633 flag.Var(&zipsToNotStrip, "zipToNotStrip", "the input zip file which is not applicable for stripping") 634} 635 636type FileInputZip struct { 637 name string 638 reader *zip.ReadCloser 639} 640 641func (fiz *FileInputZip) Name() string { 642 return fiz.name 643} 644 645func (fiz *FileInputZip) Close() error { 646 if fiz.IsOpen() { 647 reader := fiz.reader 648 fiz.reader = nil 649 return reader.Close() 650 } 651 return nil 652} 653 654func (fiz *FileInputZip) Entries() []*zip.File { 655 if !fiz.IsOpen() { 656 panic(fmt.Errorf("%s: is not open", fiz.Name())) 657 } 658 return fiz.reader.File 659} 660 661func (fiz *FileInputZip) IsOpen() bool { 662 return fiz.reader != nil 663} 664 665func (fiz *FileInputZip) Open() error { 666 if fiz.IsOpen() { 667 return nil 668 } 669 var err error 670 if fiz.reader, err = zip.OpenReader(fiz.Name()); err != nil { 671 return fmt.Errorf("%s: %s", fiz.Name(), err.Error()) 672 } 673 return nil 674} 675 676func main() { 677 flag.Usage = func() { 678 fmt.Fprintln(os.Stderr, "usage: merge_zips [-jpsD] [-m manifest] [--prefix script] [-pm __main__.py] OutputZip [inputs...]") 679 flag.PrintDefaults() 680 } 681 682 // parse args 683 flag.Parse() 684 args := flag.Args() 685 if len(args) < 1 { 686 flag.Usage() 687 os.Exit(1) 688 } 689 outputPath := args[0] 690 inputs := make([]string, 0) 691 for _, input := range args[1:] { 692 if input[0] == '@' { 693 bytes, err := ioutil.ReadFile(input[1:]) 694 if err != nil { 695 log.Fatal(err) 696 } 697 inputs = append(inputs, soongZip.ReadRespFile(bytes)...) 698 continue 699 } 700 inputs = append(inputs, input) 701 continue 702 } 703 704 log.SetFlags(log.Lshortfile) 705 706 // make writer 707 outputZip, err := os.Create(outputPath) 708 if err != nil { 709 log.Fatal(err) 710 } 711 defer outputZip.Close() 712 713 var offset int64 714 if *prefix != "" { 715 prefixFile, err := os.Open(*prefix) 716 if err != nil { 717 log.Fatal(err) 718 } 719 offset, err = io.Copy(outputZip, prefixFile) 720 if err != nil { 721 log.Fatal(err) 722 } 723 } 724 725 writer := zip.NewWriter(outputZip) 726 defer func() { 727 err := writer.Close() 728 if err != nil { 729 log.Fatal(err) 730 } 731 }() 732 writer.SetOffset(offset) 733 734 if *manifest != "" && !*emulateJar { 735 log.Fatal(errors.New("must specify -j when specifying a manifest via -m")) 736 } 737 738 if *pyMain != "" && !*emulatePar { 739 log.Fatal(errors.New("must specify -p when specifying a Python __main__.py via -pm")) 740 } 741 742 // do merge 743 inputZipsManager := NewInputZipsManager(len(inputs), 1000) 744 inputZips := make([]InputZip, len(inputs)) 745 for i, input := range inputs { 746 inputZips[i] = inputZipsManager.Manage(&FileInputZip{name: input}) 747 } 748 err = mergeZips(inputZips, writer, *manifest, *pyMain, *sortEntries, *emulateJar, *emulatePar, 749 *stripDirEntries, *ignoreDuplicates, []string(excludeFiles), []string(excludeDirs), 750 map[string]bool(zipsToNotStrip)) 751 if err != nil { 752 log.Fatal(err) 753 } 754} 755