1// Copyright 2017 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package main
16
17import (
18	"errors"
19	"flag"
20	"fmt"
21	"hash/crc32"
22	"io"
23	"io/ioutil"
24	"log"
25	"os"
26	"path/filepath"
27	"sort"
28
29	"github.com/google/blueprint/pathtools"
30
31	"android/soong/jar"
32	"android/soong/third_party/zip"
33	soongZip "android/soong/zip"
34)
35
// InputZip is a zip archive used as merge input: it can be opened, closed, and
// asked for its array of entries.
type InputZip interface {
	// Name returns the name identifying this input zip.
	Name() string
	// Open makes the entries of the zip available.
	Open() error
	// Close releases the zip.
	Close() error
	// Entries returns the entries of the zip. The implementations in this file
	// panic when the zip is not open.
	Entries() []*zip.File
	// IsOpen reports whether the zip is currently open.
	IsOpen() bool
}
44
// ZipEntryContents is an entry that can be written to the output zip.
type ZipEntryContents interface {
	// String describes where the entry comes from; copyEntry prints it in its
	// error messages.
	String() string
	// IsDir reports whether the entry is a directory.
	IsDir() bool
	// CRC32 returns the checksum of the entry's contents.
	CRC32() uint32
	// Size returns the uncompressed size of the entry's contents in bytes.
	Size() uint64
	// WriteToZip writes the entry to zw; dest is the name of the entry in the
	// output zip.
	WriteToZip(dest string, zw *zip.Writer) error
}
53
54// a ZipEntryFromZip is a ZipEntryContents that pulls its content from another zip
55// identified by the input zip and the index of the entry in its entries array
56type ZipEntryFromZip struct {
57	inputZip InputZip
58	index    int
59	name     string
60	isDir    bool
61	crc32    uint32
62	size     uint64
63}
64
65func NewZipEntryFromZip(inputZip InputZip, entryIndex int) *ZipEntryFromZip {
66	fi := inputZip.Entries()[entryIndex]
67	newEntry := ZipEntryFromZip{inputZip: inputZip,
68		index: entryIndex,
69		name:  fi.Name,
70		isDir: fi.FileInfo().IsDir(),
71		crc32: fi.CRC32,
72		size:  fi.UncompressedSize64,
73	}
74	return &newEntry
75}
76
77func (ze ZipEntryFromZip) String() string {
78	return fmt.Sprintf("%s!%s", ze.inputZip.Name(), ze.name)
79}
80
81func (ze ZipEntryFromZip) IsDir() bool {
82	return ze.isDir
83}
84
85func (ze ZipEntryFromZip) CRC32() uint32 {
86	return ze.crc32
87}
88
89func (ze ZipEntryFromZip) Size() uint64 {
90	return ze.size
91}
92
93func (ze ZipEntryFromZip) WriteToZip(dest string, zw *zip.Writer) error {
94	if err := ze.inputZip.Open(); err != nil {
95		return err
96	}
97	return zw.CopyFrom(ze.inputZip.Entries()[ze.index], dest)
98}
99
100// a ZipEntryFromBuffer is a ZipEntryContents that pulls its content from a []byte
101type ZipEntryFromBuffer struct {
102	fh      *zip.FileHeader
103	content []byte
104}
105
106func (be ZipEntryFromBuffer) String() string {
107	return "internal buffer"
108}
109
110func (be ZipEntryFromBuffer) IsDir() bool {
111	return be.fh.FileInfo().IsDir()
112}
113
114func (be ZipEntryFromBuffer) CRC32() uint32 {
115	return crc32.ChecksumIEEE(be.content)
116}
117
118func (be ZipEntryFromBuffer) Size() uint64 {
119	return uint64(len(be.content))
120}
121
122func (be ZipEntryFromBuffer) WriteToZip(dest string, zw *zip.Writer) error {
123	w, err := zw.CreateHeader(be.fh)
124	if err != nil {
125		return err
126	}
127
128	if !be.IsDir() {
129		_, err = w.Write(be.content)
130		if err != nil {
131			return err
132		}
133	}
134
135	return nil
136}
137
// OutputZip is the processing state of a merge.
//
// outputWriter is the zip writer the entries are written to. stripDirEntries
// makes copyEntry skip directory entries and makes addManifest skip the META-INF
// directory entry. When emulateJar or sortEntries is set, addZipEntry only
// records entries and leaves writing them to a later writeEntries call.
// ignoreDuplicates makes copyEntry accept duplicate paths silently.
// excludeDirs and excludeFiles hold the patterns consulted by isEntryExcluded.
// sourceByDest maps the name of each output entry to the source of its contents.
type OutputZip struct {
	outputWriter     *zip.Writer
	stripDirEntries  bool
	emulateJar       bool
	sortEntries      bool
	ignoreDuplicates bool
	excludeDirs      []string
	excludeFiles     []string
	sourceByDest     map[string]ZipEntryContents
}
149
150func NewOutputZip(outputWriter *zip.Writer, sortEntries, emulateJar, stripDirEntries, ignoreDuplicates bool) *OutputZip {
151	return &OutputZip{
152		outputWriter:     outputWriter,
153		stripDirEntries:  stripDirEntries,
154		emulateJar:       emulateJar,
155		sortEntries:      sortEntries,
156		sourceByDest:     make(map[string]ZipEntryContents, 0),
157		ignoreDuplicates: ignoreDuplicates,
158	}
159}
160
161func (oz *OutputZip) setExcludeDirs(excludeDirs []string) {
162	oz.excludeDirs = make([]string, len(excludeDirs))
163	for i, dir := range excludeDirs {
164		oz.excludeDirs[i] = filepath.Clean(dir)
165	}
166}
167
// setExcludeFiles replaces the list of excluded file patterns. The slice is
// stored as is (not copied), so it shares storage with the caller's slice.
func (oz *OutputZip) setExcludeFiles(excludeFiles []string) {
	oz.excludeFiles = excludeFiles
}
171
172// Adds an entry with given name whose source is given ZipEntryContents. Returns old ZipEntryContents
173// if entry with given name already exists.
174func (oz *OutputZip) addZipEntry(name string, source ZipEntryContents) (ZipEntryContents, error) {
175	if existingSource, exists := oz.sourceByDest[name]; exists {
176		return existingSource, nil
177	}
178	oz.sourceByDest[name] = source
179	// Delay writing an entry if entries need to be rearranged.
180	if oz.emulateJar || oz.sortEntries {
181		return nil, nil
182	}
183	return nil, source.WriteToZip(name, oz.outputWriter)
184}
185
186// Adds an entry for the manifest (META-INF/MANIFEST.MF from the given file
187func (oz *OutputZip) addManifest(manifestPath string) error {
188	if !oz.stripDirEntries {
189		if _, err := oz.addZipEntry(jar.MetaDir, ZipEntryFromBuffer{jar.MetaDirFileHeader(), nil}); err != nil {
190			return err
191		}
192	}
193	contents, err := ioutil.ReadFile(manifestPath)
194	if err == nil {
195		fh, buf, err := jar.ManifestFileContents(contents)
196		if err == nil {
197			_, err = oz.addZipEntry(jar.ManifestFile, ZipEntryFromBuffer{fh, buf})
198		}
199	}
200	return err
201}
202
203// Adds an entry with given name and contents read from given file
204func (oz *OutputZip) addZipEntryFromFile(name string, path string) error {
205	buf, err := ioutil.ReadFile(path)
206	if err == nil {
207		fh := &zip.FileHeader{
208			Name:               name,
209			Method:             zip.Store,
210			UncompressedSize64: uint64(len(buf)),
211		}
212		fh.SetMode(0700)
213		fh.SetModTime(jar.DefaultTime)
214		_, err = oz.addZipEntry(name, ZipEntryFromBuffer{fh, buf})
215	}
216	return err
217}
218
219func (oz *OutputZip) addEmptyEntry(entry string) error {
220	var emptyBuf []byte
221	fh := &zip.FileHeader{
222		Name:               entry,
223		Method:             zip.Store,
224		UncompressedSize64: uint64(len(emptyBuf)),
225	}
226	fh.SetMode(0700)
227	fh.SetModTime(jar.DefaultTime)
228	_, err := oz.addZipEntry(entry, ZipEntryFromBuffer{fh, emptyBuf})
229	return err
230}
231
232// Returns true if given entry is to be excluded
233func (oz *OutputZip) isEntryExcluded(name string) bool {
234	for _, dir := range oz.excludeDirs {
235		dir = filepath.Clean(dir)
236		patterns := []string{
237			dir + "/",      // the directory itself
238			dir + "/**/*",  // files recursively in the directory
239			dir + "/**/*/", // directories recursively in the directory
240		}
241
242		for _, pattern := range patterns {
243			match, err := pathtools.Match(pattern, name)
244			if err != nil {
245				panic(fmt.Errorf("%s: %s", err.Error(), pattern))
246			}
247			if match {
248				if oz.emulateJar {
249					// When merging jar files, don't strip META-INF/MANIFEST.MF even if stripping META-INF is
250					// requested.
251					// TODO(ccross): which files does this affect?
252					if name != jar.MetaDir && name != jar.ManifestFile {
253						return true
254					}
255				}
256				return true
257			}
258		}
259	}
260
261	for _, pattern := range oz.excludeFiles {
262		match, err := pathtools.Match(pattern, name)
263		if err != nil {
264			panic(fmt.Errorf("%s: %s", err.Error(), pattern))
265		}
266		if match {
267			return true
268		}
269	}
270	return false
271}
272
273// Creates a zip entry whose contents is an entry from the given input zip.
274func (oz *OutputZip) copyEntry(inputZip InputZip, index int) error {
275	entry := NewZipEntryFromZip(inputZip, index)
276	if oz.stripDirEntries && entry.IsDir() {
277		return nil
278	}
279	existingEntry, err := oz.addZipEntry(entry.name, entry)
280	if err != nil {
281		return err
282	}
283	if existingEntry == nil {
284		return nil
285	}
286
287	// File types should match
288	if existingEntry.IsDir() != entry.IsDir() {
289		return fmt.Errorf("Directory/file mismatch at %v from %v and %v\n",
290			entry.name, existingEntry, entry)
291	}
292
293	if oz.ignoreDuplicates ||
294		// Skip manifest and module info files that are not from the first input file
295		(oz.emulateJar && entry.name == jar.ManifestFile || entry.name == jar.ModuleInfoClass) ||
296		// Identical entries
297		(existingEntry.CRC32() == entry.CRC32() && existingEntry.Size() == entry.Size()) ||
298		// Directory entries
299		entry.IsDir() {
300		return nil
301	}
302
303	return fmt.Errorf("Duplicate path %v found in %v and %v\n", entry.name, existingEntry, inputZip.Name())
304}
305
306func (oz *OutputZip) entriesArray() []string {
307	entries := make([]string, len(oz.sourceByDest))
308	i := 0
309	for entry := range oz.sourceByDest {
310		entries[i] = entry
311		i++
312	}
313	return entries
314}
315
316func (oz *OutputZip) jarSorted() []string {
317	entries := oz.entriesArray()
318	sort.SliceStable(entries, func(i, j int) bool { return jar.EntryNamesLess(entries[i], entries[j]) })
319	return entries
320}
321
322func (oz *OutputZip) alphanumericSorted() []string {
323	entries := oz.entriesArray()
324	sort.Strings(entries)
325	return entries
326}
327
328func (oz *OutputZip) writeEntries(entries []string) error {
329	for _, entry := range entries {
330		source, _ := oz.sourceByDest[entry]
331		if err := source.WriteToZip(entry, oz.outputWriter); err != nil {
332			return err
333		}
334	}
335	return nil
336}
337
338func (oz *OutputZip) getUninitializedPythonPackages(inputZips []InputZip) ([]string, error) {
339	// the runfiles packages needs to be populated with "__init__.py".
340	// the runfiles dirs have been treated as packages.
341	allPackages := make(map[string]bool)
342	initedPackages := make(map[string]bool)
343	getPackage := func(path string) string {
344		ret := filepath.Dir(path)
345		// filepath.Dir("abc") -> "." and filepath.Dir("/abc") -> "/".
346		if ret == "." || ret == "/" {
347			return ""
348		}
349		return ret
350	}
351
352	// put existing __init__.py files to a set first. This set is used for preventing
353	// generated __init__.py files from overwriting existing ones.
354	for _, inputZip := range inputZips {
355		if err := inputZip.Open(); err != nil {
356			return nil, err
357		}
358		for _, file := range inputZip.Entries() {
359			pyPkg := getPackage(file.Name)
360			if filepath.Base(file.Name) == "__init__.py" {
361				if _, found := initedPackages[pyPkg]; found {
362					panic(fmt.Errorf("found __init__.py path duplicates during pars merging: %q", file.Name))
363				}
364				initedPackages[pyPkg] = true
365			}
366			for pyPkg != "" {
367				if _, found := allPackages[pyPkg]; found {
368					break
369				}
370				allPackages[pyPkg] = true
371				pyPkg = getPackage(pyPkg)
372			}
373		}
374	}
375	noInitPackages := make([]string, 0)
376	for pyPkg := range allPackages {
377		if _, found := initedPackages[pyPkg]; !found {
378			noInitPackages = append(noInitPackages, pyPkg)
379		}
380	}
381	return noInitPackages, nil
382}
383
// ManagedInputZip is an InputZip owned by an InputZipsManager. It forwards to
// realInputZip, and routes opening and closing through owner. The opened
// ManagedInputZips are chained through older and newer in the open order; both
// links are nil while the zip is not part of the chain.
type ManagedInputZip struct {
	owner        *InputZipsManager
	realInputZip InputZip
	older        *ManagedInputZip
	newer        *ManagedInputZip
}
391
// InputZipsManager maintains the array of ManagedInputZips, keeping track of the
// open ones. When an InputZip is opened, it may close some other InputZip to
// limit the number of open ones.
//
// inputZips holds every zip handed to Manage. nOpenZips counts the zips opened
// through the manager and not closed yet, and maxOpenZips is the limit on that
// count. openInputZips is the fake head element of the chain of open zips.
type InputZipsManager struct {
	inputZips     []*ManagedInputZip
	nOpenZips     int
	maxOpenZips   int
	openInputZips *ManagedInputZip
}
400
// unlink takes miz out of the doubly linked chain of open zips and clears its
// own links. miz has to be part of the chain. It panics if one of the neighbours
// does not point back at miz, i.e. if the chain is broken.
func (miz *ManagedInputZip) unlink() {
	olderMiz := miz.older
	newerMiz := miz.newer
	if newerMiz.older != miz || olderMiz.newer != miz {
		panic(fmt.Errorf("removing %p:%#v: broken list between %p:%#v and %p:%#v",
			miz, miz, newerMiz, newerMiz, olderMiz, olderMiz))
	}
	// Connect the two neighbours to each other, then detach miz.
	olderMiz.newer = newerMiz
	newerMiz.older = olderMiz
	miz.newer = nil
	miz.older = nil
}
413
// link inserts olderMiz into the chain right on the older side of miz, between
// miz and the element that used to be miz.older. It panics if olderMiz already
// has links (it is in the chain already), or if the chain is broken at miz.
func (miz *ManagedInputZip) link(olderMiz *ManagedInputZip) {
	if olderMiz.newer != nil || olderMiz.older != nil {
		panic(fmt.Errorf("inputZip is already open"))
	}
	oldOlderMiz := miz.older
	if oldOlderMiz.newer != miz {
		panic(fmt.Errorf("broken list between %p:%#v and %p:%#v", miz, miz, oldOlderMiz, oldOlderMiz))
	}
	// Chain afterwards: oldOlderMiz <-> olderMiz <-> miz.
	miz.older = olderMiz
	olderMiz.older = oldOlderMiz
	oldOlderMiz.newer = olderMiz
	olderMiz.newer = miz
}
427
428func NewInputZipsManager(nInputZips, maxOpenZips int) *InputZipsManager {
429	if maxOpenZips < 3 {
430		panic(fmt.Errorf("open zips limit should be above 3"))
431	}
432	// In the fake element .older points to the most recently opened InputZip, and .newer points to the oldest.
433	head := new(ManagedInputZip)
434	head.older = head
435	head.newer = head
436	return &InputZipsManager{
437		inputZips:     make([]*ManagedInputZip, 0, nInputZips),
438		maxOpenZips:   maxOpenZips,
439		openInputZips: head,
440	}
441}
442
443// InputZip factory
444func (izm *InputZipsManager) Manage(inz InputZip) InputZip {
445	iz := &ManagedInputZip{owner: izm, realInputZip: inz}
446	izm.inputZips = append(izm.inputZips, iz)
447	return iz
448}
449
450// Opens or reopens ManagedInputZip.
451func (izm *InputZipsManager) reopen(miz *ManagedInputZip) error {
452	if miz.realInputZip.IsOpen() {
453		if miz != izm.openInputZips {
454			miz.unlink()
455			izm.openInputZips.link(miz)
456		}
457		return nil
458	}
459	if izm.nOpenZips >= izm.maxOpenZips {
460		if err := izm.close(izm.openInputZips.older); err != nil {
461			return err
462		}
463	}
464	if err := miz.realInputZip.Open(); err != nil {
465		return err
466	}
467	izm.openInputZips.link(miz)
468	izm.nOpenZips++
469	return nil
470}
471
472func (izm *InputZipsManager) close(miz *ManagedInputZip) error {
473	if miz.IsOpen() {
474		err := miz.realInputZip.Close()
475		izm.nOpenZips--
476		miz.unlink()
477		return err
478	}
479	return nil
480}
481
482// Checks that openInputZips deque is valid
483func (izm *InputZipsManager) checkOpenZipsDeque() {
484	nReallyOpen := 0
485	el := izm.openInputZips
486	for {
487		elNext := el.older
488		if elNext.newer != el {
489			panic(fmt.Errorf("Element:\n  %p: %v\nNext:\n  %p %v", el, el, elNext, elNext))
490		}
491		if elNext == izm.openInputZips {
492			break
493		}
494		el = elNext
495		if !el.IsOpen() {
496			panic(fmt.Errorf("Found unopened element"))
497		}
498		nReallyOpen++
499		if nReallyOpen > izm.nOpenZips {
500			panic(fmt.Errorf("found %d open zips, should be %d", nReallyOpen, izm.nOpenZips))
501		}
502	}
503	if nReallyOpen > izm.nOpenZips {
504		panic(fmt.Errorf("found %d open zips, should be %d", nReallyOpen, izm.nOpenZips))
505	}
506}
507
// Name returns the name of the underlying input zip.
func (miz *ManagedInputZip) Name() string {
	return miz.realInputZip.Name()
}

// Open opens the zip through the owning manager, which may close another zip
// to stay within its limit on open zips.
func (miz *ManagedInputZip) Open() error {
	return miz.owner.reopen(miz)
}

// Close closes the zip through the owning manager.
func (miz *ManagedInputZip) Close() error {
	return miz.owner.close(miz)
}

// IsOpen reports whether the underlying input zip is open.
func (miz *ManagedInputZip) IsOpen() bool {
	return miz.realInputZip.IsOpen()
}

// Entries returns the entries of the underlying input zip. It panics if the
// zip is not open.
func (miz *ManagedInputZip) Entries() []*zip.File {
	if !miz.IsOpen() {
		panic(fmt.Errorf("%s: is not open", miz.Name()))
	}
	return miz.realInputZip.Entries()
}
530
531// Actual processing.
532func mergeZips(inputZips []InputZip, writer *zip.Writer, manifest, pyMain string,
533	sortEntries, emulateJar, emulatePar, stripDirEntries, ignoreDuplicates bool,
534	excludeFiles, excludeDirs []string, zipsToNotStrip map[string]bool) error {
535
536	out := NewOutputZip(writer, sortEntries, emulateJar, stripDirEntries, ignoreDuplicates)
537	out.setExcludeFiles(excludeFiles)
538	out.setExcludeDirs(excludeDirs)
539	if manifest != "" {
540		if err := out.addManifest(manifest); err != nil {
541			return err
542		}
543	}
544	if pyMain != "" {
545		if err := out.addZipEntryFromFile("__main__.py", pyMain); err != nil {
546			return err
547		}
548	}
549
550	if emulatePar {
551		noInitPackages, err := out.getUninitializedPythonPackages(inputZips)
552		if err != nil {
553			return err
554		}
555		for _, uninitializedPyPackage := range noInitPackages {
556			if err = out.addEmptyEntry(filepath.Join(uninitializedPyPackage, "__init__.py")); err != nil {
557				return err
558			}
559		}
560	}
561
562	// Finally, add entries from all the input zips.
563	for _, inputZip := range inputZips {
564		_, copyFully := zipsToNotStrip[inputZip.Name()]
565		if err := inputZip.Open(); err != nil {
566			return err
567		}
568
569		for i, entry := range inputZip.Entries() {
570			if copyFully || !out.isEntryExcluded(entry.Name) {
571				if err := out.copyEntry(inputZip, i); err != nil {
572					return err
573				}
574			}
575		}
576		// Unless we need to rearrange the entries, the input zip can now be closed.
577		if !(emulateJar || sortEntries) {
578			if err := inputZip.Close(); err != nil {
579				return err
580			}
581		}
582	}
583
584	if emulateJar {
585		return out.writeEntries(out.jarSorted())
586	} else if sortEntries {
587		return out.writeEntries(out.alphanumericSorted())
588	}
589	return nil
590}
591
592// Process command line
// fileList is a flag value collecting the paths given by the repeated
// occurrences of a command line flag.
type fileList []string

// String always returns a pair of double quotes.
func (f *fileList) String() string {
	return `""`
}

// Set appends the cleaned (filepath.Clean) form of name to the list. It never
// fails.
func (f *fileList) Set(name string) error {
	cleaned := filepath.Clean(name)
	*f = append(*f, cleaned)
	return nil
}
604
// zipsToNotStripSet is a flag value collecting, as the keys of a map, the paths
// given by the repeated occurrences of a command line flag. The map has to be
// allocated before Set is called.
type zipsToNotStripSet map[string]bool

// String always returns a pair of double quotes.
func (set zipsToNotStripSet) String() string {
	return `""`
}

// Set records path, exactly as given, as a member of the set. It never fails.
func (set zipsToNotStripSet) Set(path string) error {
	set[path] = true
	return nil
}
615
// Command line flags. excludeDirs, excludeFiles and zipsToNotStrip are
// registered in init(), as they use custom flag values.
var (
	sortEntries      = flag.Bool("s", false, "sort entries (defaults to the order from the input zip files)")
	emulateJar       = flag.Bool("j", false, "sort zip entries using jar ordering (META-INF first)")
	emulatePar       = flag.Bool("p", false, "merge zip entries based on par format")
	excludeDirs      fileList
	excludeFiles     fileList
	zipsToNotStrip   = make(zipsToNotStripSet)
	stripDirEntries  = flag.Bool("D", false, "strip directory entries from the output zip file")
	manifest         = flag.String("m", "", "manifest file to insert in jar")
	pyMain           = flag.String("pm", "", "__main__.py file to insert in par")
	prefix           = flag.String("prefix", "", "A file to prefix to the zip file")
	ignoreDuplicates = flag.Bool("ignore-duplicates", false, "take each entry from the first zip it exists in and don't warn")
)
629
// init registers the flags which may be given more than once and accumulate
// their values: -stripDir, -stripFile and -zipToNotStrip.
func init() {
	flag.Var(&excludeDirs, "stripDir", "directories to be excluded from the output zip, accepts wildcards")
	flag.Var(&excludeFiles, "stripFile", "files to be excluded from the output zip, accepts wildcards")
	flag.Var(&zipsToNotStrip, "zipToNotStrip", "the input zip file which is not applicable for stripping")
}
635
636type FileInputZip struct {
637	name   string
638	reader *zip.ReadCloser
639}
640
641func (fiz *FileInputZip) Name() string {
642	return fiz.name
643}
644
645func (fiz *FileInputZip) Close() error {
646	if fiz.IsOpen() {
647		reader := fiz.reader
648		fiz.reader = nil
649		return reader.Close()
650	}
651	return nil
652}
653
654func (fiz *FileInputZip) Entries() []*zip.File {
655	if !fiz.IsOpen() {
656		panic(fmt.Errorf("%s: is not open", fiz.Name()))
657	}
658	return fiz.reader.File
659}
660
661func (fiz *FileInputZip) IsOpen() bool {
662	return fiz.reader != nil
663}
664
665func (fiz *FileInputZip) Open() error {
666	if fiz.IsOpen() {
667		return nil
668	}
669	var err error
670	if fiz.reader, err = zip.OpenReader(fiz.Name()); err != nil {
671		return fmt.Errorf("%s: %s", fiz.Name(), err.Error())
672	}
673	return nil
674}
675
676func main() {
677	flag.Usage = func() {
678		fmt.Fprintln(os.Stderr, "usage: merge_zips [-jpsD] [-m manifest] [--prefix script] [-pm __main__.py] OutputZip [inputs...]")
679		flag.PrintDefaults()
680	}
681
682	// parse args
683	flag.Parse()
684	args := flag.Args()
685	if len(args) < 1 {
686		flag.Usage()
687		os.Exit(1)
688	}
689	outputPath := args[0]
690	inputs := make([]string, 0)
691	for _, input := range args[1:] {
692		if input[0] == '@' {
693			bytes, err := ioutil.ReadFile(input[1:])
694			if err != nil {
695				log.Fatal(err)
696			}
697			inputs = append(inputs, soongZip.ReadRespFile(bytes)...)
698			continue
699		}
700		inputs = append(inputs, input)
701		continue
702	}
703
704	log.SetFlags(log.Lshortfile)
705
706	// make writer
707	outputZip, err := os.Create(outputPath)
708	if err != nil {
709		log.Fatal(err)
710	}
711	defer outputZip.Close()
712
713	var offset int64
714	if *prefix != "" {
715		prefixFile, err := os.Open(*prefix)
716		if err != nil {
717			log.Fatal(err)
718		}
719		offset, err = io.Copy(outputZip, prefixFile)
720		if err != nil {
721			log.Fatal(err)
722		}
723	}
724
725	writer := zip.NewWriter(outputZip)
726	defer func() {
727		err := writer.Close()
728		if err != nil {
729			log.Fatal(err)
730		}
731	}()
732	writer.SetOffset(offset)
733
734	if *manifest != "" && !*emulateJar {
735		log.Fatal(errors.New("must specify -j when specifying a manifest via -m"))
736	}
737
738	if *pyMain != "" && !*emulatePar {
739		log.Fatal(errors.New("must specify -p when specifying a Python __main__.py via -pm"))
740	}
741
742	// do merge
743	inputZipsManager := NewInputZipsManager(len(inputs), 1000)
744	inputZips := make([]InputZip, len(inputs))
745	for i, input := range inputs {
746		inputZips[i] = inputZipsManager.Manage(&FileInputZip{name: input})
747	}
748	err = mergeZips(inputZips, writer, *manifest, *pyMain, *sortEntries, *emulateJar, *emulatePar,
749		*stripDirEntries, *ignoreDuplicates, []string(excludeFiles), []string(excludeDirs),
750		map[string]bool(zipsToNotStrip))
751	if err != nil {
752		log.Fatal(err)
753	}
754}
755