1 /*
2  * Copyright (C) 2013 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 /*
20  * Read-only access to Zip archives, with minimal heap allocation.
21  */
22 
23 #include <stdint.h>
24 #include <string.h>
25 #include <sys/cdefs.h>
26 #include <sys/types.h>
27 
28 #include <functional>
29 #include <string>
30 #include <string_view>
31 
32 #include "android-base/off64_t.h"
33 
/* Zip compression methods we support */
enum {
  // Entry data is stored verbatim, with no compression applied.
  kCompressStored = 0,
  // Entry data is compressed with standard deflate.
  kCompressDeflated = 8,
};
39 
// This struct holds the information common to every zip entry other than
// the entry size. The compressed and uncompressed length will be handled
// separately in the derived class.
//
// Every field carries a default member initializer so that a
// default-constructed entry never exposes indeterminate values.
struct ZipEntryCommon {
  // Compression method. One of kCompressStored or kCompressDeflated.
  // See also `gpbf` for deflate subtypes.
  uint16_t method{0};

  // Modification time. The zipfile format specifies
  // that the first two little endian bytes contain the time
  // and the last two little endian bytes contain the date.
  // See `GetModificationTime`. Use signed integer to avoid the
  // sub-overflow.
  // TODO: should be overridden by extra time field, if present.
  int32_t mod_time{0};

  // Returns `mod_time` as a broken-down struct tm.
  struct tm GetModificationTime() const;

  // Suggested Unix mode for this entry, from the zip archive if created on
  // Unix, or a default otherwise. See also `external_file_attributes`.
  mode_t unix_mode{0};

  // 1 if this entry contains a data descriptor segment, 0
  // otherwise.
  uint8_t has_data_descriptor{0};

  // Crc32 value of this ZipEntry. This information might
  // either be stored in the local file header or in a special
  // Data descriptor footer at the end of the file entry.
  uint32_t crc32{0};

  // True if the values of the uncompressed length and compressed length are
  // stored in the zip64 extended info of the extra field.
  bool zip64_format_size{false};

  // The offset to the start of data for this ZipEntry.
  off64_t offset{0};

  // The version of zip and the host file system this came from (for zipinfo).
  uint16_t version_made_by{0};

  // The raw attributes, whose interpretation depends on the host
  // file system in `version_made_by` (for zipinfo). See also `unix_mode`.
  uint32_t external_file_attributes{0};

  // Specifics about the deflation (for zipinfo).
  uint16_t gpbf{0};
  // Whether this entry is believed to be text or binary (for zipinfo).
  bool is_text{false};
};
91 
92 struct ZipEntry64;
93 // Many users of the library assume the entry size is capped at UNIT32_MAX. So we keep
94 // the interface for the old ZipEntry here; and we could switch them over to the new
95 // ZipEntry64 later.
96 struct ZipEntry : public ZipEntryCommon {
97   // Compressed length of this ZipEntry. The maximum value is UNIT32_MAX.
98   // Might be present either in the local file header or in the data
99   // descriptor footer.
100   uint32_t compressed_length{0};
101 
102   // Uncompressed length of this ZipEntry. The maximum value is UNIT32_MAX.
103   // Might be present either in the local file header or in the data
104   // descriptor footer.
105   uint32_t uncompressed_length{0};
106 
107   // Copies the contents of a ZipEntry64 object to a 32 bits ZipEntry. Returns 0 if the
108   // size of the entry fits into uint32_t, returns a negative error code
109   // (kUnsupportedEntrySize) otherwise.
110   static int32_t CopyFromZipEntry64(ZipEntry* dst, const ZipEntry64* src);
111 
112  private:
113   ZipEntry& operator=(const ZipEntryCommon& other) {
114     ZipEntryCommon::operator=(other);
115     return *this;
116   }
117 };
118 
119 // Represents information about a zip entry in a zip file.
120 struct ZipEntry64 : public ZipEntryCommon {
121   // Compressed length of this ZipEntry. The maximum value is UNIT64_MAX.
122   // Might be present either in the local file header, the zip64 extended field,
123   // or in the data descriptor footer.
124   uint64_t compressed_length{0};
125 
126   // Uncompressed length of this ZipEntry. The maximum value is UNIT64_MAX.
127   // Might be present either in the local file header, the zip64 extended field,
128   // or in the data descriptor footer.
129   uint64_t uncompressed_length{0};
130 
131   explicit ZipEntry64() = default;
ZipEntry64ZipEntry64132   explicit ZipEntry64(const ZipEntry& zip_entry) : ZipEntryCommon(zip_entry) {
133     compressed_length = zip_entry.compressed_length;
134     uncompressed_length = zip_entry.uncompressed_length;
135   }
136 };
137 
// ZipArchive is only forward-declared here; callers refer to an archive
// through the pointer alias ZipArchiveHandle.
struct ZipArchive;
using ZipArchiveHandle = ZipArchive*;
140 
141 /*
142  * Open a Zip archive, and sets handle to the value of the opaque
143  * handle for the file. This handle must be released by calling
144  * CloseArchive with this handle.
145  *
146  * Returns 0 on success, and negative values on failure.
147  */
148 int32_t OpenArchive(const char* fileName, ZipArchiveHandle* handle);
149 
150 /*
151  * Like OpenArchive, but takes a file descriptor open for reading
152  * at the start of the file.  The descriptor must be mappable (this does
153  * not allow access to a stream).
154  *
155  * Sets handle to the value of the opaque handle for this file descriptor.
156  * This handle must be released by calling CloseArchive with this handle.
157  *
158  * If assume_ownership parameter is 'true' calling CloseArchive will close
159  * the file.
160  *
161  * This function maps and scans the central directory and builds a table
162  * of entries for future lookups.
163  *
164  * "debugFileName" will appear in error messages, but is not otherwise used.
165  *
166  * Returns 0 on success, and negative values on failure.
167  */
168 int32_t OpenArchiveFd(const int fd, const char* debugFileName, ZipArchiveHandle* handle,
169                       bool assume_ownership = true);
170 
171 int32_t OpenArchiveFdRange(const int fd, const char* debugFileName, ZipArchiveHandle* handle,
172                            off64_t length, off64_t offset, bool assume_ownership = true);
173 
174 int32_t OpenArchiveFromMemory(const void* address, size_t length, const char* debugFileName,
175                               ZipArchiveHandle* handle);
176 /*
177  * Close archive, releasing resources associated with it. This will
178  * unmap the central directory of the zipfile and free all internal
179  * data structures associated with the file. It is an error to use
180  * this handle for any further operations without an intervening
181  * call to one of the OpenArchive variants.
182  */
183 void CloseArchive(ZipArchiveHandle archive);
184 
/**
 * See GetArchiveInfo().
 *
 * Both fields are zero-initialized by default so that a default-constructed
 * ZipArchiveInfo never holds indeterminate values.
 */
struct ZipArchiveInfo {
  /** The size in bytes of the archive itself. Used by zipinfo. */
  off64_t archive_size{0};
  /** The number of entries in the archive. */
  uint64_t entry_count{0};
};
192 
193 /**
194  * Returns information about the given archive.
195  */
196 ZipArchiveInfo GetArchiveInfo(ZipArchiveHandle archive);
197 
198 /*
199  * Find an entry in the Zip archive, by name. |data| must be non-null.
200  *
201  * Returns 0 if an entry is found, and populates |data| with information
202  * about this entry. Returns negative values otherwise.
203  *
204  * It's important to note that |data->crc32|, |data->compLen| and
205  * |data->uncompLen| might be set to values from the central directory
206  * if this file entry contains a data descriptor footer. To verify crc32s
207  * and length, a call to VerifyCrcAndLengths must be made after entry data
208  * has been processed.
209  *
210  * On non-Windows platforms this method does not modify internal state and
211  * can be called concurrently.
212  */
213 int32_t FindEntry(const ZipArchiveHandle archive, const std::string_view entryName,
214                   ZipEntry64* data);
215 
216 /*
217  * Start iterating over all entries of a zip file. The order of iteration
218  * is not guaranteed to be the same as the order of elements
219  * in the central directory but is stable for a given zip file. |cookie| will
220  * contain the value of an opaque cookie which can be used to make one or more
221  * calls to Next. All calls to StartIteration must be matched by a call to
222  * EndIteration to free any allocated memory.
223  *
224  * This method also accepts optional prefix and suffix to restrict iteration to
225  * entry names that start with |optional_prefix| or end with |optional_suffix|.
226  *
227  * Returns 0 on success and negative values on failure.
228  */
229 int32_t StartIteration(ZipArchiveHandle archive, void** cookie_ptr,
230                        const std::string_view optional_prefix = "",
231                        const std::string_view optional_suffix = "");
232 
233 /*
234  * Start iterating over all entries of a zip file. Use the matcher functor to
235  * restrict iteration to entry names that make the functor return true.
236  *
237  * Returns 0 on success and negative values on failure.
238  */
239 int32_t StartIteration(ZipArchiveHandle archive, void** cookie_ptr,
240                        std::function<bool(std::string_view entry_name)> matcher);
241 
242 /*
243  * Advance to the next element in the zipfile in iteration order.
244  *
245  * Returns 0 on success, -1 if there are no more elements in this
246  * archive and lower negative values on failure.
247  */
248 int32_t Next(void* cookie, ZipEntry64* data, std::string_view* name);
249 int32_t Next(void* cookie, ZipEntry64* data, std::string* name);
250 
/*
 * Finishes an iteration over the entries of a zip file and frees the
 * memory that StartIteration allocated for |cookie|.
 */
void EndIteration(void* cookie);
256 
257 /*
258  * Uncompress and write an entry to an open file identified by |fd|.
259  * |entry->uncompressed_length| bytes will be written to the file at
260  * its current offset, and the file will be truncated at the end of
261  * the uncompressed data (no truncation if |fd| references a block
262  * device).
263  *
264  * Returns 0 on success and negative values on failure.
265  */
266 int32_t ExtractEntryToFile(ZipArchiveHandle archive, const ZipEntry64* entry, int fd);
267 
268 /**
269  * Uncompress a given zip entry to the memory region at |begin| and of
270  * size |size|. This size is expected to be the same as the *declared*
271  * uncompressed length of the zip entry. It is an error if the *actual*
272  * number of uncompressed bytes differs from this number.
273  *
274  * Returns 0 on success and negative values on failure.
275  */
276 int32_t ExtractToMemory(ZipArchiveHandle archive, const ZipEntry64* entry, uint8_t* begin,
277                         size_t size);
278 
279 int GetFileDescriptor(const ZipArchiveHandle archive);
280 
281 /**
282  * Returns the offset of the zip archive in the backing file descriptor, or 0 if the zip archive is
283  * not backed by a file descriptor.
284  */
285 off64_t GetFileDescriptorOffset(const ZipArchiveHandle archive);
286 
/**
 * Returns a C string for |error_code|.
 * NOTE(review): presumably a human-readable description of the negative codes
 * returned by the functions in this header -- confirm against the implementation.
 */
const char* ErrorCodeString(int32_t error_code);
288 
289 // Many users of libziparchive assume the entry size to be 32 bits long. So we keep these
290 // interfaces that use 32 bit ZipEntry to make old code work. TODO(xunchang) Remove the 32 bit
291 // wrapper functions once we switch all users to recognize ZipEntry64.
292 int32_t FindEntry(const ZipArchiveHandle archive, const std::string_view entryName, ZipEntry* data);
293 int32_t Next(void* cookie, ZipEntry* data, std::string* name);
294 int32_t Next(void* cookie, ZipEntry* data, std::string_view* name);
295 int32_t ExtractEntryToFile(ZipArchiveHandle archive, const ZipEntry* entry, int fd);
296 int32_t ExtractToMemory(ZipArchiveHandle archive, const ZipEntry* entry, uint8_t* begin,
297                         size_t size);
298 
299 #if !defined(_WIN32)
300 typedef bool (*ProcessZipEntryFunction)(const uint8_t* buf, size_t buf_size, void* cookie);
301 
302 /*
303  * Stream the uncompressed data through the supplied function,
304  * passing cookie to it each time it gets called.
305  */
306 int32_t ProcessZipEntryContents(ZipArchiveHandle archive, const ZipEntry* entry,
307                                 ProcessZipEntryFunction func, void* cookie);
308 int32_t ProcessZipEntryContents(ZipArchiveHandle archive, const ZipEntry64* entry,
309                                 ProcessZipEntryFunction func, void* cookie);
310 #endif
311 
312 namespace zip_archive {
313 
// Abstract sink interface: implementations receive data through Append().
// Instances can only be created by subclasses and cannot be copied.
class Writer {
 public:
  // Hands |buf_size| bytes starting at |buf| to the writer.
  // NOTE(review): presumably returns false on failure -- confirm against the
  // implementations.
  virtual bool Append(uint8_t* buf, size_t buf_size) = 0;

  virtual ~Writer();

 protected:
  Writer() = default;

 private:
  // Copying is disabled.
  Writer(const Writer&) = delete;
  void operator=(const Writer&) = delete;
};
326 
// Abstract source interface: implementations serve positional reads through
// ReadAtOffset(). Instances can only be created by subclasses and cannot be
// copied.
class Reader {
 public:
  // Requests |len| bytes located at |offset| to be placed into |buf|.
  // NOTE(review): presumably returns false on failure or a short read --
  // confirm against the implementations.
  virtual bool ReadAtOffset(uint8_t* buf, size_t len, off64_t offset) const = 0;

  virtual ~Reader();

 protected:
  Reader() = default;

 private:
  // Copying is disabled.
  Reader(const Reader&) = delete;
  void operator=(const Reader&) = delete;
};
339 
340 /*
341  * Inflates the first |compressed_length| bytes of |reader| to a given |writer|.
342  * |crc_out| is set to the CRC32 checksum of the uncompressed data.
343  *
344  * Returns 0 on success and negative values on failure, for example if |reader|
345  * cannot supply the right amount of data, or if the number of bytes written to
346  * data does not match |uncompressed_length|.
347  *
348  * If |crc_out| is not nullptr, it is set to the crc32 checksum of the
349  * uncompressed data.
350  */
351 int32_t Inflate(const Reader& reader, const uint64_t compressed_length,
352                 const uint64_t uncompressed_length, Writer* writer, uint64_t* crc_out);
353 }  // namespace zip_archive
354