1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef _APPLYPATCH_IMGDIFF_IMAGE_H
18 #define _APPLYPATCH_IMGDIFF_IMAGE_H
19 
20 #include <stddef.h>
21 #include <stdio.h>
22 #include <sys/types.h>
23 
24 #include <string>
25 #include <vector>
26 
27 #include <bsdiff/bsdiff.h>
28 #include <ziparchive/zip_archive.h>
29 #include <zlib.h>
30 
31 #include "imgdiff.h"
32 #include "otautil/rangeset.h"
33 
34 class ImageChunk {
35  public:
36   static constexpr auto WINDOWBITS = -15;  // 32kb window; negative to indicate a raw stream.
37   static constexpr auto MEMLEVEL = 8;      // the default value.
38   static constexpr auto METHOD = Z_DEFLATED;
39   static constexpr auto STRATEGY = Z_DEFAULT_STRATEGY;
40 
41   ImageChunk(int type, size_t start, const std::vector<uint8_t>* file_content, size_t raw_data_len,
42              std::string entry_name = {});
43 
GetType()44   int GetType() const {
45     return type_;
46   }
47 
48   const uint8_t* GetRawData() const;
GetRawDataLength()49   size_t GetRawDataLength() const {
50     return raw_data_len_;
51   }
GetEntryName()52   const std::string& GetEntryName() const {
53     return entry_name_;
54   }
GetStartOffset()55   size_t GetStartOffset() const {
56     return start_;
57   }
GetCompressLevel()58   int GetCompressLevel() const {
59     return compress_level_;
60   }
61 
62   // CHUNK_DEFLATE will return the uncompressed data for diff, while other types will simply return
63   // the raw data.
64   const uint8_t* DataForPatch() const;
65   size_t DataLengthForPatch() const;
66 
67   void Dump(size_t index) const;
68 
69   void SetUncompressedData(std::vector<uint8_t> data);
70   bool SetBonusData(const std::vector<uint8_t>& bonus_data);
71 
72   bool operator==(const ImageChunk& other) const;
73   bool operator!=(const ImageChunk& other) const {
74     return !(*this == other);
75   }
76 
77   /*
78    * Cause a gzip chunk to be treated as a normal chunk (ie, as a blob of uninterpreted data).
79    * The resulting patch will likely be about as big as the target file, but it lets us handle
80    * the case of images where some gzip chunks are reconstructible but others aren't (by treating
81    * the ones that aren't as normal chunks).
82    */
83   void ChangeDeflateChunkToNormal();
84 
85   /*
86    * Verify that we can reproduce exactly the same compressed data that we started with.  Sets the
87    * level, method, windowBits, memLevel, and strategy fields in the chunk to the encoding
88    * parameters needed to produce the right output.
89    */
90   bool ReconstructDeflateChunk();
91   bool IsAdjacentNormal(const ImageChunk& other) const;
92   void MergeAdjacentNormal(const ImageChunk& other);
93 
94   /*
95    * Compute a bsdiff patch between |src| and |tgt|; Store the result in the patch_data.
96    * |bsdiff_cache| can be used to cache the suffix array if the same |src| chunk is used
97    * repeatedly, pass nullptr if not needed.
98    */
99   static bool MakePatch(const ImageChunk& tgt, const ImageChunk& src,
100                         std::vector<uint8_t>* patch_data,
101                         bsdiff::SuffixArrayIndexInterface** bsdiff_cache);
102 
103  private:
104   bool TryReconstruction(int level);
105 
106   int type_;                                    // CHUNK_NORMAL, CHUNK_DEFLATE, CHUNK_RAW
107   size_t start_;                                // offset of chunk in the original input file
108   const std::vector<uint8_t>* input_file_ptr_;  // ptr to the full content of original input file
109   size_t raw_data_len_;
110 
111   // deflate encoder parameters
112   int compress_level_;
113 
114   // --- for CHUNK_DEFLATE chunks only: ---
115   std::vector<uint8_t> uncompressed_data_;
116   std::string entry_name_;  // used for zip entries
117 };
118 
119 // PatchChunk stores the patch data between a source chunk and a target chunk. It also keeps track
120 // of the metadata of src&tgt chunks (e.g. offset, raw data length, uncompressed data length).
121 class PatchChunk {
122  public:
123   PatchChunk(const ImageChunk& tgt, const ImageChunk& src, std::vector<uint8_t> data);
124 
125   // Construct a CHUNK_RAW patch from the target data directly.
126   explicit PatchChunk(const ImageChunk& tgt);
127 
128   // Return true if raw data size is smaller than the patch size.
129   static bool RawDataIsSmaller(const ImageChunk& tgt, size_t patch_size);
130 
131   // Update the source start with the new offset within the source range.
132   void UpdateSourceOffset(const SortedRangeSet& src_range);
133 
134   // Return the total size (header + data) of the patch.
135   size_t PatchSize() const;
136 
137   static bool WritePatchDataToFd(const std::vector<PatchChunk>& patch_chunks, int patch_fd);
138 
139  private:
140   size_t GetHeaderSize() const;
141   size_t WriteHeaderToFd(int fd, size_t offset, size_t index) const;
142 
143   // The patch chunk type is the same as the target chunk type. The only exception is we change
144   // the |type_| to CHUNK_RAW if target length is smaller than the patch size.
145   int type_;
146 
147   size_t source_start_;
148   size_t source_len_;
149   size_t source_uncompressed_len_;
150 
151   size_t target_start_;  // offset of the target chunk within the target file
152   size_t target_len_;
153   size_t target_uncompressed_len_;
154   size_t target_compress_level_;  // the deflate compression level of the target chunk.
155 
156   std::vector<uint8_t> data_;  // storage for the patch data
157 };
158 
159 // Interface for zip_mode and image_mode images. We initialize the image from an input file and
160 // split the file content into a list of image chunks.
161 class Image {
162  public:
Image(bool is_source)163   explicit Image(bool is_source) : is_source_(is_source) {}
164 
~Image()165   virtual ~Image() {}
166 
167   // Create a list of image chunks from input file.
168   virtual bool Initialize(const std::string& filename) = 0;
169 
170   // Look for runs of adjacent normal chunks and compress them down into a single chunk.  (Such
171   // runs can be produced when deflate chunks are changed to normal chunks.)
172   void MergeAdjacentNormalChunks();
173 
174   void DumpChunks() const;
175 
176   // Non const iterators to access the stored ImageChunks.
begin()177   std::vector<ImageChunk>::iterator begin() {
178     return chunks_.begin();
179   }
180 
end()181   std::vector<ImageChunk>::iterator end() {
182     return chunks_.end();
183   }
184 
cbegin()185   std::vector<ImageChunk>::const_iterator cbegin() const {
186     return chunks_.cbegin();
187   }
188 
cend()189   std::vector<ImageChunk>::const_iterator cend() const {
190     return chunks_.cend();
191   }
192 
193   ImageChunk& operator[](size_t i);
194   const ImageChunk& operator[](size_t i) const;
195 
NumOfChunks()196   size_t NumOfChunks() const {
197     return chunks_.size();
198   }
199 
200  protected:
201   bool ReadFile(const std::string& filename, std::vector<uint8_t>* file_content);
202 
203   bool is_source_;                     // True if it's for source chunks.
204   std::vector<ImageChunk> chunks_;     // Internal storage of ImageChunk.
205   std::vector<uint8_t> file_content_;  // Store the whole input file in memory.
206 };
207 
208 class ZipModeImage : public Image {
209  public:
Image(is_source)210   explicit ZipModeImage(bool is_source, size_t limit = 0) : Image(is_source), limit_(limit) {}
211 
212   bool Initialize(const std::string& filename) override;
213 
214   // Initialize a fake ZipModeImage from an existing ImageChunk vector. For src img pieces, we
215   // reconstruct a new file_content based on the source ranges; but it's not needed for the tgt img
216   // pieces; because for each chunk both the data and their offset within the file are unchanged.
Initialize(const std::vector<ImageChunk> & chunks,const std::vector<uint8_t> & file_content)217   void Initialize(const std::vector<ImageChunk>& chunks, const std::vector<uint8_t>& file_content) {
218     chunks_ = chunks;
219     file_content_ = file_content;
220   }
221 
222   // The pesudo source chunk for bsdiff if there's no match for the given target chunk. It's in
223   // fact the whole source file.
224   ImageChunk PseudoSource() const;
225 
226   // Find the matching deflate source chunk by entry name. Search for normal chunks also if
227   // |find_normal| is true.
228   ImageChunk* FindChunkByName(const std::string& name, bool find_normal = false);
229 
230   const ImageChunk* FindChunkByName(const std::string& name, bool find_normal = false) const;
231 
232   // Verify that we can reconstruct the deflate chunks; also change the type to CHUNK_NORMAL if
233   // src and tgt are identical.
234   static bool CheckAndProcessChunks(ZipModeImage* tgt_image, ZipModeImage* src_image);
235 
236   // Compute the patch between tgt & src images, and write the data into |patch_name|.
237   static bool GeneratePatches(const ZipModeImage& tgt_image, const ZipModeImage& src_image,
238                               const std::string& patch_name);
239 
240   // Compute the patch based on the lists of split src and tgt images. Generate patches for each
241   // pair of split pieces and write the data to |patch_name|. If |debug_dir| is specified, write
242   // each split src data and patch data into that directory.
243   static bool GeneratePatches(const std::vector<ZipModeImage>& split_tgt_images,
244                               const std::vector<ZipModeImage>& split_src_images,
245                               const std::vector<SortedRangeSet>& split_src_ranges,
246                               const std::string& patch_name, const std::string& split_info_file,
247                               const std::string& debug_dir);
248 
249   // Split the tgt chunks and src chunks based on the size limit.
250   static bool SplitZipModeImageWithLimit(const ZipModeImage& tgt_image,
251                                          const ZipModeImage& src_image,
252                                          std::vector<ZipModeImage>* split_tgt_images,
253                                          std::vector<ZipModeImage>* split_src_images,
254                                          std::vector<SortedRangeSet>* split_src_ranges);
255 
256  private:
257   // Initialize image chunks based on the zip entries.
258   bool InitializeChunks(const std::string& filename, ZipArchiveHandle handle);
259   // Add the a zip entry to the list.
260   bool AddZipEntryToChunks(ZipArchiveHandle handle, const std::string& entry_name, ZipEntry* entry);
261   // Return the real size of the zip file. (omit the trailing zeros that used for alignment)
262   bool GetZipFileSize(size_t* input_file_size);
263 
264   static void ValidateSplitImages(const std::vector<ZipModeImage>& split_tgt_images,
265                                   const std::vector<ZipModeImage>& split_src_images,
266                                   std::vector<SortedRangeSet>& split_src_ranges,
267                                   size_t total_tgt_size);
268   // Construct the fake split images based on the chunks info and source ranges; and move them into
269   // the given vectors. Return true if we add a new split image into |split_tgt_images|, and
270   // false otherwise.
271   static bool AddSplitImageFromChunkList(const ZipModeImage& tgt_image,
272                                          const ZipModeImage& src_image,
273                                          const SortedRangeSet& split_src_ranges,
274                                          const std::vector<ImageChunk>& split_tgt_chunks,
275                                          const std::vector<ImageChunk>& split_src_chunks,
276                                          std::vector<ZipModeImage>* split_tgt_images,
277                                          std::vector<ZipModeImage>* split_src_images);
278 
279   // Function that actually iterates the tgt_chunks and makes patches.
280   static bool GeneratePatchesInternal(const ZipModeImage& tgt_image, const ZipModeImage& src_image,
281                                       std::vector<PatchChunk>* patch_chunks);
282 
283   // size limit in bytes of each chunk. Also, if the length of one zip_entry exceeds the limit,
284   // we'll split that entry into several smaller chunks in advance.
285   size_t limit_;
286 };
287 
288 class ImageModeImage : public Image {
289  public:
ImageModeImage(bool is_source)290   explicit ImageModeImage(bool is_source) : Image(is_source) {}
291 
292   // Initialize the image chunks list by searching the magic numbers in an image file.
293   bool Initialize(const std::string& filename) override;
294 
295   bool SetBonusData(const std::vector<uint8_t>& bonus_data);
296 
297   // In Image Mode, verify that the source and target images have the same chunk structure (ie, the
298   // same sequence of deflate and normal chunks).
299   static bool CheckAndProcessChunks(ImageModeImage* tgt_image, ImageModeImage* src_image);
300 
301   // In image mode, generate patches against the given source chunks and bonus_data; write the
302   // result to |patch_name|.
303   static bool GeneratePatches(const ImageModeImage& tgt_image, const ImageModeImage& src_image,
304                               const std::string& patch_name);
305 };
306 
307 #endif  // _APPLYPATCH_IMGDIFF_IMAGE_H
308