1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef _APPLYPATCH_IMGDIFF_IMAGE_H 18 #define _APPLYPATCH_IMGDIFF_IMAGE_H 19 20 #include <stddef.h> 21 #include <stdio.h> 22 #include <sys/types.h> 23 24 #include <string> 25 #include <vector> 26 27 #include <bsdiff/bsdiff.h> 28 #include <ziparchive/zip_archive.h> 29 #include <zlib.h> 30 31 #include "imgdiff.h" 32 #include "otautil/rangeset.h" 33 34 class ImageChunk { 35 public: 36 static constexpr auto WINDOWBITS = -15; // 32kb window; negative to indicate a raw stream. 37 static constexpr auto MEMLEVEL = 8; // the default value. 38 static constexpr auto METHOD = Z_DEFLATED; 39 static constexpr auto STRATEGY = Z_DEFAULT_STRATEGY; 40 41 ImageChunk(int type, size_t start, const std::vector<uint8_t>* file_content, size_t raw_data_len, 42 std::string entry_name = {}); 43 GetType()44 int GetType() const { 45 return type_; 46 } 47 48 const uint8_t* GetRawData() const; GetRawDataLength()49 size_t GetRawDataLength() const { 50 return raw_data_len_; 51 } GetEntryName()52 const std::string& GetEntryName() const { 53 return entry_name_; 54 } GetStartOffset()55 size_t GetStartOffset() const { 56 return start_; 57 } GetCompressLevel()58 int GetCompressLevel() const { 59 return compress_level_; 60 } 61 62 // CHUNK_DEFLATE will return the uncompressed data for diff, while other types will simply return 63 // the raw data. 64 const uint8_t* DataForPatch() const; 65 size_t DataLengthForPatch() const; 66 67 void Dump(size_t index) const; 68 69 void SetUncompressedData(std::vector<uint8_t> data); 70 bool SetBonusData(const std::vector<uint8_t>& bonus_data); 71 72 bool operator==(const ImageChunk& other) const; 73 bool operator!=(const ImageChunk& other) const { 74 return !(*this == other); 75 } 76 77 /* 78 * Cause a gzip chunk to be treated as a normal chunk (ie, as a blob of uninterpreted data). 79 * The resulting patch will likely be about as big as the target file, but it lets us handle 80 * the case of images where some gzip chunks are reconstructible but others aren't (by treating 81 * the ones that aren't as normal chunks). 82 */ 83 void ChangeDeflateChunkToNormal(); 84 85 /* 86 * Verify that we can reproduce exactly the same compressed data that we started with. Sets the 87 * level, method, windowBits, memLevel, and strategy fields in the chunk to the encoding 88 * parameters needed to produce the right output. 89 */ 90 bool ReconstructDeflateChunk(); 91 bool IsAdjacentNormal(const ImageChunk& other) const; 92 void MergeAdjacentNormal(const ImageChunk& other); 93 94 /* 95 * Compute a bsdiff patch between |src| and |tgt|; Store the result in the patch_data. 96 * |bsdiff_cache| can be used to cache the suffix array if the same |src| chunk is used 97 * repeatedly, pass nullptr if not needed. 98 */ 99 static bool MakePatch(const ImageChunk& tgt, const ImageChunk& src, 100 std::vector<uint8_t>* patch_data, 101 bsdiff::SuffixArrayIndexInterface** bsdiff_cache); 102 103 private: 104 bool TryReconstruction(int level); 105 106 int type_; // CHUNK_NORMAL, CHUNK_DEFLATE, CHUNK_RAW 107 size_t start_; // offset of chunk in the original input file 108 const std::vector<uint8_t>* input_file_ptr_; // ptr to the full content of original input file 109 size_t raw_data_len_; 110 111 // deflate encoder parameters 112 int compress_level_; 113 114 // --- for CHUNK_DEFLATE chunks only: --- 115 std::vector<uint8_t> uncompressed_data_; 116 std::string entry_name_; // used for zip entries 117 }; 118 119 // PatchChunk stores the patch data between a source chunk and a target chunk. It also keeps track 120 // of the metadata of src&tgt chunks (e.g. offset, raw data length, uncompressed data length). 121 class PatchChunk { 122 public: 123 PatchChunk(const ImageChunk& tgt, const ImageChunk& src, std::vector<uint8_t> data); 124 125 // Construct a CHUNK_RAW patch from the target data directly. 126 explicit PatchChunk(const ImageChunk& tgt); 127 128 // Return true if raw data size is smaller than the patch size. 129 static bool RawDataIsSmaller(const ImageChunk& tgt, size_t patch_size); 130 131 // Update the source start with the new offset within the source range. 132 void UpdateSourceOffset(const SortedRangeSet& src_range); 133 134 // Return the total size (header + data) of the patch. 135 size_t PatchSize() const; 136 137 static bool WritePatchDataToFd(const std::vector<PatchChunk>& patch_chunks, int patch_fd); 138 139 private: 140 size_t GetHeaderSize() const; 141 size_t WriteHeaderToFd(int fd, size_t offset, size_t index) const; 142 143 // The patch chunk type is the same as the target chunk type. The only exception is we change 144 // the |type_| to CHUNK_RAW if target length is smaller than the patch size. 145 int type_; 146 147 size_t source_start_; 148 size_t source_len_; 149 size_t source_uncompressed_len_; 150 151 size_t target_start_; // offset of the target chunk within the target file 152 size_t target_len_; 153 size_t target_uncompressed_len_; 154 size_t target_compress_level_; // the deflate compression level of the target chunk. 155 156 std::vector<uint8_t> data_; // storage for the patch data 157 }; 158 159 // Interface for zip_mode and image_mode images. We initialize the image from an input file and 160 // split the file content into a list of image chunks. 161 class Image { 162 public: Image(bool is_source)163 explicit Image(bool is_source) : is_source_(is_source) {} 164 ~Image()165 virtual ~Image() {} 166 167 // Create a list of image chunks from input file. 168 virtual bool Initialize(const std::string& filename) = 0; 169 170 // Look for runs of adjacent normal chunks and compress them down into a single chunk. (Such 171 // runs can be produced when deflate chunks are changed to normal chunks.) 172 void MergeAdjacentNormalChunks(); 173 174 void DumpChunks() const; 175 176 // Non const iterators to access the stored ImageChunks. begin()177 std::vector<ImageChunk>::iterator begin() { 178 return chunks_.begin(); 179 } 180 end()181 std::vector<ImageChunk>::iterator end() { 182 return chunks_.end(); 183 } 184 cbegin()185 std::vector<ImageChunk>::const_iterator cbegin() const { 186 return chunks_.cbegin(); 187 } 188 cend()189 std::vector<ImageChunk>::const_iterator cend() const { 190 return chunks_.cend(); 191 } 192 193 ImageChunk& operator[](size_t i); 194 const ImageChunk& operator[](size_t i) const; 195 NumOfChunks()196 size_t NumOfChunks() const { 197 return chunks_.size(); 198 } 199 200 protected: 201 bool ReadFile(const std::string& filename, std::vector<uint8_t>* file_content); 202 203 bool is_source_; // True if it's for source chunks. 204 std::vector<ImageChunk> chunks_; // Internal storage of ImageChunk. 205 std::vector<uint8_t> file_content_; // Store the whole input file in memory. 206 }; 207 208 class ZipModeImage : public Image { 209 public: Image(is_source)210 explicit ZipModeImage(bool is_source, size_t limit = 0) : Image(is_source), limit_(limit) {} 211 212 bool Initialize(const std::string& filename) override; 213 214 // Initialize a fake ZipModeImage from an existing ImageChunk vector. For src img pieces, we 215 // reconstruct a new file_content based on the source ranges; but it's not needed for the tgt img 216 // pieces; because for each chunk both the data and their offset within the file are unchanged. Initialize(const std::vector<ImageChunk> & chunks,const std::vector<uint8_t> & file_content)217 void Initialize(const std::vector<ImageChunk>& chunks, const std::vector<uint8_t>& file_content) { 218 chunks_ = chunks; 219 file_content_ = file_content; 220 } 221 222 // The pesudo source chunk for bsdiff if there's no match for the given target chunk. It's in 223 // fact the whole source file. 224 ImageChunk PseudoSource() const; 225 226 // Find the matching deflate source chunk by entry name. Search for normal chunks also if 227 // |find_normal| is true. 228 ImageChunk* FindChunkByName(const std::string& name, bool find_normal = false); 229 230 const ImageChunk* FindChunkByName(const std::string& name, bool find_normal = false) const; 231 232 // Verify that we can reconstruct the deflate chunks; also change the type to CHUNK_NORMAL if 233 // src and tgt are identical. 234 static bool CheckAndProcessChunks(ZipModeImage* tgt_image, ZipModeImage* src_image); 235 236 // Compute the patch between tgt & src images, and write the data into |patch_name|. 237 static bool GeneratePatches(const ZipModeImage& tgt_image, const ZipModeImage& src_image, 238 const std::string& patch_name); 239 240 // Compute the patch based on the lists of split src and tgt images. Generate patches for each 241 // pair of split pieces and write the data to |patch_name|. If |debug_dir| is specified, write 242 // each split src data and patch data into that directory. 243 static bool GeneratePatches(const std::vector<ZipModeImage>& split_tgt_images, 244 const std::vector<ZipModeImage>& split_src_images, 245 const std::vector<SortedRangeSet>& split_src_ranges, 246 const std::string& patch_name, const std::string& split_info_file, 247 const std::string& debug_dir); 248 249 // Split the tgt chunks and src chunks based on the size limit. 250 static bool SplitZipModeImageWithLimit(const ZipModeImage& tgt_image, 251 const ZipModeImage& src_image, 252 std::vector<ZipModeImage>* split_tgt_images, 253 std::vector<ZipModeImage>* split_src_images, 254 std::vector<SortedRangeSet>* split_src_ranges); 255 256 private: 257 // Initialize image chunks based on the zip entries. 258 bool InitializeChunks(const std::string& filename, ZipArchiveHandle handle); 259 // Add the a zip entry to the list. 260 bool AddZipEntryToChunks(ZipArchiveHandle handle, const std::string& entry_name, ZipEntry* entry); 261 // Return the real size of the zip file. (omit the trailing zeros that used for alignment) 262 bool GetZipFileSize(size_t* input_file_size); 263 264 static void ValidateSplitImages(const std::vector<ZipModeImage>& split_tgt_images, 265 const std::vector<ZipModeImage>& split_src_images, 266 std::vector<SortedRangeSet>& split_src_ranges, 267 size_t total_tgt_size); 268 // Construct the fake split images based on the chunks info and source ranges; and move them into 269 // the given vectors. Return true if we add a new split image into |split_tgt_images|, and 270 // false otherwise. 271 static bool AddSplitImageFromChunkList(const ZipModeImage& tgt_image, 272 const ZipModeImage& src_image, 273 const SortedRangeSet& split_src_ranges, 274 const std::vector<ImageChunk>& split_tgt_chunks, 275 const std::vector<ImageChunk>& split_src_chunks, 276 std::vector<ZipModeImage>* split_tgt_images, 277 std::vector<ZipModeImage>* split_src_images); 278 279 // Function that actually iterates the tgt_chunks and makes patches. 280 static bool GeneratePatchesInternal(const ZipModeImage& tgt_image, const ZipModeImage& src_image, 281 std::vector<PatchChunk>* patch_chunks); 282 283 // size limit in bytes of each chunk. Also, if the length of one zip_entry exceeds the limit, 284 // we'll split that entry into several smaller chunks in advance. 285 size_t limit_; 286 }; 287 288 class ImageModeImage : public Image { 289 public: ImageModeImage(bool is_source)290 explicit ImageModeImage(bool is_source) : Image(is_source) {} 291 292 // Initialize the image chunks list by searching the magic numbers in an image file. 293 bool Initialize(const std::string& filename) override; 294 295 bool SetBonusData(const std::vector<uint8_t>& bonus_data); 296 297 // In Image Mode, verify that the source and target images have the same chunk structure (ie, the 298 // same sequence of deflate and normal chunks). 299 static bool CheckAndProcessChunks(ImageModeImage* tgt_image, ImageModeImage* src_image); 300 301 // In image mode, generate patches against the given source chunks and bonus_data; write the 302 // result to |patch_name|. 303 static bool GeneratePatches(const ImageModeImage& tgt_image, const ImageModeImage& src_image, 304 const std::string& patch_name); 305 }; 306 307 #endif // _APPLYPATCH_IMGDIFF_IMAGE_H 308