1 // Copyright (C) 2019 The Android Open Source Project
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #pragma once
16 
17 #include <stdint.h>
18 
19 #include <vector>
20 
21 namespace android {
22 namespace snapshot {
23 
24 class DmSnapCowSizeCalculator {
25   public:
DmSnapCowSizeCalculator(unsigned int sector_bytes,unsigned int chunk_sectors)26     DmSnapCowSizeCalculator(unsigned int sector_bytes, unsigned int chunk_sectors)
27         : sector_bytes_(sector_bytes),
28           chunk_sectors_(chunk_sectors),
29           exceptions_per_chunk(chunk_sectors_ * sector_bytes_ / (64 * 2 / 8)) {}
30 
WriteByte(uint64_t address)31     void WriteByte(uint64_t address) { WriteSector(address / sector_bytes_); }
WriteSector(uint64_t sector)32     void WriteSector(uint64_t sector) { WriteChunk(sector / chunk_sectors_); }
WriteChunk(uint64_t chunk_id)33     void WriteChunk(uint64_t chunk_id) {
34         if (modified_chunks_.size() <= chunk_id) {
35             modified_chunks_.resize(chunk_id + 1, false);
36         }
37         modified_chunks_[chunk_id] = true;
38     }
39 
cow_size_bytes()40     uint64_t cow_size_bytes() const { return cow_size_sectors() * sector_bytes_; }
cow_size_sectors()41     uint64_t cow_size_sectors() const { return cow_size_chunks() * chunk_sectors_; }
42 
43     /*
44      * The COW device has a precise internal structure as follows:
45      *
46      * - header (1 chunk)
47      * - #0 map and chunks
48      *   - map (1 chunk)
49      *   - chunks addressable by previous map (exceptions_per_chunk)
50      * - #1 map and chunks
51      *   - map (1 chunk)
52      *   - chunks addressable by previous map (exceptions_per_chunk)
53      * ...
54      * - #n: map and chunks
55      *   - map (1 chunk)
56      *   - chunks addressable by previous map (exceptions_per_chunk)
57      * - 1 extra chunk
58      */
cow_size_chunks()59     uint64_t cow_size_chunks() const {
60         uint64_t modified_chunks_count = 0;
61         uint64_t cow_chunks = 0;
62 
63         for (const auto& c : modified_chunks_) {
64             if (c) {
65                 ++modified_chunks_count;
66             }
67         }
68 
69         /* disk header + padding = 1 chunk */
70         cow_chunks += 1;
71 
72         /* snapshot modified chunks */
73         cow_chunks += modified_chunks_count;
74 
75         /* snapshot chunks index metadata */
76         cow_chunks += 1 + modified_chunks_count / exceptions_per_chunk;
77 
78         return cow_chunks;
79     }
80 
81   private:
82     /*
83      * Size of each sector in bytes.
84      */
85     const uint64_t sector_bytes_;
86 
87     /*
88      * Size of each chunk in sectors.
89      */
90     const uint64_t chunk_sectors_;
91 
92     /*
93      * The COW device stores tables to map the modified chunks. Each table
94      * has the size of exactly 1 chunk.
95      * Each row of the table (also called exception in the kernel) contains two
96      * 64 bit indices to identify the corresponding chunk, and this 128 bit row
97      * size is a constant.
98      * The number of exceptions that each table can contain determines the
99      * number of data chunks that separate two consecutive tables. This value
100      * is then fundamental to compute the space overhead introduced by the
101      * tables in COW devices.
102      */
103     const uint64_t exceptions_per_chunk;
104 
105     /*
106      * |modified_chunks_| is a container that keeps trace of the modified
107      * chunks.
108      * Multiple options were considered when choosing the most appropriate data
109      * structure for this container. Here follows a summary of why vector<bool>
110      * has been chosen, taking as a reference a snapshot partition of 4 GiB and
111      * chunk size of 4 KiB.
112      * - std::set<uint64_t> is very space-efficient for a small number of
113      *   operations, but if the whole snapshot is changed, it would need to
114      *   store
115      *     4 GiB / 4 KiB * (64 bit / 8) = 8 MiB
116      *   just for the data, plus the additional data overhead for the red-black
117      *   tree used for data sorting (if each rb-tree element stores 3 address
118      *   and the word-aligne color, the total size grows to 32 MiB).
119      * - std::bitset<N> is not a good fit because requires a priori knowledge,
120      *   at compile time, of the bitset size.
121      * - std::vector<bool> is a special case of vector, which uses a data
122      *   compression that allows reducing the space utilization of each element
123      *   to 1 bit. In detail, this data structure is composed of a resizable
124      *   array of words, each of them representing a bitmap. On a 64 bit
125      *   device, modifying the whole 4 GiB snapshot grows this container up to
126      *     4 * GiB / 4 KiB / 64 = 64 KiB
127      *   that, even if is the same space requirement to change a single byte at
128      *   the highest address of the snapshot, is a very affordable space
129      *   requirement.
130      */
131     std::vector<bool> modified_chunks_;
132 };
133 
134 }  // namespace snapshot
135 }  // namespace android
136