1 // Copyright (C) 2019 The Android Open Source Project
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include <libsnapshot/snapshot.h>
16 
17 #include <dirent.h>
18 #include <math.h>
19 #include <sys/file.h>
20 #include <sys/types.h>
21 #include <sys/unistd.h>
22 
23 #include <optional>
24 #include <thread>
25 #include <unordered_set>
26 
27 #include <android-base/file.h>
28 #include <android-base/logging.h>
29 #include <android-base/parseint.h>
30 #include <android-base/strings.h>
31 #include <android-base/unique_fd.h>
32 #include <ext4_utils/ext4_utils.h>
33 #include <fs_mgr.h>
34 #include <fs_mgr_dm_linear.h>
35 #include <fstab/fstab.h>
36 #include <libdm/dm.h>
37 #include <libfiemap/image_manager.h>
38 #include <liblp/liblp.h>
39 
40 #include <android/snapshot/snapshot.pb.h>
41 #include <libsnapshot/snapshot_stats.h>
42 #include "device_info.h"
43 #include "partition_cow_creator.h"
44 #include "snapshot_metadata_updater.h"
45 #include "utility.h"
46 
47 namespace android {
48 namespace snapshot {
49 
50 using android::base::unique_fd;
51 using android::dm::DeviceMapper;
52 using android::dm::DmDeviceState;
53 using android::dm::DmTable;
54 using android::dm::DmTargetLinear;
55 using android::dm::DmTargetSnapshot;
56 using android::dm::kSectorSize;
57 using android::dm::SnapshotStorageMode;
58 using android::fiemap::FiemapStatus;
59 using android::fiemap::IImageManager;
60 using android::fs_mgr::CreateDmTable;
61 using android::fs_mgr::CreateLogicalPartition;
62 using android::fs_mgr::CreateLogicalPartitionParams;
63 using android::fs_mgr::GetPartitionGroupName;
64 using android::fs_mgr::GetPartitionName;
65 using android::fs_mgr::LpMetadata;
66 using android::fs_mgr::MetadataBuilder;
67 using android::fs_mgr::SlotNumberForSlotSuffix;
68 using android::hardware::boot::V1_1::MergeStatus;
69 using chromeos_update_engine::DeltaArchiveManifest;
70 using chromeos_update_engine::Extent;
71 using chromeos_update_engine::InstallOperation;
72 template <typename T>
73 using RepeatedPtrField = google::protobuf::RepeatedPtrField<T>;
74 using std::chrono::duration_cast;
75 using namespace std::chrono_literals;
76 using namespace std::string_literals;
77 
// Paths of indicator files under /metadata/ota.
// NOTE(review): the functions visible in this part of the file obtain these
// locations through GetSnapshotBootIndicatorPath() and
// GetRollbackIndicatorPath() rather than through these constants -- confirm
// whether the constants are still referenced elsewhere.
static constexpr char kBootIndicatorPath[] = "/metadata/ota/snapshot-boot";
static constexpr char kRollbackIndicatorPath[] = "/metadata/ota/rollback-indicator";
// Two seconds. Presumably the polling period used while waiting on the update
// state -- TODO confirm against its users.
static constexpr auto kUpdateStateCheckInterval = 2s;
81 
82 // Note: IImageManager is an incomplete type in the header, so the default
83 // destructor doesn't work.
~SnapshotManager()84 SnapshotManager::~SnapshotManager() {}
85 
New(IDeviceInfo * info)86 std::unique_ptr<SnapshotManager> SnapshotManager::New(IDeviceInfo* info) {
87     if (!info) {
88         info = new DeviceInfo();
89     }
90     return std::unique_ptr<SnapshotManager>(new SnapshotManager(info));
91 }
92 
NewForFirstStageMount(IDeviceInfo * info)93 std::unique_ptr<SnapshotManager> SnapshotManager::NewForFirstStageMount(IDeviceInfo* info) {
94     auto sm = New(info);
95     if (!sm || !sm->ForceLocalImageManager()) {
96         return nullptr;
97     }
98     return sm;
99 }
100 
SnapshotManager(IDeviceInfo * device)101 SnapshotManager::SnapshotManager(IDeviceInfo* device) : device_(device) {
102     gsid_dir_ = device_->GetGsidDir();
103     metadata_dir_ = device_->GetMetadataDir();
104 }
105 
// Returns |snapshot_name| with the "-cow" suffix appended.
static std::string GetCowName(const std::string& snapshot_name) {
    std::string cow_name(snapshot_name);
    cow_name.append("-cow");
    return cow_name;
}
109 
// Returns |snapshot_name| with the "-cow-img" suffix appended. This is the
// name used for the backing image of a snapshot's COW.
static std::string GetCowImageDeviceName(const std::string& snapshot_name) {
    std::string image_name(snapshot_name);
    image_name.append("-cow-img");
    return image_name;
}
113 
// Returns |partition_name| with the "-base" suffix appended.
static std::string GetBaseDeviceName(const std::string& partition_name) {
    std::string base_name(partition_name);
    base_name.append("-base");
    return base_name;
}
117 
// Returns |snapshot_name| with the "-inner" suffix appended. MapSnapshot()
// gives this name to the snapshot target when it is stacked underneath a
// linear device.
static std::string GetSnapshotExtraDeviceName(const std::string& snapshot_name) {
    std::string inner_name(snapshot_name);
    inner_name.append("-inner");
    return inner_name;
}
121 
BeginUpdate()122 bool SnapshotManager::BeginUpdate() {
123     bool needs_merge = false;
124     if (!TryCancelUpdate(&needs_merge)) {
125         return false;
126     }
127     if (needs_merge) {
128         LOG(INFO) << "Wait for merge (if any) before beginning a new update.";
129         auto state = ProcessUpdateState();
130         LOG(INFO) << "Merged with state = " << state;
131     }
132 
133     auto file = LockExclusive();
134     if (!file) return false;
135 
136     // Purge the ImageManager just in case there is a corrupt lp_metadata file
137     // lying around. (NB: no need to return false on an error, we can let the
138     // update try to progress.)
139     if (EnsureImageManager()) {
140         images_->RemoveAllImages();
141     }
142 
143     auto state = ReadUpdateState(file.get());
144     if (state != UpdateState::None) {
145         LOG(ERROR) << "An update is already in progress, cannot begin a new update";
146         return false;
147     }
148     return WriteUpdateState(file.get(), UpdateState::Initiated);
149 }
150 
CancelUpdate()151 bool SnapshotManager::CancelUpdate() {
152     bool needs_merge = false;
153     if (!TryCancelUpdate(&needs_merge)) {
154         return false;
155     }
156     if (needs_merge) {
157         LOG(ERROR) << "Cannot cancel update after it has completed or started merging";
158     }
159     return !needs_merge;
160 }
161 
TryCancelUpdate(bool * needs_merge)162 bool SnapshotManager::TryCancelUpdate(bool* needs_merge) {
163     *needs_merge = false;
164 
165     auto file = LockExclusive();
166     if (!file) return false;
167 
168     UpdateState state = ReadUpdateState(file.get());
169     if (state == UpdateState::None) return true;
170 
171     if (state == UpdateState::Initiated) {
172         LOG(INFO) << "Update has been initiated, now canceling";
173         return RemoveAllUpdateState(file.get());
174     }
175 
176     if (state == UpdateState::Unverified) {
177         // We completed an update, but it can still be canceled if we haven't booted into it.
178         auto slot = GetCurrentSlot();
179         if (slot != Slot::Target) {
180             LOG(INFO) << "Canceling previously completed updates (if any)";
181             return RemoveAllUpdateState(file.get());
182         }
183     }
184     *needs_merge = true;
185     return true;
186 }
187 
ReadUpdateSourceSlotSuffix()188 std::string SnapshotManager::ReadUpdateSourceSlotSuffix() {
189     auto boot_file = GetSnapshotBootIndicatorPath();
190     std::string contents;
191     if (!android::base::ReadFileToString(boot_file, &contents)) {
192         PLOG(WARNING) << "Cannot read " << boot_file;
193         return {};
194     }
195     return contents;
196 }
197 
GetCurrentSlot()198 SnapshotManager::Slot SnapshotManager::GetCurrentSlot() {
199     auto contents = ReadUpdateSourceSlotSuffix();
200     if (contents.empty()) {
201         return Slot::Unknown;
202     }
203     if (device_->GetSlotSuffix() == contents) {
204         return Slot::Source;
205     }
206     return Slot::Target;
207 }
208 
RemoveFileIfExists(const std::string & path)209 static bool RemoveFileIfExists(const std::string& path) {
210     std::string message;
211     if (!android::base::RemoveFileIfExists(path, &message)) {
212         LOG(ERROR) << "Remove failed: " << path << ": " << message;
213         return false;
214     }
215     return true;
216 }
217 
RemoveAllUpdateState(LockedFile * lock,const std::function<bool ()> & prolog)218 bool SnapshotManager::RemoveAllUpdateState(LockedFile* lock, const std::function<bool()>& prolog) {
219     if (prolog && !prolog()) {
220         LOG(WARNING) << "Can't RemoveAllUpdateState: prolog failed.";
221         return false;
222     }
223 
224     LOG(INFO) << "Removing all update state.";
225 
226     if (!RemoveAllSnapshots(lock)) {
227         LOG(ERROR) << "Could not remove all snapshots";
228         return false;
229     }
230 
231     // It's okay if these fail:
232     // - For SnapshotBoot and Rollback, first-stage init performs a deeper check after
233     // reading the indicator file, so it's not a problem if it still exists
234     // after the update completes.
235     // - For ForwardMerge, FinishedSnapshotWrites asserts that the existence of the indicator
236     // matches the incoming update.
237     std::vector<std::string> files = {
238             GetSnapshotBootIndicatorPath(),
239             GetRollbackIndicatorPath(),
240             GetForwardMergeIndicatorPath(),
241     };
242     for (const auto& file : files) {
243         RemoveFileIfExists(file);
244     }
245 
246     // If this fails, we'll keep trying to remove the update state (as the
247     // device reboots or starts a new update) until it finally succeeds.
248     return WriteUpdateState(lock, UpdateState::None);
249 }
250 
FinishedSnapshotWrites(bool wipe)251 bool SnapshotManager::FinishedSnapshotWrites(bool wipe) {
252     auto lock = LockExclusive();
253     if (!lock) return false;
254 
255     auto update_state = ReadUpdateState(lock.get());
256     if (update_state == UpdateState::Unverified) {
257         LOG(INFO) << "FinishedSnapshotWrites already called before. Ignored.";
258         return true;
259     }
260 
261     if (update_state != UpdateState::Initiated) {
262         LOG(ERROR) << "Can only transition to the Unverified state from the Initiated state.";
263         return false;
264     }
265 
266     if (!EnsureNoOverflowSnapshot(lock.get())) {
267         LOG(ERROR) << "Cannot ensure there are no overflow snapshots.";
268         return false;
269     }
270 
271     if (!UpdateForwardMergeIndicator(wipe)) {
272         return false;
273     }
274 
275     // This file is written on boot to detect whether a rollback occurred. It
276     // MUST NOT exist before rebooting, otherwise, we're at risk of deleting
277     // snapshots too early.
278     if (!RemoveFileIfExists(GetRollbackIndicatorPath())) {
279         return false;
280     }
281 
282     // This file acts as both a quick indicator for init (it can use access(2)
283     // to decide how to do first-stage mounts), and it stores the old slot, so
284     // we can tell whether or not we performed a rollback.
285     auto contents = device_->GetSlotSuffix();
286     auto boot_file = GetSnapshotBootIndicatorPath();
287     if (!WriteStringToFileAtomic(contents, boot_file)) {
288         PLOG(ERROR) << "write failed: " << boot_file;
289         return false;
290     }
291     return WriteUpdateState(lock.get(), UpdateState::Unverified);
292 }
293 
CreateSnapshot(LockedFile * lock,SnapshotStatus * status)294 bool SnapshotManager::CreateSnapshot(LockedFile* lock, SnapshotStatus* status) {
295     CHECK(lock);
296     CHECK(lock->lock_mode() == LOCK_EX);
297     CHECK(status);
298 
299     if (status->name().empty()) {
300         LOG(ERROR) << "SnapshotStatus has no name.";
301         return false;
302     }
303     // Check these sizes. Like liblp, we guarantee the partition size is
304     // respected, which means it has to be sector-aligned. (This guarantee is
305     // useful for locating avb footers correctly). The COW file size, however,
306     // can be arbitrarily larger than specified, so we can safely round it up.
307     if (status->device_size() % kSectorSize != 0) {
308         LOG(ERROR) << "Snapshot " << status->name()
309                    << " device size is not a multiple of the sector size: "
310                    << status->device_size();
311         return false;
312     }
313     if (status->snapshot_size() % kSectorSize != 0) {
314         LOG(ERROR) << "Snapshot " << status->name()
315                    << " snapshot size is not a multiple of the sector size: "
316                    << status->snapshot_size();
317         return false;
318     }
319     if (status->cow_partition_size() % kSectorSize != 0) {
320         LOG(ERROR) << "Snapshot " << status->name()
321                    << " cow partition size is not a multiple of the sector size: "
322                    << status->cow_partition_size();
323         return false;
324     }
325     if (status->cow_file_size() % kSectorSize != 0) {
326         LOG(ERROR) << "Snapshot " << status->name()
327                    << " cow file size is not a multiple of the sector size: "
328                    << status->cow_file_size();
329         return false;
330     }
331 
332     status->set_state(SnapshotState::CREATED);
333     status->set_sectors_allocated(0);
334     status->set_metadata_sectors(0);
335 
336     if (!WriteSnapshotStatus(lock, *status)) {
337         PLOG(ERROR) << "Could not write snapshot status: " << status->name();
338         return false;
339     }
340     return true;
341 }
342 
CreateCowImage(LockedFile * lock,const std::string & name)343 Return SnapshotManager::CreateCowImage(LockedFile* lock, const std::string& name) {
344     CHECK(lock);
345     CHECK(lock->lock_mode() == LOCK_EX);
346     if (!EnsureImageManager()) return Return::Error();
347 
348     SnapshotStatus status;
349     if (!ReadSnapshotStatus(lock, name, &status)) {
350         return Return::Error();
351     }
352 
353     // The COW file size should have been rounded up to the nearest sector in CreateSnapshot.
354     if (status.cow_file_size() % kSectorSize != 0) {
355         LOG(ERROR) << "Snapshot " << name << " COW file size is not a multiple of the sector size: "
356                    << status.cow_file_size();
357         return Return::Error();
358     }
359 
360     std::string cow_image_name = GetCowImageDeviceName(name);
361     int cow_flags = IImageManager::CREATE_IMAGE_DEFAULT;
362     return Return(images_->CreateBackingImage(cow_image_name, status.cow_file_size(), cow_flags));
363 }
364 
MapSnapshot(LockedFile * lock,const std::string & name,const std::string & base_device,const std::string & cow_device,const std::chrono::milliseconds & timeout_ms,std::string * dev_path)365 bool SnapshotManager::MapSnapshot(LockedFile* lock, const std::string& name,
366                                   const std::string& base_device, const std::string& cow_device,
367                                   const std::chrono::milliseconds& timeout_ms,
368                                   std::string* dev_path) {
369     CHECK(lock);
370 
371     SnapshotStatus status;
372     if (!ReadSnapshotStatus(lock, name, &status)) {
373         return false;
374     }
375     if (status.state() == SnapshotState::NONE || status.state() == SnapshotState::MERGE_COMPLETED) {
376         LOG(ERROR) << "Should not create a snapshot device for " << name
377                    << " after merging has completed.";
378         return false;
379     }
380 
381     // Validate the block device size, as well as the requested snapshot size.
382     // Note that during first-stage init, we don't have the device paths.
383     if (android::base::StartsWith(base_device, "/")) {
384         unique_fd fd(open(base_device.c_str(), O_RDONLY | O_CLOEXEC));
385         if (fd < 0) {
386             PLOG(ERROR) << "open failed: " << base_device;
387             return false;
388         }
389         auto dev_size = get_block_device_size(fd);
390         if (!dev_size) {
391             PLOG(ERROR) << "Could not determine block device size: " << base_device;
392             return false;
393         }
394         if (status.device_size() != dev_size) {
395             LOG(ERROR) << "Block device size for " << base_device << " does not match"
396                        << "(expected " << status.device_size() << ", got " << dev_size << ")";
397             return false;
398         }
399     }
400     if (status.device_size() % kSectorSize != 0) {
401         LOG(ERROR) << "invalid blockdev size for " << base_device << ": " << status.device_size();
402         return false;
403     }
404     if (status.snapshot_size() % kSectorSize != 0 ||
405         status.snapshot_size() > status.device_size()) {
406         LOG(ERROR) << "Invalid snapshot size for " << base_device << ": " << status.snapshot_size();
407         return false;
408     }
409     uint64_t snapshot_sectors = status.snapshot_size() / kSectorSize;
410     uint64_t linear_sectors = (status.device_size() - status.snapshot_size()) / kSectorSize;
411 
412     auto& dm = DeviceMapper::Instance();
413 
414     // Note that merging is a global state. We do track whether individual devices
415     // have completed merging, but the start of the merge process is considered
416     // atomic.
417     SnapshotStorageMode mode;
418     switch (ReadUpdateState(lock)) {
419         case UpdateState::MergeCompleted:
420         case UpdateState::MergeNeedsReboot:
421             LOG(ERROR) << "Should not create a snapshot device for " << name
422                        << " after global merging has completed.";
423             return false;
424         case UpdateState::Merging:
425         case UpdateState::MergeFailed:
426             // Note: MergeFailed indicates that a merge is in progress, but
427             // is possibly stalled. We still have to honor the merge.
428             mode = SnapshotStorageMode::Merge;
429             break;
430         default:
431             mode = SnapshotStorageMode::Persistent;
432             break;
433     }
434 
435     // The kernel (tested on 4.19) crashes horribly if a device has both a snapshot
436     // and a linear target in the same table. Instead, we stack them, and give the
437     // snapshot device a different name. It is not exposed to the caller in this
438     // case.
439     auto snap_name = (linear_sectors > 0) ? GetSnapshotExtraDeviceName(name) : name;
440 
441     DmTable table;
442     table.Emplace<DmTargetSnapshot>(0, snapshot_sectors, base_device, cow_device, mode,
443                                     kSnapshotChunkSize);
444     if (!dm.CreateDevice(snap_name, table, dev_path, timeout_ms)) {
445         LOG(ERROR) << "Could not create snapshot device: " << snap_name;
446         return false;
447     }
448 
449     if (linear_sectors) {
450         std::string snap_dev;
451         if (!dm.GetDeviceString(snap_name, &snap_dev)) {
452             LOG(ERROR) << "Cannot determine major/minor for: " << snap_name;
453             return false;
454         }
455 
456         // Our stacking will looks like this:
457         //     [linear, linear] ; to snapshot, and non-snapshot region of base device
458         //     [snapshot-inner]
459         //     [base device]   [cow]
460         DmTable table;
461         table.Emplace<DmTargetLinear>(0, snapshot_sectors, snap_dev, 0);
462         table.Emplace<DmTargetLinear>(snapshot_sectors, linear_sectors, base_device,
463                                       snapshot_sectors);
464         if (!dm.CreateDevice(name, table, dev_path, timeout_ms)) {
465             LOG(ERROR) << "Could not create outer snapshot device: " << name;
466             dm.DeleteDevice(snap_name);
467             return false;
468         }
469     }
470 
471     // :TODO: when merging is implemented, we need to add an argument to the
472     // status indicating how much progress is left to merge. (device-mapper
473     // does not retain the initial values, so we can't derive them.)
474     return true;
475 }
476 
MapCowImage(const std::string & name,const std::chrono::milliseconds & timeout_ms)477 std::optional<std::string> SnapshotManager::MapCowImage(
478         const std::string& name, const std::chrono::milliseconds& timeout_ms) {
479     if (!EnsureImageManager()) return std::nullopt;
480     auto cow_image_name = GetCowImageDeviceName(name);
481 
482     bool ok;
483     std::string cow_dev;
484     if (has_local_image_manager_) {
485         // If we forced a local image manager, it means we don't have binder,
486         // which means first-stage init. We must use device-mapper.
487         const auto& opener = device_->GetPartitionOpener();
488         ok = images_->MapImageWithDeviceMapper(opener, cow_image_name, &cow_dev);
489     } else {
490         ok = images_->MapImageDevice(cow_image_name, timeout_ms, &cow_dev);
491     }
492 
493     if (ok) {
494         LOG(INFO) << "Mapped " << cow_image_name << " to " << cow_dev;
495         return cow_dev;
496     }
497     LOG(ERROR) << "Could not map image device: " << cow_image_name;
498     return std::nullopt;
499 }
500 
UnmapSnapshot(LockedFile * lock,const std::string & name)501 bool SnapshotManager::UnmapSnapshot(LockedFile* lock, const std::string& name) {
502     CHECK(lock);
503 
504     auto& dm = DeviceMapper::Instance();
505     if (!dm.DeleteDeviceIfExists(name)) {
506         LOG(ERROR) << "Could not delete snapshot device: " << name;
507         return false;
508     }
509 
510     auto snapshot_extra_device = GetSnapshotExtraDeviceName(name);
511     if (!dm.DeleteDeviceIfExists(snapshot_extra_device)) {
512         LOG(ERROR) << "Could not delete snapshot inner device: " << snapshot_extra_device;
513         return false;
514     }
515 
516     return true;
517 }
518 
UnmapCowImage(const std::string & name)519 bool SnapshotManager::UnmapCowImage(const std::string& name) {
520     if (!EnsureImageManager()) return false;
521     return images_->UnmapImageIfExists(GetCowImageDeviceName(name));
522 }
523 
DeleteSnapshot(LockedFile * lock,const std::string & name)524 bool SnapshotManager::DeleteSnapshot(LockedFile* lock, const std::string& name) {
525     CHECK(lock);
526     CHECK(lock->lock_mode() == LOCK_EX);
527     if (!EnsureImageManager()) return false;
528 
529     if (!UnmapCowDevices(lock, name)) {
530         return false;
531     }
532 
533     // We can't delete snapshots in recovery. The only way we'd try is it we're
534     // completing or canceling a merge in preparation for a data wipe, in which
535     // case, we don't care if the file sticks around.
536     if (device_->IsRecovery()) {
537         LOG(INFO) << "Skipping delete of snapshot " << name << " in recovery.";
538         return true;
539     }
540 
541     auto cow_image_name = GetCowImageDeviceName(name);
542     if (images_->BackingImageExists(cow_image_name)) {
543         if (!images_->DeleteBackingImage(cow_image_name)) {
544             return false;
545         }
546     }
547 
548     std::string error;
549     auto file_path = GetSnapshotStatusFilePath(name);
550     if (!android::base::RemoveFileIfExists(file_path, &error)) {
551         LOG(ERROR) << "Failed to remove status file " << file_path << ": " << error;
552         return false;
553     }
554     return true;
555 }
556 
InitiateMerge(uint64_t * cow_file_size)557 bool SnapshotManager::InitiateMerge(uint64_t* cow_file_size) {
558     auto lock = LockExclusive();
559     if (!lock) return false;
560 
561     UpdateState state = ReadUpdateState(lock.get());
562     if (state != UpdateState::Unverified) {
563         LOG(ERROR) << "Cannot begin a merge if an update has not been verified";
564         return false;
565     }
566 
567     auto slot = GetCurrentSlot();
568     if (slot != Slot::Target) {
569         LOG(ERROR) << "Device cannot merge while not booting from new slot";
570         return false;
571     }
572 
573     std::vector<std::string> snapshots;
574     if (!ListSnapshots(lock.get(), &snapshots)) {
575         LOG(ERROR) << "Could not list snapshots";
576         return false;
577     }
578 
579     auto other_suffix = device_->GetOtherSlotSuffix();
580 
581     auto& dm = DeviceMapper::Instance();
582     for (const auto& snapshot : snapshots) {
583         if (android::base::EndsWith(snapshot, other_suffix)) {
584             // Allow the merge to continue, but log this unexpected case.
585             LOG(ERROR) << "Unexpected snapshot found during merge: " << snapshot;
586             continue;
587         }
588 
589         // The device has to be mapped, since everything should be merged at
590         // the same time. This is a fairly serious error. We could forcefully
591         // map everything here, but it should have been mapped during first-
592         // stage init.
593         if (dm.GetState(snapshot) == DmDeviceState::INVALID) {
594             LOG(ERROR) << "Cannot begin merge; device " << snapshot << " is not mapped.";
595             return false;
596         }
597     }
598 
599     auto metadata = ReadCurrentMetadata();
600     for (auto it = snapshots.begin(); it != snapshots.end();) {
601         switch (GetMetadataPartitionState(*metadata, *it)) {
602             case MetadataPartitionState::Flashed:
603                 LOG(WARNING) << "Detected re-flashing for partition " << *it
604                              << ". Skip merging it.";
605                 [[fallthrough]];
606             case MetadataPartitionState::None: {
607                 LOG(WARNING) << "Deleting snapshot for partition " << *it;
608                 if (!DeleteSnapshot(lock.get(), *it)) {
609                     LOG(WARNING) << "Cannot delete snapshot for partition " << *it
610                                  << ". Skip merging it anyways.";
611                 }
612                 it = snapshots.erase(it);
613             } break;
614             case MetadataPartitionState::Updated: {
615                 ++it;
616             } break;
617         }
618     }
619 
620     uint64_t total_cow_file_size = 0;
621     DmTargetSnapshot::Status initial_target_values = {};
622     for (const auto& snapshot : snapshots) {
623         DmTargetSnapshot::Status current_status;
624         if (!QuerySnapshotStatus(snapshot, nullptr, &current_status)) {
625             return false;
626         }
627         initial_target_values.sectors_allocated += current_status.sectors_allocated;
628         initial_target_values.total_sectors += current_status.total_sectors;
629         initial_target_values.metadata_sectors += current_status.metadata_sectors;
630 
631         SnapshotStatus snapshot_status;
632         if (!ReadSnapshotStatus(lock.get(), snapshot, &snapshot_status)) {
633             return false;
634         }
635         total_cow_file_size += snapshot_status.cow_file_size();
636     }
637 
638     if (cow_file_size) {
639         *cow_file_size = total_cow_file_size;
640     }
641 
642     SnapshotUpdateStatus initial_status;
643     initial_status.set_state(UpdateState::Merging);
644     initial_status.set_sectors_allocated(initial_target_values.sectors_allocated);
645     initial_status.set_total_sectors(initial_target_values.total_sectors);
646     initial_status.set_metadata_sectors(initial_target_values.metadata_sectors);
647 
648     // Point of no return - mark that we're starting a merge. From now on every
649     // snapshot must be a merge target.
650     if (!WriteSnapshotUpdateStatus(lock.get(), initial_status)) {
651         return false;
652     }
653 
654     bool rewrote_all = true;
655     for (const auto& snapshot : snapshots) {
656         // If this fails, we have no choice but to continue. Everything must
657         // be merged. This is not an ideal state to be in, but it is safe,
658         // because we the next boot will try again.
659         if (!SwitchSnapshotToMerge(lock.get(), snapshot)) {
660             LOG(ERROR) << "Failed to switch snapshot to a merge target: " << snapshot;
661             rewrote_all = false;
662         }
663     }
664 
665     // If we couldn't switch everything to a merge target, pre-emptively mark
666     // this merge as failed. It will get acknowledged when WaitForMerge() is
667     // called.
668     if (!rewrote_all) {
669         WriteUpdateState(lock.get(), UpdateState::MergeFailed);
670     }
671 
672     // Return true no matter what, because a merge was initiated.
673     return true;
674 }
675 
SwitchSnapshotToMerge(LockedFile * lock,const std::string & name)676 bool SnapshotManager::SwitchSnapshotToMerge(LockedFile* lock, const std::string& name) {
677     SnapshotStatus status;
678     if (!ReadSnapshotStatus(lock, name, &status)) {
679         return false;
680     }
681     if (status.state() != SnapshotState::CREATED) {
682         LOG(WARNING) << "Snapshot " << name
683                      << " has unexpected state: " << SnapshotState_Name(status.state());
684     }
685 
686     // After this, we return true because we technically did switch to a merge
687     // target. Everything else we do here is just informational.
688     auto dm_name = GetSnapshotDeviceName(name, status);
689     if (!RewriteSnapshotDeviceTable(dm_name)) {
690         return false;
691     }
692 
693     status.set_state(SnapshotState::MERGING);
694 
695     DmTargetSnapshot::Status dm_status;
696     if (!QuerySnapshotStatus(dm_name, nullptr, &dm_status)) {
697         LOG(ERROR) << "Could not query merge status for snapshot: " << dm_name;
698     }
699     status.set_sectors_allocated(dm_status.sectors_allocated);
700     status.set_metadata_sectors(dm_status.metadata_sectors);
701     if (!WriteSnapshotStatus(lock, status)) {
702         LOG(ERROR) << "Could not update status file for snapshot: " << name;
703     }
704     return true;
705 }
706 
RewriteSnapshotDeviceTable(const std::string & dm_name)707 bool SnapshotManager::RewriteSnapshotDeviceTable(const std::string& dm_name) {
708     auto& dm = DeviceMapper::Instance();
709 
710     std::vector<DeviceMapper::TargetInfo> old_targets;
711     if (!dm.GetTableInfo(dm_name, &old_targets)) {
712         LOG(ERROR) << "Could not read snapshot device table: " << dm_name;
713         return false;
714     }
715     if (old_targets.size() != 1 || DeviceMapper::GetTargetType(old_targets[0].spec) != "snapshot") {
716         LOG(ERROR) << "Unexpected device-mapper table for snapshot: " << dm_name;
717         return false;
718     }
719 
720     std::string base_device, cow_device;
721     if (!DmTargetSnapshot::GetDevicesFromParams(old_targets[0].data, &base_device, &cow_device)) {
722         LOG(ERROR) << "Could not derive underlying devices for snapshot: " << dm_name;
723         return false;
724     }
725 
726     DmTable table;
727     table.Emplace<DmTargetSnapshot>(0, old_targets[0].spec.length, base_device, cow_device,
728                                     SnapshotStorageMode::Merge, kSnapshotChunkSize);
729     if (!dm.LoadTableAndActivate(dm_name, table)) {
730         LOG(ERROR) << "Could not swap device-mapper tables on snapshot device " << dm_name;
731         return false;
732     }
733     LOG(INFO) << "Successfully switched snapshot device to a merge target: " << dm_name;
734     return true;
735 }
736 
// Selects which device-mapper query GetSingleTarget() issues for a device.
enum class TableQuery {
    // Query through DeviceMapper::GetTableInfo().
    Table,
    // Query through DeviceMapper::GetTableStatus().
    Status,
};
741 
GetSingleTarget(const std::string & dm_name,TableQuery query,DeviceMapper::TargetInfo * target)742 static bool GetSingleTarget(const std::string& dm_name, TableQuery query,
743                             DeviceMapper::TargetInfo* target) {
744     auto& dm = DeviceMapper::Instance();
745     if (dm.GetState(dm_name) == DmDeviceState::INVALID) {
746         return false;
747     }
748 
749     std::vector<DeviceMapper::TargetInfo> targets;
750     bool result;
751     if (query == TableQuery::Status) {
752         result = dm.GetTableStatus(dm_name, &targets);
753     } else {
754         result = dm.GetTableInfo(dm_name, &targets);
755     }
756     if (!result) {
757         LOG(ERROR) << "Could not query device: " << dm_name;
758         return false;
759     }
760     if (targets.size() != 1) {
761         return false;
762     }
763 
764     *target = std::move(targets[0]);
765     return true;
766 }
767 
IsSnapshotDevice(const std::string & dm_name,TargetInfo * target)768 bool SnapshotManager::IsSnapshotDevice(const std::string& dm_name, TargetInfo* target) {
769     DeviceMapper::TargetInfo snap_target;
770     if (!GetSingleTarget(dm_name, TableQuery::Status, &snap_target)) {
771         return false;
772     }
773     auto type = DeviceMapper::GetTargetType(snap_target.spec);
774     if (type != "snapshot" && type != "snapshot-merge") {
775         return false;
776     }
777     if (target) {
778         *target = std::move(snap_target);
779     }
780     return true;
781 }
782 
QuerySnapshotStatus(const std::string & dm_name,std::string * target_type,DmTargetSnapshot::Status * status)783 bool SnapshotManager::QuerySnapshotStatus(const std::string& dm_name, std::string* target_type,
784                                           DmTargetSnapshot::Status* status) {
785     DeviceMapper::TargetInfo target;
786     if (!IsSnapshotDevice(dm_name, &target)) {
787         LOG(ERROR) << "Device " << dm_name << " is not a snapshot or snapshot-merge device";
788         return false;
789     }
790     if (!DmTargetSnapshot::ParseStatusText(target.data, status)) {
791         LOG(ERROR) << "Could not parse snapshot status text: " << dm_name;
792         return false;
793     }
794     if (target_type) {
795         *target_type = DeviceMapper::GetTargetType(target.spec);
796     }
797     return true;
798 }
799 
800 // Note that when a merge fails, we will *always* try again to complete the
801 // merge each time the device boots. There is no harm in doing so, and if
802 // the problem was transient, we might manage to get a new outcome.
ProcessUpdateState(const std::function<bool ()> & callback,const std::function<bool ()> & before_cancel)803 UpdateState SnapshotManager::ProcessUpdateState(const std::function<bool()>& callback,
804                                                 const std::function<bool()>& before_cancel) {
805     while (true) {
806         UpdateState state = CheckMergeState(before_cancel);
807         if (state == UpdateState::MergeFailed) {
808             AcknowledgeMergeFailure();
809         }
810         if (state != UpdateState::Merging) {
811             // Either there is no merge, or the merge was finished, so no need
812             // to keep waiting.
813             return state;
814         }
815 
816         if (callback && !callback()) {
817             return state;
818         }
819 
820         // This wait is not super time sensitive, so we have a relatively
821         // low polling frequency.
822         std::this_thread::sleep_for(kUpdateStateCheckInterval);
823     }
824 }
825 
CheckMergeState(const std::function<bool ()> & before_cancel)826 UpdateState SnapshotManager::CheckMergeState(const std::function<bool()>& before_cancel) {
827     auto lock = LockExclusive();
828     if (!lock) {
829         return UpdateState::MergeFailed;
830     }
831 
832     UpdateState state = CheckMergeState(lock.get(), before_cancel);
833     if (state == UpdateState::MergeCompleted) {
834         // Do this inside the same lock. Failures get acknowledged without the
835         // lock, because flock() might have failed.
836         AcknowledgeMergeSuccess(lock.get());
837     } else if (state == UpdateState::Cancelled) {
838         if (!RemoveAllUpdateState(lock.get(), before_cancel)) {
839             return ReadSnapshotUpdateStatus(lock.get()).state();
840         }
841     }
842     return state;
843 }
844 
// Computes the overall update state while the caller holds |lock|.
//
// - None, MergeCompleted and any state not listed in the switch below are
//   returned unchanged.
// - Unverified returns Cancelled when HandleCancelledUpdate() succeeds, and
//   Unverified otherwise.
// - Merging, MergeNeedsReboot and MergeFailed poll every snapshot through
//   CheckTargetMergeState() and aggregate the results with the priority
//   Merging > MergeFailed > MergeNeedsReboot > Cancelled > MergeCompleted.
//
// Returns MergeFailed if the snapshot list cannot be read. When the aggregate
// result is MergeNeedsReboot, that state is also written to the state file.
UpdateState SnapshotManager::CheckMergeState(LockedFile* lock,
                                             const std::function<bool()>& before_cancel) {
    UpdateState state = ReadUpdateState(lock);
    switch (state) {
        case UpdateState::None:
        case UpdateState::MergeCompleted:
            // Harmless races are allowed between two callers of WaitForMerge,
            // so in both of these cases we just propagate the state.
            return state;

        case UpdateState::Merging:
        case UpdateState::MergeNeedsReboot:
        case UpdateState::MergeFailed:
            // We'll poll each snapshot below. Note that for the NeedsReboot
            // case, we always poll once to give cleanup another opportunity to
            // run.
            break;

        case UpdateState::Unverified:
            // This is an edge case. Normally cancelled updates are detected
            // via the merge poll below, but if we never started a merge, we
            // need to also check here.
            if (HandleCancelledUpdate(lock, before_cancel)) {
                return UpdateState::Cancelled;
            }
            return state;

        default:
            return state;
    }

    std::vector<std::string> snapshots;
    if (!ListSnapshots(lock, &snapshots)) {
        return UpdateState::MergeFailed;
    }

    // One flag per per-snapshot outcome; all snapshots are polled before any
    // decision is made so that each one gets a chance to update its status.
    bool cancelled = false;
    bool failed = false;
    bool merging = false;
    bool needs_reboot = false;
    for (const auto& snapshot : snapshots) {
        UpdateState snapshot_state = CheckTargetMergeState(lock, snapshot);
        switch (snapshot_state) {
            case UpdateState::MergeFailed:
                failed = true;
                break;
            case UpdateState::Merging:
                merging = true;
                break;
            case UpdateState::MergeNeedsReboot:
                needs_reboot = true;
                break;
            case UpdateState::MergeCompleted:
                break;
            case UpdateState::Cancelled:
                cancelled = true;
                break;
            default:
                // Any other per-snapshot state is treated as a failure.
                LOG(ERROR) << "Unknown merge status for \"" << snapshot << "\": "
                           << "\"" << snapshot_state << "\"";
                failed = true;
                break;
        }
    }

    if (merging) {
        // Note that we handle "Merging" before we handle anything else. We
        // want to poll until *nothing* is merging if we can, so everything has
        // a chance to get marked as completed or failed.
        return UpdateState::Merging;
    }
    if (failed) {
        // Note: since there are many drop-out cases for failure, we acknowledge
        // it in WaitForMerge rather than here and elsewhere.
        return UpdateState::MergeFailed;
    }
    if (needs_reboot) {
        // Persist the state so it is still visible after this call returns.
        WriteUpdateState(lock, UpdateState::MergeNeedsReboot);
        return UpdateState::MergeNeedsReboot;
    }
    if (cancelled) {
        // This is an edge case, that we handle as correctly as we sensibly can.
        // The underlying partition has changed behind update_engine, and we've
        // removed the snapshot as a result. The exact state of the update is
        // undefined now, but this can only happen on an unlocked device where
        // partitions can be flashed without wiping userdata.
        return UpdateState::Cancelled;
    }
    return UpdateState::MergeCompleted;
}
935 
// Determines the merge state of the single snapshot |name| while the caller
// holds |lock|.
//
// Returns:
//   - MergeFailed if the snapshot status cannot be read or written, the
//     dm-snapshot status cannot be queried, the device has an unexpected
//     target type, or the device reports merging after being marked
//     MERGE_COMPLETED.
//   - Cancelled if the device is not a snapshot device and the current
//     metadata is unreadable or does not mark the partition as Updated; the
//     snapshot is deleted in that case.
//   - Merging while sectors_allocated differs from metadata_sectors.
//   - MergeNeedsReboot if the merge finished but OnSnapshotMergeComplete()
//     failed.
//   - MergeCompleted otherwise.
UpdateState SnapshotManager::CheckTargetMergeState(LockedFile* lock, const std::string& name) {
    SnapshotStatus snapshot_status;
    if (!ReadSnapshotStatus(lock, name, &snapshot_status)) {
        return UpdateState::MergeFailed;
    }

    std::string dm_name = GetSnapshotDeviceName(name, snapshot_status);

    std::unique_ptr<LpMetadata> current_metadata;

    if (!IsSnapshotDevice(dm_name)) {
        // current_metadata has not been assigned yet at this point, so the
        // metadata is always read here.
        if (!current_metadata) {
            current_metadata = ReadCurrentMetadata();
        }

        // Without the Updated flag (or without readable metadata) the
        // snapshot is discarded and the update is reported as cancelled.
        if (!current_metadata ||
            GetMetadataPartitionState(*current_metadata, name) != MetadataPartitionState::Updated) {
            DeleteSnapshot(lock, name);
            return UpdateState::Cancelled;
        }

        // During a check, we decided the merge was complete, but we were unable to
        // collapse the device-mapper stack and perform COW cleanup. If we haven't
        // rebooted after this check, the device will still be a snapshot-merge
        // target. If we have rebooted, the device will now be a linear target,
        // and we can try cleanup again.
        if (snapshot_status.state() == SnapshotState::MERGE_COMPLETED) {
            // NB: It's okay if this fails now, we gave cleanup our best effort.
            OnSnapshotMergeComplete(lock, name, snapshot_status);
            return UpdateState::MergeCompleted;
        }

        LOG(ERROR) << "Expected snapshot or snapshot-merge for device: " << dm_name;
        return UpdateState::MergeFailed;
    }

    // This check is expensive so it is only enabled for debugging.
    // NOTE(review): the metadata read is part of the DCHECK expression, so it
    // presumably only runs when DCHECKs are enabled -- confirm.
    DCHECK((current_metadata = ReadCurrentMetadata()) &&
           GetMetadataPartitionState(*current_metadata, name) == MetadataPartitionState::Updated);

    std::string target_type;
    DmTargetSnapshot::Status status;
    if (!QuerySnapshotStatus(dm_name, &target_type, &status)) {
        return UpdateState::MergeFailed;
    }
    if (target_type != "snapshot-merge") {
        // We can get here if we failed to rewrite the target type in
        // InitiateMerge(). If we failed to create the target in first-stage
        // init, boot would not succeed.
        LOG(ERROR) << "Snapshot " << name << " has incorrect target type: " << target_type;
        return UpdateState::MergeFailed;
    }

    // These two values are equal when merging is complete.
    if (status.sectors_allocated != status.metadata_sectors) {
        if (snapshot_status.state() == SnapshotState::MERGE_COMPLETED) {
            LOG(ERROR) << "Snapshot " << name << " is merging after being marked merge-complete.";
            return UpdateState::MergeFailed;
        }
        return UpdateState::Merging;
    }

    // Merging is done. First, update the status file to indicate the merge
    // is complete. We do this before calling OnSnapshotMergeComplete, even
    // though this means the write is potentially wasted work (since in the
    // ideal case we'll immediately delete the file).
    //
    // This makes it simpler to reason about the next reboot: no matter what
    // part of cleanup failed, first-stage init won't try to create another
    // snapshot device for this partition.
    snapshot_status.set_state(SnapshotState::MERGE_COMPLETED);
    if (!WriteSnapshotStatus(lock, snapshot_status)) {
        return UpdateState::MergeFailed;
    }
    if (!OnSnapshotMergeComplete(lock, name, snapshot_status)) {
        return UpdateState::MergeNeedsReboot;
    }
    return UpdateState::MergeCompleted;
}
1015 
GetSnapshotBootIndicatorPath()1016 std::string SnapshotManager::GetSnapshotBootIndicatorPath() {
1017     return metadata_dir_ + "/" + android::base::Basename(kBootIndicatorPath);
1018 }
1019 
GetRollbackIndicatorPath()1020 std::string SnapshotManager::GetRollbackIndicatorPath() {
1021     return metadata_dir_ + "/" + android::base::Basename(kRollbackIndicatorPath);
1022 }
1023 
GetForwardMergeIndicatorPath()1024 std::string SnapshotManager::GetForwardMergeIndicatorPath() {
1025     return metadata_dir_ + "/allow-forward-merge";
1026 }
1027 
AcknowledgeMergeSuccess(LockedFile * lock)1028 void SnapshotManager::AcknowledgeMergeSuccess(LockedFile* lock) {
1029     // It's not possible to remove update state in recovery, so write an
1030     // indicator that cleanup is needed on reboot. If a factory data reset
1031     // was requested, it doesn't matter, everything will get wiped anyway.
1032     // To make testing easier we consider a /data wipe as cleaned up.
1033     if (device_->IsRecovery() && !in_factory_data_reset_) {
1034         WriteUpdateState(lock, UpdateState::MergeCompleted);
1035         return;
1036     }
1037 
1038     RemoveAllUpdateState(lock);
1039 }
1040 
AcknowledgeMergeFailure()1041 void SnapshotManager::AcknowledgeMergeFailure() {
1042     // Log first, so worst case, we always have a record of why the calls below
1043     // were being made.
1044     LOG(ERROR) << "Merge could not be completed and will be marked as failed.";
1045 
1046     auto lock = LockExclusive();
1047     if (!lock) return;
1048 
1049     // Since we released the lock in between WaitForMerge and here, it's
1050     // possible (1) the merge successfully completed or (2) was already
1051     // marked as a failure. So make sure to check the state again, and
1052     // only mark as a failure if appropriate.
1053     UpdateState state = ReadUpdateState(lock.get());
1054     if (state != UpdateState::Merging && state != UpdateState::MergeNeedsReboot) {
1055         return;
1056     }
1057 
1058     WriteUpdateState(lock.get(), UpdateState::MergeFailed);
1059 }
1060 
OnSnapshotMergeComplete(LockedFile * lock,const std::string & name,const SnapshotStatus & status)1061 bool SnapshotManager::OnSnapshotMergeComplete(LockedFile* lock, const std::string& name,
1062                                               const SnapshotStatus& status) {
1063     auto dm_name = GetSnapshotDeviceName(name, status);
1064     if (IsSnapshotDevice(dm_name)) {
1065         // We are extra-cautious here, to avoid deleting the wrong table.
1066         std::string target_type;
1067         DmTargetSnapshot::Status dm_status;
1068         if (!QuerySnapshotStatus(dm_name, &target_type, &dm_status)) {
1069             return false;
1070         }
1071         if (target_type != "snapshot-merge") {
1072             LOG(ERROR) << "Unexpected target type " << target_type
1073                        << " for snapshot device: " << dm_name;
1074             return false;
1075         }
1076         if (dm_status.sectors_allocated != dm_status.metadata_sectors) {
1077             LOG(ERROR) << "Merge is unexpectedly incomplete for device " << dm_name;
1078             return false;
1079         }
1080         if (!CollapseSnapshotDevice(name, status)) {
1081             LOG(ERROR) << "Unable to collapse snapshot: " << name;
1082             return false;
1083         }
1084         // Note that collapsing is implicitly an Unmap, so we don't need to
1085         // unmap the snapshot.
1086     }
1087 
1088     if (!DeleteSnapshot(lock, name)) {
1089         LOG(ERROR) << "Could not delete snapshot: " << name;
1090         return false;
1091     }
1092     return true;
1093 }
1094 
// Replaces the device-mapper table of the outer device |name| with the table
// that CreateDmTable() builds for the partition, then removes the devices that
// are no longer needed.
//
// Before swapping tables this verifies that the snapshot device is a
// single-target "snapshot-merge" device, that the recorded snapshot size is
// sector aligned, and (when the snapshot device is distinct from |name|) that
// the outer table consists of exactly two linear targets with the expected
// lengths.
//
// Returns false if any verification fails, if the new table cannot be created
// or loaded, or if the snapshot device cannot be deleted. Failure to delete
// the base device is only logged.
bool SnapshotManager::CollapseSnapshotDevice(const std::string& name,
                                             const SnapshotStatus& status) {
    auto& dm = DeviceMapper::Instance();
    auto dm_name = GetSnapshotDeviceName(name, status);

    // Verify we have a snapshot-merge device.
    DeviceMapper::TargetInfo target;
    if (!GetSingleTarget(dm_name, TableQuery::Table, &target)) {
        return false;
    }
    if (DeviceMapper::GetTargetType(target.spec) != "snapshot-merge") {
        // This should be impossible, it was checked earlier.
        LOG(ERROR) << "Snapshot device has invalid target type: " << dm_name;
        return false;
    }

    // Parsing doubles as validation of the target parameters; the resulting
    // device names are not used further in this function.
    std::string base_device, cow_device;
    if (!DmTargetSnapshot::GetDevicesFromParams(target.data, &base_device, &cow_device)) {
        LOG(ERROR) << "Could not parse snapshot device " << dm_name
                   << " parameters: " << target.data;
        return false;
    }

    // Reject sizes that are not a whole number of sectors.
    uint64_t snapshot_sectors = status.snapshot_size() / kSectorSize;
    if (snapshot_sectors * kSectorSize != status.snapshot_size()) {
        LOG(ERROR) << "Snapshot " << name
                   << " size is not sector aligned: " << status.snapshot_size();
        return false;
    }

    if (dm_name != name) {
        // We've derived the base device, but we actually need to replace the
        // table of the outermost device. Do a quick verification that this
        // device looks like we expect it to.
        std::vector<DeviceMapper::TargetInfo> outer_table;
        if (!dm.GetTableInfo(name, &outer_table)) {
            LOG(ERROR) << "Could not validate outer snapshot table: " << name;
            return false;
        }
        if (outer_table.size() != 2) {
            LOG(ERROR) << "Expected 2 dm-linear targets for table " << name
                       << ", got: " << outer_table.size();
            return false;
        }
        // Note: this loop variable shadows the outer |target| declared above.
        for (const auto& target : outer_table) {
            auto target_type = DeviceMapper::GetTargetType(target.spec);
            if (target_type != "linear") {
                LOG(ERROR) << "Outer snapshot table may only contain linear targets, but " << name
                           << " has target: " << target_type;
                return false;
            }
        }
        // The first linear target must span exactly the snapshot.
        if (outer_table[0].spec.length != snapshot_sectors) {
            LOG(ERROR) << "dm-snapshot " << name << " should have " << snapshot_sectors
                       << " sectors, got: " << outer_table[0].spec.length;
            return false;
        }
        // Both targets together must span the recorded device size.
        uint64_t expected_device_sectors = status.device_size() / kSectorSize;
        uint64_t actual_device_sectors = outer_table[0].spec.length + outer_table[1].spec.length;
        if (expected_device_sectors != actual_device_sectors) {
            LOG(ERROR) << "Outer device " << name << " should have " << expected_device_sectors
                       << " sectors, got: " << actual_device_sectors;
            return false;
        }
    }

    uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
    // Create a DmTable that is identical to the base device.
    CreateLogicalPartitionParams base_device_params{
            .block_device = device_->GetSuperDevice(slot),
            .metadata_slot = slot,
            .partition_name = name,
            .partition_opener = &device_->GetPartitionOpener(),
    };
    DmTable table;
    if (!CreateDmTable(base_device_params, &table)) {
        LOG(ERROR) << "Could not create a DmTable for partition: " << name;
        return false;
    }

    // Note: we are replacing the *outer* table here, so we do not use dm_name.
    if (!dm.LoadTableAndActivate(name, table)) {
        return false;
    }

    // Attempt to delete the snapshot device if one still exists. Nothing
    // should be depending on the device, and device-mapper should have
    // flushed remaining I/O. We could in theory replace with dm-zero (or
    // re-use the table above), but for now it's better to know why this
    // would fail.
    if (dm_name != name && !dm.DeleteDeviceIfExists(dm_name)) {
        LOG(ERROR) << "Unable to delete snapshot device " << dm_name << ", COW cannot be "
                   << "reclaimed until after reboot.";
        return false;
    }

    // Cleanup the base device as well, since it is no longer used. This does
    // not block cleanup.
    auto base_name = GetBaseDeviceName(name);
    if (!dm.DeleteDeviceIfExists(base_name)) {
        LOG(ERROR) << "Unable to delete base device for snapshot: " << base_name;
    }
    return true;
}
1199 
HandleCancelledUpdate(LockedFile * lock,const std::function<bool ()> & before_cancel)1200 bool SnapshotManager::HandleCancelledUpdate(LockedFile* lock,
1201                                             const std::function<bool()>& before_cancel) {
1202     auto slot = GetCurrentSlot();
1203     if (slot == Slot::Unknown) {
1204         return false;
1205     }
1206 
1207     // If all snapshots were reflashed, then cancel the entire update.
1208     if (AreAllSnapshotsCancelled(lock)) {
1209         LOG(WARNING) << "Detected re-flashing, cancelling unverified update.";
1210         return RemoveAllUpdateState(lock, before_cancel);
1211     }
1212 
1213     // If update has been rolled back, then cancel the entire update.
1214     // Client (update_engine) is responsible for doing additional cleanup work on its own states
1215     // when ProcessUpdateState() returns UpdateState::Cancelled.
1216     auto current_slot = GetCurrentSlot();
1217     if (current_slot != Slot::Source) {
1218         LOG(INFO) << "Update state is being processed while booting at " << current_slot
1219                   << " slot, taking no action.";
1220         return false;
1221     }
1222 
1223     // current_slot == Source. Attempt to detect rollbacks.
1224     if (access(GetRollbackIndicatorPath().c_str(), F_OK) != 0) {
1225         // This unverified update is not attempted. Take no action.
1226         PLOG(INFO) << "Rollback indicator not detected. "
1227                    << "Update state is being processed before reboot, taking no action.";
1228         return false;
1229     }
1230 
1231     LOG(WARNING) << "Detected rollback, cancelling unverified update.";
1232     return RemoveAllUpdateState(lock, before_cancel);
1233 }
1234 
ReadCurrentMetadata()1235 std::unique_ptr<LpMetadata> SnapshotManager::ReadCurrentMetadata() {
1236     const auto& opener = device_->GetPartitionOpener();
1237     uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
1238     auto super_device = device_->GetSuperDevice(slot);
1239     auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot);
1240     if (!metadata) {
1241         LOG(ERROR) << "Could not read dynamic partition metadata for device: " << super_device;
1242         return nullptr;
1243     }
1244     return metadata;
1245 }
1246 
GetMetadataPartitionState(const LpMetadata & metadata,const std::string & name)1247 SnapshotManager::MetadataPartitionState SnapshotManager::GetMetadataPartitionState(
1248         const LpMetadata& metadata, const std::string& name) {
1249     auto partition = android::fs_mgr::FindPartition(metadata, name);
1250     if (!partition) return MetadataPartitionState::None;
1251     if (partition->attributes & LP_PARTITION_ATTR_UPDATED) {
1252         return MetadataPartitionState::Updated;
1253     }
1254     return MetadataPartitionState::Flashed;
1255 }
1256 
AreAllSnapshotsCancelled(LockedFile * lock)1257 bool SnapshotManager::AreAllSnapshotsCancelled(LockedFile* lock) {
1258     std::vector<std::string> snapshots;
1259     if (!ListSnapshots(lock, &snapshots)) {
1260         LOG(WARNING) << "Failed to list snapshots to determine whether device has been flashed "
1261                      << "after applying an update. Assuming no snapshots.";
1262         // Let HandleCancelledUpdate resets UpdateState.
1263         return true;
1264     }
1265 
1266     std::map<std::string, bool> flashing_status;
1267 
1268     if (!GetSnapshotFlashingStatus(lock, snapshots, &flashing_status)) {
1269         LOG(WARNING) << "Failed to determine whether partitions have been flashed. Not"
1270                      << "removing update states.";
1271         return false;
1272     }
1273 
1274     bool all_snapshots_cancelled = std::all_of(flashing_status.begin(), flashing_status.end(),
1275                                                [](const auto& pair) { return pair.second; });
1276 
1277     if (all_snapshots_cancelled) {
1278         LOG(WARNING) << "All partitions are re-flashed after update, removing all update states.";
1279     }
1280     return all_snapshots_cancelled;
1281 }
1282 
GetSnapshotFlashingStatus(LockedFile * lock,const std::vector<std::string> & snapshots,std::map<std::string,bool> * out)1283 bool SnapshotManager::GetSnapshotFlashingStatus(LockedFile* lock,
1284                                                 const std::vector<std::string>& snapshots,
1285                                                 std::map<std::string, bool>* out) {
1286     CHECK(lock);
1287 
1288     auto source_slot_suffix = ReadUpdateSourceSlotSuffix();
1289     if (source_slot_suffix.empty()) {
1290         return false;
1291     }
1292     uint32_t source_slot = SlotNumberForSlotSuffix(source_slot_suffix);
1293     uint32_t target_slot = (source_slot == 0) ? 1 : 0;
1294 
1295     // Attempt to detect re-flashing on each partition.
1296     // - If all partitions are re-flashed, we can proceed to cancel the whole update.
1297     // - If only some of the partitions are re-flashed, snapshots for re-flashed partitions are
1298     //   deleted. Caller is responsible for merging the rest of the snapshots.
1299     // - If none of the partitions are re-flashed, caller is responsible for merging the snapshots.
1300     //
1301     // Note that we use target slot metadata, since if an OTA has been applied
1302     // to the target slot, we can detect the UPDATED flag. Any kind of flash
1303     // operation against dynamic partitions ensures that all copies of the
1304     // metadata are in sync, so flashing all partitions on the source slot will
1305     // remove the UPDATED flag on the target slot as well.
1306     const auto& opener = device_->GetPartitionOpener();
1307     auto super_device = device_->GetSuperDevice(target_slot);
1308     auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, target_slot);
1309     if (!metadata) {
1310         return false;
1311     }
1312 
1313     for (const auto& snapshot_name : snapshots) {
1314         if (GetMetadataPartitionState(*metadata, snapshot_name) ==
1315             MetadataPartitionState::Updated) {
1316             out->emplace(snapshot_name, false);
1317         } else {
1318             // Delete snapshots for partitions that are re-flashed after the update.
1319             LOG(WARNING) << "Detected re-flashing of partition " << snapshot_name << ".";
1320             out->emplace(snapshot_name, true);
1321         }
1322     }
1323     return true;
1324 }
1325 
RemoveAllSnapshots(LockedFile * lock)1326 bool SnapshotManager::RemoveAllSnapshots(LockedFile* lock) {
1327     std::vector<std::string> snapshots;
1328     if (!ListSnapshots(lock, &snapshots)) {
1329         LOG(ERROR) << "Could not list snapshots";
1330         return false;
1331     }
1332 
1333     std::map<std::string, bool> flashing_status;
1334     if (!GetSnapshotFlashingStatus(lock, snapshots, &flashing_status)) {
1335         LOG(WARNING) << "Failed to get flashing status";
1336     }
1337 
1338     auto current_slot = GetCurrentSlot();
1339     bool ok = true;
1340     bool has_mapped_cow_images = false;
1341     for (const auto& name : snapshots) {
1342         // If booting off source slot, it is okay to unmap and delete all the snapshots.
1343         // If boot indicator is missing, update state is None or Initiated, so
1344         //   it is also okay to unmap and delete all the snapshots.
1345         // If booting off target slot,
1346         //  - should not unmap because:
1347         //    - In Android mode, snapshots are not mapped, but
1348         //      filesystems are mounting off dm-linear targets directly.
1349         //    - In recovery mode, assume nothing is mapped, so it is optional to unmap.
1350         //  - If partition is flashed or unknown, it is okay to delete snapshots.
1351         //    Otherwise (UPDATED flag), only delete snapshots if they are not mapped
1352         //    as dm-snapshot (for example, after merge completes).
1353         bool should_unmap = current_slot != Slot::Target;
1354         bool should_delete = ShouldDeleteSnapshot(lock, flashing_status, current_slot, name);
1355 
1356         bool partition_ok = true;
1357         if (should_unmap && !UnmapPartitionWithSnapshot(lock, name)) {
1358             partition_ok = false;
1359         }
1360         if (partition_ok && should_delete && !DeleteSnapshot(lock, name)) {
1361             partition_ok = false;
1362         }
1363 
1364         if (!partition_ok) {
1365             // Remember whether or not we were able to unmap the cow image.
1366             auto cow_image_device = GetCowImageDeviceName(name);
1367             has_mapped_cow_images |=
1368                     (EnsureImageManager() && images_->IsImageMapped(cow_image_device));
1369 
1370             ok = false;
1371         }
1372     }
1373 
1374     if (ok || !has_mapped_cow_images) {
1375         // Delete any image artifacts as a precaution, in case an update is
1376         // being cancelled due to some corrupted state in an lp_metadata file.
1377         // Note that we do not do this if some cow images are still mapped,
1378         // since we must not remove backing storage if it's in use.
1379         if (!EnsureImageManager() || !images_->RemoveAllImages()) {
1380             LOG(ERROR) << "Could not remove all snapshot artifacts";
1381             return false;
1382         }
1383     }
1384     return ok;
1385 }
1386 
1387 // See comments in RemoveAllSnapshots().
ShouldDeleteSnapshot(LockedFile * lock,const std::map<std::string,bool> & flashing_status,Slot current_slot,const std::string & name)1388 bool SnapshotManager::ShouldDeleteSnapshot(LockedFile* lock,
1389                                            const std::map<std::string, bool>& flashing_status,
1390                                            Slot current_slot, const std::string& name) {
1391     if (current_slot != Slot::Target) {
1392         return true;
1393     }
1394     auto it = flashing_status.find(name);
1395     if (it == flashing_status.end()) {
1396         LOG(WARNING) << "Can't determine flashing status for " << name;
1397         return true;
1398     }
1399     if (it->second) {
1400         // partition flashed, okay to delete obsolete snapshots
1401         return true;
1402     }
1403     // partition updated, only delete if not dm-snapshot
1404     SnapshotStatus status;
1405     if (!ReadSnapshotStatus(lock, name, &status)) {
1406         LOG(WARNING) << "Unable to read snapshot status for " << name
1407                      << ", guessing snapshot device name";
1408         auto extra_name = GetSnapshotExtraDeviceName(name);
1409         return !IsSnapshotDevice(name) && !IsSnapshotDevice(extra_name);
1410     }
1411     auto dm_name = GetSnapshotDeviceName(name, status);
1412     return !IsSnapshotDevice(dm_name);
1413 }
1414 
GetUpdateState(double * progress)1415 UpdateState SnapshotManager::GetUpdateState(double* progress) {
1416     // If we've never started an update, the state file won't exist.
1417     auto state_file = GetStateFilePath();
1418     if (access(state_file.c_str(), F_OK) != 0 && errno == ENOENT) {
1419         return UpdateState::None;
1420     }
1421 
1422     auto lock = LockShared();
1423     if (!lock) {
1424         return UpdateState::None;
1425     }
1426 
1427     SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock.get());
1428     auto state = update_status.state();
1429     if (progress == nullptr) {
1430         return state;
1431     }
1432 
1433     if (state == UpdateState::MergeCompleted) {
1434         *progress = 100.0;
1435         return state;
1436     }
1437 
1438     *progress = 0.0;
1439     if (state != UpdateState::Merging) {
1440         return state;
1441     }
1442 
1443     // Sum all the snapshot states as if the system consists of a single huge
1444     // snapshots device, then compute the merge completion percentage of that
1445     // device.
1446     std::vector<std::string> snapshots;
1447     if (!ListSnapshots(lock.get(), &snapshots)) {
1448         LOG(ERROR) << "Could not list snapshots";
1449         return state;
1450     }
1451 
1452     DmTargetSnapshot::Status fake_snapshots_status = {};
1453     for (const auto& snapshot : snapshots) {
1454         DmTargetSnapshot::Status current_status;
1455 
1456         if (!QuerySnapshotStatus(snapshot, nullptr, &current_status)) continue;
1457 
1458         fake_snapshots_status.sectors_allocated += current_status.sectors_allocated;
1459         fake_snapshots_status.total_sectors += current_status.total_sectors;
1460         fake_snapshots_status.metadata_sectors += current_status.metadata_sectors;
1461     }
1462 
1463     *progress = DmTargetSnapshot::MergePercent(fake_snapshots_status,
1464                                                update_status.sectors_allocated());
1465 
1466     return state;
1467 }
1468 
ListSnapshots(LockedFile * lock,std::vector<std::string> * snapshots)1469 bool SnapshotManager::ListSnapshots(LockedFile* lock, std::vector<std::string>* snapshots) {
1470     CHECK(lock);
1471 
1472     auto dir_path = metadata_dir_ + "/snapshots"s;
1473     std::unique_ptr<DIR, decltype(&closedir)> dir(opendir(dir_path.c_str()), closedir);
1474     if (!dir) {
1475         PLOG(ERROR) << "opendir failed: " << dir_path;
1476         return false;
1477     }
1478 
1479     struct dirent* dp;
1480     while ((dp = readdir(dir.get())) != nullptr) {
1481         if (dp->d_type != DT_REG) continue;
1482         snapshots->emplace_back(dp->d_name);
1483     }
1484     return true;
1485 }
1486 
IsSnapshotManagerNeeded()1487 bool SnapshotManager::IsSnapshotManagerNeeded() {
1488     return access(kBootIndicatorPath, F_OK) == 0;
1489 }
1490 
GetGlobalRollbackIndicatorPath()1491 std::string SnapshotManager::GetGlobalRollbackIndicatorPath() {
1492     return kRollbackIndicatorPath;
1493 }
1494 
NeedSnapshotsInFirstStageMount()1495 bool SnapshotManager::NeedSnapshotsInFirstStageMount() {
1496     // If we fail to read, we'll wind up using CreateLogicalPartitions, which
1497     // will create devices that look like the old slot, except with extra
1498     // content at the end of each device. This will confuse dm-verity, and
1499     // ultimately we'll fail to boot. Why not make it a fatal error and have
1500     // the reason be clearer? Because the indicator file still exists, and
1501     // if this was FATAL, reverting to the old slot would be broken.
1502     auto slot = GetCurrentSlot();
1503 
1504     if (slot != Slot::Target) {
1505         if (slot == Slot::Source) {
1506             // Device is rebooting into the original slot, so mark this as a
1507             // rollback.
1508             auto path = GetRollbackIndicatorPath();
1509             if (!android::base::WriteStringToFile("1", path)) {
1510                 PLOG(ERROR) << "Unable to write rollback indicator: " << path;
1511             } else {
1512                 LOG(INFO) << "Rollback detected, writing rollback indicator to " << path;
1513             }
1514         }
1515         LOG(INFO) << "Not booting from new slot. Will not mount snapshots.";
1516         return false;
1517     }
1518 
1519     // If we can't read the update state, it's unlikely anything else will
1520     // succeed, so this is a fatal error. We'll eventually exhaust boot
1521     // attempts and revert to the old slot.
1522     auto lock = LockShared();
1523     if (!lock) {
1524         LOG(FATAL) << "Could not read update state to determine snapshot status";
1525         return false;
1526     }
1527     switch (ReadUpdateState(lock.get())) {
1528         case UpdateState::Unverified:
1529         case UpdateState::Merging:
1530         case UpdateState::MergeFailed:
1531             return true;
1532         default:
1533             return false;
1534     }
1535 }
1536 
CreateLogicalAndSnapshotPartitions(const std::string & super_device,const std::chrono::milliseconds & timeout_ms)1537 bool SnapshotManager::CreateLogicalAndSnapshotPartitions(
1538         const std::string& super_device, const std::chrono::milliseconds& timeout_ms) {
1539     LOG(INFO) << "Creating logical partitions with snapshots as needed";
1540 
1541     auto lock = LockExclusive();
1542     if (!lock) return false;
1543 
1544     const auto& opener = device_->GetPartitionOpener();
1545     uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
1546     auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot);
1547     if (!metadata) {
1548         LOG(ERROR) << "Could not read dynamic partition metadata for device: " << super_device;
1549         return false;
1550     }
1551 
1552     for (const auto& partition : metadata->partitions) {
1553         if (GetPartitionGroupName(metadata->groups[partition.group_index]) == kCowGroupName) {
1554             LOG(INFO) << "Skip mapping partition " << GetPartitionName(partition) << " in group "
1555                       << kCowGroupName;
1556             continue;
1557         }
1558 
1559         CreateLogicalPartitionParams params = {
1560                 .block_device = super_device,
1561                 .metadata = metadata.get(),
1562                 .partition = &partition,
1563                 .partition_opener = &opener,
1564                 .timeout_ms = timeout_ms,
1565         };
1566         std::string ignore_path;
1567         if (!MapPartitionWithSnapshot(lock.get(), std::move(params), &ignore_path)) {
1568             return false;
1569         }
1570     }
1571 
1572     LOG(INFO) << "Created logical partitions with snapshot.";
1573     return true;
1574 }
1575 
GetRemainingTime(const std::chrono::milliseconds & timeout,const std::chrono::time_point<std::chrono::steady_clock> & begin)1576 static std::chrono::milliseconds GetRemainingTime(
1577         const std::chrono::milliseconds& timeout,
1578         const std::chrono::time_point<std::chrono::steady_clock>& begin) {
1579     // If no timeout is specified, execute all commands without specifying any timeout.
1580     if (timeout.count() == 0) return std::chrono::milliseconds(0);
1581     auto passed_time = std::chrono::steady_clock::now() - begin;
1582     auto remaining_time = timeout - duration_cast<std::chrono::milliseconds>(passed_time);
1583     if (remaining_time.count() <= 0) {
1584         LOG(ERROR) << "MapPartitionWithSnapshot has reached timeout " << timeout.count() << "ms ("
1585                    << remaining_time.count() << "ms remaining)";
1586         // Return min() instead of remaining_time here because 0 is treated as a special value for
1587         // no timeout, where the rest of the commands will still be executed.
1588         return std::chrono::milliseconds::min();
1589     }
1590     return remaining_time;
1591 }
1592 
MapPartitionWithSnapshot(LockedFile * lock,CreateLogicalPartitionParams params,std::string * path)1593 bool SnapshotManager::MapPartitionWithSnapshot(LockedFile* lock,
1594                                                CreateLogicalPartitionParams params,
1595                                                std::string* path) {
1596     auto begin = std::chrono::steady_clock::now();
1597 
1598     CHECK(lock);
1599     path->clear();
1600 
1601     if (params.GetPartitionName() != params.GetDeviceName()) {
1602         LOG(ERROR) << "Mapping snapshot with a different name is unsupported: partition_name = "
1603                    << params.GetPartitionName() << ", device_name = " << params.GetDeviceName();
1604         return false;
1605     }
1606 
1607     // Fill out fields in CreateLogicalPartitionParams so that we have more information (e.g. by
1608     // reading super partition metadata).
1609     CreateLogicalPartitionParams::OwnedData params_owned_data;
1610     if (!params.InitDefaults(&params_owned_data)) {
1611         return false;
1612     }
1613 
1614     if (!params.partition->num_extents) {
1615         LOG(INFO) << "Skipping zero-length logical partition: " << params.GetPartitionName();
1616         return true;  // leave path empty to indicate that nothing is mapped.
1617     }
1618 
1619     // Determine if there is a live snapshot for the SnapshotStatus of the partition; i.e. if the
1620     // partition still has a snapshot that needs to be mapped.  If no live snapshot or merge
1621     // completed, live_snapshot_status is set to nullopt.
1622     std::optional<SnapshotStatus> live_snapshot_status;
1623     do {
1624         if (!(params.partition->attributes & LP_PARTITION_ATTR_UPDATED)) {
1625             LOG(INFO) << "Detected re-flashing of partition, will skip snapshot: "
1626                       << params.GetPartitionName();
1627             break;
1628         }
1629         auto file_path = GetSnapshotStatusFilePath(params.GetPartitionName());
1630         if (access(file_path.c_str(), F_OK) != 0) {
1631             if (errno != ENOENT) {
1632                 PLOG(INFO) << "Can't map snapshot for " << params.GetPartitionName()
1633                            << ": Can't access " << file_path;
1634                 return false;
1635             }
1636             break;
1637         }
1638         live_snapshot_status = std::make_optional<SnapshotStatus>();
1639         if (!ReadSnapshotStatus(lock, params.GetPartitionName(), &*live_snapshot_status)) {
1640             return false;
1641         }
1642         // No live snapshot if merge is completed.
1643         if (live_snapshot_status->state() == SnapshotState::MERGE_COMPLETED) {
1644             live_snapshot_status.reset();
1645         }
1646 
1647         if (live_snapshot_status->state() == SnapshotState::NONE ||
1648             live_snapshot_status->cow_partition_size() + live_snapshot_status->cow_file_size() ==
1649                     0) {
1650             LOG(WARNING) << "Snapshot status for " << params.GetPartitionName()
1651                          << " is invalid, ignoring: state = "
1652                          << SnapshotState_Name(live_snapshot_status->state())
1653                          << ", cow_partition_size = " << live_snapshot_status->cow_partition_size()
1654                          << ", cow_file_size = " << live_snapshot_status->cow_file_size();
1655             live_snapshot_status.reset();
1656         }
1657     } while (0);
1658 
1659     if (live_snapshot_status.has_value()) {
1660         // dm-snapshot requires the base device to be writable.
1661         params.force_writable = true;
1662         // Map the base device with a different name to avoid collision.
1663         params.device_name = GetBaseDeviceName(params.GetPartitionName());
1664     }
1665 
1666     AutoDeviceList created_devices;
1667 
1668     // Create the base device for the snapshot, or if there is no snapshot, the
1669     // device itself. This device consists of the real blocks in the super
1670     // partition that this logical partition occupies.
1671     auto& dm = DeviceMapper::Instance();
1672     std::string base_path;
1673     if (!CreateLogicalPartition(params, &base_path)) {
1674         LOG(ERROR) << "Could not create logical partition " << params.GetPartitionName()
1675                    << " as device " << params.GetDeviceName();
1676         return false;
1677     }
1678     created_devices.EmplaceBack<AutoUnmapDevice>(&dm, params.GetDeviceName());
1679 
1680     if (!live_snapshot_status.has_value()) {
1681         *path = base_path;
1682         created_devices.Release();
1683         return true;
1684     }
1685 
1686     // We don't have ueventd in first-stage init, so use device major:minor
1687     // strings instead.
1688     std::string base_device;
1689     if (!dm.GetDeviceString(params.GetDeviceName(), &base_device)) {
1690         LOG(ERROR) << "Could not determine major/minor for: " << params.GetDeviceName();
1691         return false;
1692     }
1693 
1694     auto remaining_time = GetRemainingTime(params.timeout_ms, begin);
1695     if (remaining_time.count() < 0) return false;
1696 
1697     std::string cow_name;
1698     CreateLogicalPartitionParams cow_params = params;
1699     cow_params.timeout_ms = remaining_time;
1700     if (!MapCowDevices(lock, cow_params, *live_snapshot_status, &created_devices, &cow_name)) {
1701         return false;
1702     }
1703     std::string cow_device;
1704     if (!GetMappedImageDeviceStringOrPath(cow_name, &cow_device)) {
1705         LOG(ERROR) << "Could not determine major/minor for: " << cow_name;
1706         return false;
1707     }
1708 
1709     remaining_time = GetRemainingTime(params.timeout_ms, begin);
1710     if (remaining_time.count() < 0) return false;
1711 
1712     if (!MapSnapshot(lock, params.GetPartitionName(), base_device, cow_device, remaining_time,
1713                      path)) {
1714         LOG(ERROR) << "Could not map snapshot for partition: " << params.GetPartitionName();
1715         return false;
1716     }
1717     // No need to add params.GetPartitionName() to created_devices since it is immediately released.
1718 
1719     created_devices.Release();
1720 
1721     LOG(INFO) << "Mapped " << params.GetPartitionName() << " as snapshot device at " << *path;
1722 
1723     return true;
1724 }
1725 
UnmapPartitionWithSnapshot(LockedFile * lock,const std::string & target_partition_name)1726 bool SnapshotManager::UnmapPartitionWithSnapshot(LockedFile* lock,
1727                                                  const std::string& target_partition_name) {
1728     CHECK(lock);
1729 
1730     if (!UnmapSnapshot(lock, target_partition_name)) {
1731         return false;
1732     }
1733 
1734     if (!UnmapCowDevices(lock, target_partition_name)) {
1735         return false;
1736     }
1737 
1738     auto& dm = DeviceMapper::Instance();
1739     std::string base_name = GetBaseDeviceName(target_partition_name);
1740     if (!dm.DeleteDeviceIfExists(base_name)) {
1741         LOG(ERROR) << "Cannot delete base device: " << base_name;
1742         return false;
1743     }
1744 
1745     LOG(INFO) << "Successfully unmapped snapshot " << target_partition_name;
1746 
1747     return true;
1748 }
1749 
MapCowDevices(LockedFile * lock,const CreateLogicalPartitionParams & params,const SnapshotStatus & snapshot_status,AutoDeviceList * created_devices,std::string * cow_name)1750 bool SnapshotManager::MapCowDevices(LockedFile* lock, const CreateLogicalPartitionParams& params,
1751                                     const SnapshotStatus& snapshot_status,
1752                                     AutoDeviceList* created_devices, std::string* cow_name) {
1753     CHECK(lock);
1754     CHECK(snapshot_status.cow_partition_size() + snapshot_status.cow_file_size() > 0);
1755     auto begin = std::chrono::steady_clock::now();
1756 
1757     std::string partition_name = params.GetPartitionName();
1758     std::string cow_image_name = GetCowImageDeviceName(partition_name);
1759     *cow_name = GetCowName(partition_name);
1760 
1761     auto& dm = DeviceMapper::Instance();
1762 
1763     // Map COW image if necessary.
1764     if (snapshot_status.cow_file_size() > 0) {
1765         if (!EnsureImageManager()) return false;
1766         auto remaining_time = GetRemainingTime(params.timeout_ms, begin);
1767         if (remaining_time.count() < 0) return false;
1768 
1769         if (!MapCowImage(partition_name, remaining_time).has_value()) {
1770             LOG(ERROR) << "Could not map cow image for partition: " << partition_name;
1771             return false;
1772         }
1773         created_devices->EmplaceBack<AutoUnmapImage>(images_.get(), cow_image_name);
1774 
1775         // If no COW partition exists, just return the image alone.
1776         if (snapshot_status.cow_partition_size() == 0) {
1777             *cow_name = std::move(cow_image_name);
1778             LOG(INFO) << "Mapped COW image for " << partition_name << " at " << *cow_name;
1779             return true;
1780         }
1781     }
1782 
1783     auto remaining_time = GetRemainingTime(params.timeout_ms, begin);
1784     if (remaining_time.count() < 0) return false;
1785 
1786     CHECK(snapshot_status.cow_partition_size() > 0);
1787 
1788     // Create the DmTable for the COW device. It is the DmTable of the COW partition plus
1789     // COW image device as the last extent.
1790     CreateLogicalPartitionParams cow_partition_params = params;
1791     cow_partition_params.partition = nullptr;
1792     cow_partition_params.partition_name = *cow_name;
1793     cow_partition_params.device_name.clear();
1794     DmTable table;
1795     if (!CreateDmTable(cow_partition_params, &table)) {
1796         return false;
1797     }
1798     // If the COW image exists, append it as the last extent.
1799     if (snapshot_status.cow_file_size() > 0) {
1800         std::string cow_image_device;
1801         if (!GetMappedImageDeviceStringOrPath(cow_image_name, &cow_image_device)) {
1802             LOG(ERROR) << "Cannot determine major/minor for: " << cow_image_name;
1803             return false;
1804         }
1805         auto cow_partition_sectors = snapshot_status.cow_partition_size() / kSectorSize;
1806         auto cow_image_sectors = snapshot_status.cow_file_size() / kSectorSize;
1807         table.Emplace<DmTargetLinear>(cow_partition_sectors, cow_image_sectors, cow_image_device,
1808                                       0);
1809     }
1810 
1811     // We have created the DmTable now. Map it.
1812     std::string cow_path;
1813     if (!dm.CreateDevice(*cow_name, table, &cow_path, remaining_time)) {
1814         LOG(ERROR) << "Could not create COW device: " << *cow_name;
1815         return false;
1816     }
1817     created_devices->EmplaceBack<AutoUnmapDevice>(&dm, *cow_name);
1818     LOG(INFO) << "Mapped COW device for " << params.GetPartitionName() << " at " << cow_path;
1819     return true;
1820 }
1821 
UnmapCowDevices(LockedFile * lock,const std::string & name)1822 bool SnapshotManager::UnmapCowDevices(LockedFile* lock, const std::string& name) {
1823     CHECK(lock);
1824     if (!EnsureImageManager()) return false;
1825 
1826     auto& dm = DeviceMapper::Instance();
1827     auto cow_name = GetCowName(name);
1828     if (!dm.DeleteDeviceIfExists(cow_name)) {
1829         LOG(ERROR) << "Cannot unmap " << cow_name;
1830         return false;
1831     }
1832 
1833     std::string cow_image_name = GetCowImageDeviceName(name);
1834     if (!images_->UnmapImageIfExists(cow_image_name)) {
1835         LOG(ERROR) << "Cannot unmap image " << cow_image_name;
1836         return false;
1837     }
1838     return true;
1839 }
1840 
OpenFile(const std::string & file,int lock_flags)1841 auto SnapshotManager::OpenFile(const std::string& file, int lock_flags)
1842         -> std::unique_ptr<LockedFile> {
1843     unique_fd fd(open(file.c_str(), O_RDONLY | O_CLOEXEC | O_NOFOLLOW));
1844     if (fd < 0) {
1845         PLOG(ERROR) << "Open failed: " << file;
1846         return nullptr;
1847     }
1848     if (lock_flags != 0 && TEMP_FAILURE_RETRY(flock(fd, lock_flags)) < 0) {
1849         PLOG(ERROR) << "Acquire flock failed: " << file;
1850         return nullptr;
1851     }
1852     // For simplicity, we want to CHECK that lock_mode == LOCK_EX, in some
1853     // calls, so strip extra flags.
1854     int lock_mode = lock_flags & (LOCK_EX | LOCK_SH);
1855     return std::make_unique<LockedFile>(file, std::move(fd), lock_mode);
1856 }
1857 
~LockedFile()1858 SnapshotManager::LockedFile::~LockedFile() {
1859     if (TEMP_FAILURE_RETRY(flock(fd_, LOCK_UN)) < 0) {
1860         PLOG(ERROR) << "Failed to unlock file: " << path_;
1861     }
1862 }
1863 
GetStateFilePath() const1864 std::string SnapshotManager::GetStateFilePath() const {
1865     return metadata_dir_ + "/state"s;
1866 }
1867 
GetMergeStateFilePath() const1868 std::string SnapshotManager::GetMergeStateFilePath() const {
1869     return metadata_dir_ + "/merge_state"s;
1870 }
1871 
GetLockPath() const1872 std::string SnapshotManager::GetLockPath() const {
1873     return metadata_dir_;
1874 }
1875 
OpenLock(int lock_flags)1876 std::unique_ptr<SnapshotManager::LockedFile> SnapshotManager::OpenLock(int lock_flags) {
1877     auto lock_file = GetLockPath();
1878     return OpenFile(lock_file, lock_flags);
1879 }
1880 
LockShared()1881 std::unique_ptr<SnapshotManager::LockedFile> SnapshotManager::LockShared() {
1882     return OpenLock(LOCK_SH);
1883 }
1884 
LockExclusive()1885 std::unique_ptr<SnapshotManager::LockedFile> SnapshotManager::LockExclusive() {
1886     return OpenLock(LOCK_EX);
1887 }
1888 
UpdateStateFromString(const std::string & contents)1889 static UpdateState UpdateStateFromString(const std::string& contents) {
1890     if (contents.empty() || contents == "none") {
1891         return UpdateState::None;
1892     } else if (contents == "initiated") {
1893         return UpdateState::Initiated;
1894     } else if (contents == "unverified") {
1895         return UpdateState::Unverified;
1896     } else if (contents == "merging") {
1897         return UpdateState::Merging;
1898     } else if (contents == "merge-completed") {
1899         return UpdateState::MergeCompleted;
1900     } else if (contents == "merge-needs-reboot") {
1901         return UpdateState::MergeNeedsReboot;
1902     } else if (contents == "merge-failed") {
1903         return UpdateState::MergeFailed;
1904     } else if (contents == "cancelled") {
1905         return UpdateState::Cancelled;
1906     } else {
1907         LOG(ERROR) << "Unknown merge state in update state file: \"" << contents << "\"";
1908         return UpdateState::None;
1909     }
1910 }
1911 
operator <<(std::ostream & os,UpdateState state)1912 std::ostream& operator<<(std::ostream& os, UpdateState state) {
1913     switch (state) {
1914         case UpdateState::None:
1915             return os << "none";
1916         case UpdateState::Initiated:
1917             return os << "initiated";
1918         case UpdateState::Unverified:
1919             return os << "unverified";
1920         case UpdateState::Merging:
1921             return os << "merging";
1922         case UpdateState::MergeCompleted:
1923             return os << "merge-completed";
1924         case UpdateState::MergeNeedsReboot:
1925             return os << "merge-needs-reboot";
1926         case UpdateState::MergeFailed:
1927             return os << "merge-failed";
1928         case UpdateState::Cancelled:
1929             return os << "cancelled";
1930         default:
1931             LOG(ERROR) << "Unknown update state: " << static_cast<uint32_t>(state);
1932             return os;
1933     }
1934 }
1935 
ReadUpdateState(LockedFile * lock)1936 UpdateState SnapshotManager::ReadUpdateState(LockedFile* lock) {
1937     SnapshotUpdateStatus status = ReadSnapshotUpdateStatus(lock);
1938     return status.state();
1939 }
1940 
ReadSnapshotUpdateStatus(LockedFile * lock)1941 SnapshotUpdateStatus SnapshotManager::ReadSnapshotUpdateStatus(LockedFile* lock) {
1942     CHECK(lock);
1943 
1944     SnapshotUpdateStatus status = {};
1945     std::string contents;
1946     if (!android::base::ReadFileToString(GetStateFilePath(), &contents)) {
1947         PLOG(ERROR) << "Read state file failed";
1948         status.set_state(UpdateState::None);
1949         return status;
1950     }
1951 
1952     if (!status.ParseFromString(contents)) {
1953         LOG(WARNING) << "Unable to parse state file as SnapshotUpdateStatus, using the old format";
1954 
1955         // Try to rollback to legacy file to support devices that are
1956         // currently using the old file format.
1957         // TODO(b/147409432)
1958         status.set_state(UpdateStateFromString(contents));
1959     }
1960 
1961     return status;
1962 }
1963 
WriteUpdateState(LockedFile * lock,UpdateState state)1964 bool SnapshotManager::WriteUpdateState(LockedFile* lock, UpdateState state) {
1965     SnapshotUpdateStatus status = {};
1966     status.set_state(state);
1967     return WriteSnapshotUpdateStatus(lock, status);
1968 }
1969 
WriteSnapshotUpdateStatus(LockedFile * lock,const SnapshotUpdateStatus & status)1970 bool SnapshotManager::WriteSnapshotUpdateStatus(LockedFile* lock,
1971                                                 const SnapshotUpdateStatus& status) {
1972     CHECK(lock);
1973     CHECK(lock->lock_mode() == LOCK_EX);
1974 
1975     std::string contents;
1976     if (!status.SerializeToString(&contents)) {
1977         LOG(ERROR) << "Unable to serialize SnapshotUpdateStatus.";
1978         return false;
1979     }
1980 
1981 #ifdef LIBSNAPSHOT_USE_HAL
1982     auto merge_status = MergeStatus::UNKNOWN;
1983     switch (status.state()) {
1984         // The needs-reboot and completed cases imply that /data and /metadata
1985         // can be safely wiped, so we don't report a merge status.
1986         case UpdateState::None:
1987         case UpdateState::MergeNeedsReboot:
1988         case UpdateState::MergeCompleted:
1989         case UpdateState::Initiated:
1990             merge_status = MergeStatus::NONE;
1991             break;
1992         case UpdateState::Unverified:
1993             merge_status = MergeStatus::SNAPSHOTTED;
1994             break;
1995         case UpdateState::Merging:
1996         case UpdateState::MergeFailed:
1997             merge_status = MergeStatus::MERGING;
1998             break;
1999         default:
2000             // Note that Cancelled flows to here - it is never written, since
2001             // it only communicates a transient state to the caller.
2002             LOG(ERROR) << "Unexpected update status: " << status.state();
2003             break;
2004     }
2005 
2006     bool set_before_write =
2007             merge_status == MergeStatus::SNAPSHOTTED || merge_status == MergeStatus::MERGING;
2008     if (set_before_write && !device_->SetBootControlMergeStatus(merge_status)) {
2009         return false;
2010     }
2011 #endif
2012 
2013     if (!WriteStringToFileAtomic(contents, GetStateFilePath())) {
2014         PLOG(ERROR) << "Could not write to state file";
2015         return false;
2016     }
2017 
2018 #ifdef LIBSNAPSHOT_USE_HAL
2019     if (!set_before_write && !device_->SetBootControlMergeStatus(merge_status)) {
2020         return false;
2021     }
2022 #endif
2023     return true;
2024 }
2025 
GetSnapshotStatusFilePath(const std::string & name)2026 std::string SnapshotManager::GetSnapshotStatusFilePath(const std::string& name) {
2027     auto file = metadata_dir_ + "/snapshots/"s + name;
2028     return file;
2029 }
2030 
ReadSnapshotStatus(LockedFile * lock,const std::string & name,SnapshotStatus * status)2031 bool SnapshotManager::ReadSnapshotStatus(LockedFile* lock, const std::string& name,
2032                                          SnapshotStatus* status) {
2033     CHECK(lock);
2034     auto path = GetSnapshotStatusFilePath(name);
2035 
2036     unique_fd fd(open(path.c_str(), O_RDONLY | O_CLOEXEC | O_NOFOLLOW));
2037     if (fd < 0) {
2038         PLOG(ERROR) << "Open failed: " << path;
2039         return false;
2040     }
2041 
2042     if (!status->ParseFromFileDescriptor(fd.get())) {
2043         PLOG(ERROR) << "Unable to parse " << path << " as SnapshotStatus";
2044         return false;
2045     }
2046 
2047     if (status->name() != name) {
2048         LOG(WARNING) << "Found snapshot status named " << status->name() << " in " << path;
2049         status->set_name(name);
2050     }
2051 
2052     return true;
2053 }
2054 
WriteSnapshotStatus(LockedFile * lock,const SnapshotStatus & status)2055 bool SnapshotManager::WriteSnapshotStatus(LockedFile* lock, const SnapshotStatus& status) {
2056     // The caller must take an exclusive lock to modify snapshots.
2057     CHECK(lock);
2058     CHECK(lock->lock_mode() == LOCK_EX);
2059     CHECK(!status.name().empty());
2060 
2061     auto path = GetSnapshotStatusFilePath(status.name());
2062 
2063     std::string content;
2064     if (!status.SerializeToString(&content)) {
2065         LOG(ERROR) << "Unable to serialize SnapshotStatus for " << status.name();
2066         return false;
2067     }
2068 
2069     if (!WriteStringToFileAtomic(content, path)) {
2070         PLOG(ERROR) << "Unable to write SnapshotStatus to " << path;
2071         return false;
2072     }
2073 
2074     return true;
2075 }
2076 
GetSnapshotDeviceName(const std::string & snapshot_name,const SnapshotStatus & status)2077 std::string SnapshotManager::GetSnapshotDeviceName(const std::string& snapshot_name,
2078                                                    const SnapshotStatus& status) {
2079     if (status.device_size() != status.snapshot_size()) {
2080         return GetSnapshotExtraDeviceName(snapshot_name);
2081     }
2082     return snapshot_name;
2083 }
2084 
EnsureImageManager()2085 bool SnapshotManager::EnsureImageManager() {
2086     if (images_) return true;
2087 
2088     // For now, use a preset timeout.
2089     images_ = android::fiemap::IImageManager::Open(gsid_dir_, 15000ms);
2090     if (!images_) {
2091         LOG(ERROR) << "Could not open ImageManager";
2092         return false;
2093     }
2094     return true;
2095 }
2096 
ForceLocalImageManager()2097 bool SnapshotManager::ForceLocalImageManager() {
2098     images_ = android::fiemap::ImageManager::Open(gsid_dir_);
2099     if (!images_) {
2100         LOG(ERROR) << "Could not open ImageManager";
2101         return false;
2102     }
2103     has_local_image_manager_ = true;
2104     return true;
2105 }
2106 
UnmapAndDeleteCowPartition(MetadataBuilder * current_metadata)2107 static void UnmapAndDeleteCowPartition(MetadataBuilder* current_metadata) {
2108     auto& dm = DeviceMapper::Instance();
2109     std::vector<std::string> to_delete;
2110     for (auto* existing_cow_partition : current_metadata->ListPartitionsInGroup(kCowGroupName)) {
2111         if (!dm.DeleteDeviceIfExists(existing_cow_partition->name())) {
2112             LOG(WARNING) << existing_cow_partition->name()
2113                          << " cannot be unmapped and its space cannot be reclaimed";
2114             continue;
2115         }
2116         to_delete.push_back(existing_cow_partition->name());
2117     }
2118     for (const auto& name : to_delete) {
2119         current_metadata->RemovePartition(name);
2120     }
2121 }
2122 
AddRequiredSpace(Return orig,const std::map<std::string,SnapshotStatus> & all_snapshot_status)2123 static Return AddRequiredSpace(Return orig,
2124                                const std::map<std::string, SnapshotStatus>& all_snapshot_status) {
2125     if (orig.error_code() != Return::ErrorCode::NO_SPACE) {
2126         return orig;
2127     }
2128     uint64_t sum = 0;
2129     for (auto&& [name, status] : all_snapshot_status) {
2130         sum += status.cow_file_size();
2131     }
2132     return Return::NoSpace(sum);
2133 }
2134 
CreateUpdateSnapshots(const DeltaArchiveManifest & manifest)2135 Return SnapshotManager::CreateUpdateSnapshots(const DeltaArchiveManifest& manifest) {
2136     auto lock = LockExclusive();
2137     if (!lock) return Return::Error();
2138 
2139     // TODO(b/134949511): remove this check. Right now, with overlayfs mounted, the scratch
2140     // partition takes up a big chunk of space in super, causing COW images to be created on
2141     // retrofit Virtual A/B devices.
2142     if (device_->IsOverlayfsSetup()) {
2143         LOG(ERROR) << "Cannot create update snapshots with overlayfs setup. Run `adb enable-verity`"
2144                    << ", reboot, then try again.";
2145         return Return::Error();
2146     }
2147 
2148     const auto& opener = device_->GetPartitionOpener();
2149     auto current_suffix = device_->GetSlotSuffix();
2150     uint32_t current_slot = SlotNumberForSlotSuffix(current_suffix);
2151     auto target_suffix = device_->GetOtherSlotSuffix();
2152     uint32_t target_slot = SlotNumberForSlotSuffix(target_suffix);
2153     auto current_super = device_->GetSuperDevice(current_slot);
2154 
2155     auto current_metadata = MetadataBuilder::New(opener, current_super, current_slot);
2156     if (current_metadata == nullptr) {
2157         LOG(ERROR) << "Cannot create metadata builder.";
2158         return Return::Error();
2159     }
2160 
2161     auto target_metadata =
2162             MetadataBuilder::NewForUpdate(opener, current_super, current_slot, target_slot);
2163     if (target_metadata == nullptr) {
2164         LOG(ERROR) << "Cannot create target metadata builder.";
2165         return Return::Error();
2166     }
2167 
2168     // Delete partitions with target suffix in |current_metadata|. Otherwise,
2169     // partition_cow_creator recognizes these left-over partitions as used space.
2170     for (const auto& group_name : current_metadata->ListGroups()) {
2171         if (android::base::EndsWith(group_name, target_suffix)) {
2172             current_metadata->RemoveGroupAndPartitions(group_name);
2173         }
2174     }
2175 
2176     SnapshotMetadataUpdater metadata_updater(target_metadata.get(), target_slot, manifest);
2177     if (!metadata_updater.Update()) {
2178         LOG(ERROR) << "Cannot calculate new metadata.";
2179         return Return::Error();
2180     }
2181 
2182     // Delete previous COW partitions in current_metadata so that PartitionCowCreator marks those as
2183     // free regions.
2184     UnmapAndDeleteCowPartition(current_metadata.get());
2185 
2186     // Check that all these metadata is not retrofit dynamic partitions. Snapshots on
2187     // devices with retrofit dynamic partitions does not make sense.
2188     // This ensures that current_metadata->GetFreeRegions() uses the same device
2189     // indices as target_metadata (i.e. 0 -> "super").
2190     // This is also assumed in MapCowDevices() call below.
2191     CHECK(current_metadata->GetBlockDevicePartitionName(0) == LP_METADATA_DEFAULT_PARTITION_NAME &&
2192           target_metadata->GetBlockDevicePartitionName(0) == LP_METADATA_DEFAULT_PARTITION_NAME);
2193 
2194     std::map<std::string, SnapshotStatus> all_snapshot_status;
2195 
2196     // In case of error, automatically delete devices that are created along the way.
2197     // Note that "lock" is destroyed after "created_devices", so it is safe to use |lock| for
2198     // these devices.
2199     AutoDeviceList created_devices;
2200 
2201     PartitionCowCreator cow_creator{
2202             .target_metadata = target_metadata.get(),
2203             .target_suffix = target_suffix,
2204             .target_partition = nullptr,
2205             .current_metadata = current_metadata.get(),
2206             .current_suffix = current_suffix,
2207             .operations = nullptr,
2208             .extra_extents = {},
2209     };
2210 
2211     auto ret = CreateUpdateSnapshotsInternal(lock.get(), manifest, &cow_creator, &created_devices,
2212                                              &all_snapshot_status);
2213     if (!ret.is_ok()) return ret;
2214 
2215     auto exported_target_metadata = target_metadata->Export();
2216     if (exported_target_metadata == nullptr) {
2217         LOG(ERROR) << "Cannot export target metadata";
2218         return Return::Error();
2219     }
2220 
2221     ret = InitializeUpdateSnapshots(lock.get(), target_metadata.get(),
2222                                     exported_target_metadata.get(), target_suffix,
2223                                     all_snapshot_status);
2224     if (!ret.is_ok()) return ret;
2225 
2226     if (!UpdatePartitionTable(opener, device_->GetSuperDevice(target_slot),
2227                               *exported_target_metadata, target_slot)) {
2228         LOG(ERROR) << "Cannot write target metadata";
2229         return Return::Error();
2230     }
2231 
2232     created_devices.Release();
2233     LOG(INFO) << "Successfully created all snapshots for target slot " << target_suffix;
2234 
2235     return Return::Ok();
2236 }
2237 
CreateUpdateSnapshotsInternal(LockedFile * lock,const DeltaArchiveManifest & manifest,PartitionCowCreator * cow_creator,AutoDeviceList * created_devices,std::map<std::string,SnapshotStatus> * all_snapshot_status)2238 Return SnapshotManager::CreateUpdateSnapshotsInternal(
2239         LockedFile* lock, const DeltaArchiveManifest& manifest, PartitionCowCreator* cow_creator,
2240         AutoDeviceList* created_devices,
2241         std::map<std::string, SnapshotStatus>* all_snapshot_status) {
2242     CHECK(lock);
2243 
2244     auto* target_metadata = cow_creator->target_metadata;
2245     const auto& target_suffix = cow_creator->target_suffix;
2246 
2247     if (!target_metadata->AddGroup(kCowGroupName, 0)) {
2248         LOG(ERROR) << "Cannot add group " << kCowGroupName;
2249         return Return::Error();
2250     }
2251 
2252     std::map<std::string, const RepeatedPtrField<InstallOperation>*> install_operation_map;
2253     std::map<std::string, std::vector<Extent>> extra_extents_map;
2254     for (const auto& partition_update : manifest.partitions()) {
2255         auto suffixed_name = partition_update.partition_name() + target_suffix;
2256         auto&& [it, inserted] =
2257                 install_operation_map.emplace(suffixed_name, &partition_update.operations());
2258         if (!inserted) {
2259             LOG(ERROR) << "Duplicated partition " << partition_update.partition_name()
2260                        << " in update manifest.";
2261             return Return::Error();
2262         }
2263 
2264         auto& extra_extents = extra_extents_map[suffixed_name];
2265         if (partition_update.has_hash_tree_extent()) {
2266             extra_extents.push_back(partition_update.hash_tree_extent());
2267         }
2268         if (partition_update.has_fec_extent()) {
2269             extra_extents.push_back(partition_update.fec_extent());
2270         }
2271     }
2272 
2273     for (auto* target_partition : ListPartitionsWithSuffix(target_metadata, target_suffix)) {
2274         cow_creator->target_partition = target_partition;
2275         cow_creator->operations = nullptr;
2276         auto operations_it = install_operation_map.find(target_partition->name());
2277         if (operations_it != install_operation_map.end()) {
2278             cow_creator->operations = operations_it->second;
2279         } else {
2280             LOG(INFO) << target_partition->name()
2281                       << " isn't included in the payload, skipping the cow creation.";
2282             continue;
2283         }
2284 
2285         cow_creator->extra_extents.clear();
2286         auto extra_extents_it = extra_extents_map.find(target_partition->name());
2287         if (extra_extents_it != extra_extents_map.end()) {
2288             cow_creator->extra_extents = std::move(extra_extents_it->second);
2289         }
2290 
2291         // Compute the device sizes for the partition.
2292         auto cow_creator_ret = cow_creator->Run();
2293         if (!cow_creator_ret.has_value()) {
2294             return Return::Error();
2295         }
2296 
2297         LOG(INFO) << "For partition " << target_partition->name()
2298                   << ", device size = " << cow_creator_ret->snapshot_status.device_size()
2299                   << ", snapshot size = " << cow_creator_ret->snapshot_status.snapshot_size()
2300                   << ", cow partition size = "
2301                   << cow_creator_ret->snapshot_status.cow_partition_size()
2302                   << ", cow file size = " << cow_creator_ret->snapshot_status.cow_file_size();
2303 
2304         // Delete any existing snapshot before re-creating one.
2305         if (!DeleteSnapshot(lock, target_partition->name())) {
2306             LOG(ERROR) << "Cannot delete existing snapshot before creating a new one for partition "
2307                        << target_partition->name();
2308             return Return::Error();
2309         }
2310 
2311         // It is possible that the whole partition uses free space in super, and snapshot / COW
2312         // would not be needed. In this case, skip the partition.
2313         bool needs_snapshot = cow_creator_ret->snapshot_status.snapshot_size() > 0;
2314         bool needs_cow = (cow_creator_ret->snapshot_status.cow_partition_size() +
2315                           cow_creator_ret->snapshot_status.cow_file_size()) > 0;
2316         CHECK(needs_snapshot == needs_cow);
2317 
2318         if (!needs_snapshot) {
2319             LOG(INFO) << "Skip creating snapshot for partition " << target_partition->name()
2320                       << "because nothing needs to be snapshotted.";
2321             continue;
2322         }
2323 
2324         // Store these device sizes to snapshot status file.
2325         if (!CreateSnapshot(lock, &cow_creator_ret->snapshot_status)) {
2326             return Return::Error();
2327         }
2328         created_devices->EmplaceBack<AutoDeleteSnapshot>(this, lock, target_partition->name());
2329 
2330         // Create the COW partition. That is, use any remaining free space in super partition before
2331         // creating the COW images.
2332         if (cow_creator_ret->snapshot_status.cow_partition_size() > 0) {
2333             CHECK(cow_creator_ret->snapshot_status.cow_partition_size() % kSectorSize == 0)
2334                     << "cow_partition_size == "
2335                     << cow_creator_ret->snapshot_status.cow_partition_size()
2336                     << " is not a multiple of sector size " << kSectorSize;
2337             auto cow_partition = target_metadata->AddPartition(GetCowName(target_partition->name()),
2338                                                                kCowGroupName, 0 /* flags */);
2339             if (cow_partition == nullptr) {
2340                 return Return::Error();
2341             }
2342 
2343             if (!target_metadata->ResizePartition(
2344                         cow_partition, cow_creator_ret->snapshot_status.cow_partition_size(),
2345                         cow_creator_ret->cow_partition_usable_regions)) {
2346                 LOG(ERROR) << "Cannot create COW partition on metadata with size "
2347                            << cow_creator_ret->snapshot_status.cow_partition_size();
2348                 return Return::Error();
2349             }
2350             // Only the in-memory target_metadata is modified; nothing to clean up if there is an
2351             // error in the future.
2352         }
2353 
2354         all_snapshot_status->emplace(target_partition->name(),
2355                                      std::move(cow_creator_ret->snapshot_status));
2356 
2357         LOG(INFO) << "Successfully created snapshot partition for " << target_partition->name();
2358     }
2359 
2360     LOG(INFO) << "Allocating CoW images.";
2361 
2362     for (auto&& [name, snapshot_status] : *all_snapshot_status) {
2363         // Create the backing COW image if necessary.
2364         if (snapshot_status.cow_file_size() > 0) {
2365             auto ret = CreateCowImage(lock, name);
2366             if (!ret.is_ok()) return AddRequiredSpace(ret, *all_snapshot_status);
2367         }
2368 
2369         LOG(INFO) << "Successfully created snapshot for " << name;
2370     }
2371 
2372     return Return::Ok();
2373 }
2374 
InitializeUpdateSnapshots(LockedFile * lock,MetadataBuilder * target_metadata,const LpMetadata * exported_target_metadata,const std::string & target_suffix,const std::map<std::string,SnapshotStatus> & all_snapshot_status)2375 Return SnapshotManager::InitializeUpdateSnapshots(
2376         LockedFile* lock, MetadataBuilder* target_metadata,
2377         const LpMetadata* exported_target_metadata, const std::string& target_suffix,
2378         const std::map<std::string, SnapshotStatus>& all_snapshot_status) {
2379     CHECK(lock);
2380 
2381     CreateLogicalPartitionParams cow_params{
2382             .block_device = LP_METADATA_DEFAULT_PARTITION_NAME,
2383             .metadata = exported_target_metadata,
2384             .timeout_ms = std::chrono::milliseconds::max(),
2385             .partition_opener = &device_->GetPartitionOpener(),
2386     };
2387     for (auto* target_partition : ListPartitionsWithSuffix(target_metadata, target_suffix)) {
2388         AutoDeviceList created_devices_for_cow;
2389 
2390         if (!UnmapPartitionWithSnapshot(lock, target_partition->name())) {
2391             LOG(ERROR) << "Cannot unmap existing COW devices before re-mapping them for zero-fill: "
2392                        << target_partition->name();
2393             return Return::Error();
2394         }
2395 
2396         auto it = all_snapshot_status.find(target_partition->name());
2397         if (it == all_snapshot_status.end()) continue;
2398         cow_params.partition_name = target_partition->name();
2399         std::string cow_name;
2400         if (!MapCowDevices(lock, cow_params, it->second, &created_devices_for_cow, &cow_name)) {
2401             return Return::Error();
2402         }
2403 
2404         std::string cow_path;
2405         if (!images_->GetMappedImageDevice(cow_name, &cow_path)) {
2406             LOG(ERROR) << "Cannot determine path for " << cow_name;
2407             return Return::Error();
2408         }
2409 
2410         auto ret = InitializeCow(cow_path);
2411         if (!ret.is_ok()) {
2412             LOG(ERROR) << "Can't zero-fill COW device for " << target_partition->name() << ": "
2413                        << cow_path;
2414             return AddRequiredSpace(ret, all_snapshot_status);
2415         }
2416         // Let destructor of created_devices_for_cow to unmap the COW devices.
2417     };
2418     return Return::Ok();
2419 }
2420 
MapUpdateSnapshot(const CreateLogicalPartitionParams & params,std::string * snapshot_path)2421 bool SnapshotManager::MapUpdateSnapshot(const CreateLogicalPartitionParams& params,
2422                                         std::string* snapshot_path) {
2423     auto lock = LockShared();
2424     if (!lock) return false;
2425     if (!UnmapPartitionWithSnapshot(lock.get(), params.GetPartitionName())) {
2426         LOG(ERROR) << "Cannot unmap existing snapshot before re-mapping it: "
2427                    << params.GetPartitionName();
2428         return false;
2429     }
2430     return MapPartitionWithSnapshot(lock.get(), params, snapshot_path);
2431 }
2432 
UnmapUpdateSnapshot(const std::string & target_partition_name)2433 bool SnapshotManager::UnmapUpdateSnapshot(const std::string& target_partition_name) {
2434     auto lock = LockShared();
2435     if (!lock) return false;
2436     return UnmapPartitionWithSnapshot(lock.get(), target_partition_name);
2437 }
2438 
UnmapAllPartitions()2439 bool SnapshotManager::UnmapAllPartitions() {
2440     auto lock = LockExclusive();
2441     if (!lock) return false;
2442 
2443     const auto& opener = device_->GetPartitionOpener();
2444     uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
2445     auto super_device = device_->GetSuperDevice(slot);
2446     auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot);
2447     if (!metadata) {
2448         LOG(ERROR) << "Could not read dynamic partition metadata for device: " << super_device;
2449         return false;
2450     }
2451 
2452     bool ok = true;
2453     for (const auto& partition : metadata->partitions) {
2454         auto partition_name = GetPartitionName(partition);
2455         ok &= UnmapPartitionWithSnapshot(lock.get(), partition_name);
2456     }
2457     return ok;
2458 }
2459 
operator <<(std::ostream & os,SnapshotManager::Slot slot)2460 std::ostream& operator<<(std::ostream& os, SnapshotManager::Slot slot) {
2461     switch (slot) {
2462         case SnapshotManager::Slot::Unknown:
2463             return os << "unknown";
2464         case SnapshotManager::Slot::Source:
2465             return os << "source";
2466         case SnapshotManager::Slot::Target:
2467             return os << "target";
2468     }
2469 }
2470 
Dump(std::ostream & os)2471 bool SnapshotManager::Dump(std::ostream& os) {
2472     // Don't actually lock. Dump() is for debugging purposes only, so it is okay
2473     // if it is racy.
2474     auto file = OpenLock(0 /* lock flag */);
2475     if (!file) return false;
2476 
2477     std::stringstream ss;
2478 
2479     ss << "Update state: " << ReadUpdateState(file.get()) << std::endl;
2480 
2481     ss << "Current slot: " << device_->GetSlotSuffix() << std::endl;
2482     ss << "Boot indicator: booting from " << GetCurrentSlot() << " slot" << std::endl;
2483     ss << "Rollback indicator: "
2484        << (access(GetRollbackIndicatorPath().c_str(), F_OK) == 0 ? "exists" : strerror(errno))
2485        << std::endl;
2486     ss << "Forward merge indicator: "
2487        << (access(GetForwardMergeIndicatorPath().c_str(), F_OK) == 0 ? "exists" : strerror(errno))
2488        << std::endl;
2489 
2490     bool ok = true;
2491     std::vector<std::string> snapshots;
2492     if (!ListSnapshots(file.get(), &snapshots)) {
2493         LOG(ERROR) << "Could not list snapshots";
2494         snapshots.clear();
2495         ok = false;
2496     }
2497     for (const auto& name : snapshots) {
2498         ss << "Snapshot: " << name << std::endl;
2499         SnapshotStatus status;
2500         if (!ReadSnapshotStatus(file.get(), name, &status)) {
2501             ok = false;
2502             continue;
2503         }
2504         ss << "    state: " << SnapshotState_Name(status.state()) << std::endl;
2505         ss << "    device size (bytes): " << status.device_size() << std::endl;
2506         ss << "    snapshot size (bytes): " << status.snapshot_size() << std::endl;
2507         ss << "    cow partition size (bytes): " << status.cow_partition_size() << std::endl;
2508         ss << "    cow file size (bytes): " << status.cow_file_size() << std::endl;
2509         ss << "    allocated sectors: " << status.sectors_allocated() << std::endl;
2510         ss << "    metadata sectors: " << status.metadata_sectors() << std::endl;
2511     }
2512     os << ss.rdbuf();
2513     return ok;
2514 }
2515 
EnsureMetadataMounted()2516 std::unique_ptr<AutoDevice> SnapshotManager::EnsureMetadataMounted() {
2517     if (!device_->IsRecovery()) {
2518         // No need to mount anything in recovery.
2519         LOG(INFO) << "EnsureMetadataMounted does nothing in Android mode.";
2520         return std::unique_ptr<AutoUnmountDevice>(new AutoUnmountDevice());
2521     }
2522     auto ret = AutoUnmountDevice::New(device_->GetMetadataDir());
2523     if (ret == nullptr) return nullptr;
2524 
2525     // In rescue mode, it is possible to erase and format metadata, but /metadata/ota is not
2526     // created to execute snapshot updates. Hence, subsequent calls is likely to fail because
2527     // Lock*() fails. By failing early and returning nullptr here, update_engine_sideload can
2528     // treat this case as if /metadata is not mounted.
2529     if (!LockShared()) {
2530         LOG(WARNING) << "/metadata is mounted, but errors occur when acquiring a shared lock. "
2531                         "Subsequent calls to SnapshotManager will fail. Unmounting /metadata now.";
2532         return nullptr;
2533     }
2534     return ret;
2535 }
2536 
HandleImminentDataWipe(const std::function<void ()> & callback)2537 bool SnapshotManager::HandleImminentDataWipe(const std::function<void()>& callback) {
2538     if (!device_->IsRecovery()) {
2539         LOG(ERROR) << "Data wipes are only allowed in recovery.";
2540         return false;
2541     }
2542 
2543     auto mount = EnsureMetadataMounted();
2544     if (!mount || !mount->HasDevice()) {
2545         // We allow the wipe to continue, because if we can't mount /metadata,
2546         // it is unlikely the device would have booted anyway. If there is no
2547         // metadata partition, then the device predates Virtual A/B.
2548         return true;
2549     }
2550 
2551     // Check this early, so we don't accidentally start trying to populate
2552     // the state file in recovery. Note we don't call GetUpdateState since
2553     // we want errors in acquiring the lock to be propagated, instead of
2554     // returning UpdateState::None.
2555     auto state_file = GetStateFilePath();
2556     if (access(state_file.c_str(), F_OK) != 0 && errno == ENOENT) {
2557         return true;
2558     }
2559 
2560     auto slot_number = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
2561     auto super_path = device_->GetSuperDevice(slot_number);
2562     if (!CreateLogicalAndSnapshotPartitions(super_path)) {
2563         LOG(ERROR) << "Unable to map partitions to complete merge.";
2564         return false;
2565     }
2566 
2567     auto process_callback = [&]() -> bool {
2568         if (callback) {
2569             callback();
2570         }
2571         return true;
2572     };
2573 
2574     in_factory_data_reset_ = true;
2575     bool ok = ProcessUpdateStateOnDataWipe(true /* allow_forward_merge */, process_callback);
2576     in_factory_data_reset_ = false;
2577 
2578     if (!ok) {
2579         return false;
2580     }
2581 
2582     // Nothing should be depending on partitions now, so unmap them all.
2583     if (!UnmapAllPartitions()) {
2584         LOG(ERROR) << "Unable to unmap all partitions; fastboot may fail to flash.";
2585     }
2586     return true;
2587 }
2588 
FinishMergeInRecovery()2589 bool SnapshotManager::FinishMergeInRecovery() {
2590     if (!device_->IsRecovery()) {
2591         LOG(ERROR) << "Data wipes are only allowed in recovery.";
2592         return false;
2593     }
2594 
2595     auto mount = EnsureMetadataMounted();
2596     if (!mount || !mount->HasDevice()) {
2597         return false;
2598     }
2599 
2600     auto slot_number = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
2601     auto super_path = device_->GetSuperDevice(slot_number);
2602     if (!CreateLogicalAndSnapshotPartitions(super_path)) {
2603         LOG(ERROR) << "Unable to map partitions to complete merge.";
2604         return false;
2605     }
2606 
2607     UpdateState state = ProcessUpdateState();
2608     if (state != UpdateState::MergeCompleted) {
2609         LOG(ERROR) << "Merge returned unexpected status: " << state;
2610         return false;
2611     }
2612 
2613     // Nothing should be depending on partitions now, so unmap them all.
2614     if (!UnmapAllPartitions()) {
2615         LOG(ERROR) << "Unable to unmap all partitions; fastboot may fail to flash.";
2616     }
2617     return true;
2618 }
2619 
ProcessUpdateStateOnDataWipe(bool allow_forward_merge,const std::function<bool ()> & callback)2620 bool SnapshotManager::ProcessUpdateStateOnDataWipe(bool allow_forward_merge,
2621                                                    const std::function<bool()>& callback) {
2622     auto slot_number = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
2623     UpdateState state = ProcessUpdateState(callback);
2624     LOG(INFO) << "Update state in recovery: " << state;
2625     switch (state) {
2626         case UpdateState::MergeFailed:
2627             LOG(ERROR) << "Unrecoverable merge failure detected.";
2628             return false;
2629         case UpdateState::Unverified: {
2630             // If an OTA was just applied but has not yet started merging:
2631             //
2632             // - if forward merge is allowed, initiate merge and call
2633             // ProcessUpdateState again.
2634             //
2635             // - if forward merge is not allowed, we
2636             // have no choice but to revert slots, because the current slot will
2637             // immediately become unbootable. Rather than wait for the device
2638             // to reboot N times until a rollback, we proactively disable the
2639             // new slot instead.
2640             //
2641             // Since the rollback is inevitable, we don't treat a HAL failure
2642             // as an error here.
2643             auto slot = GetCurrentSlot();
2644             if (slot == Slot::Target) {
2645                 if (allow_forward_merge &&
2646                     access(GetForwardMergeIndicatorPath().c_str(), F_OK) == 0) {
2647                     LOG(INFO) << "Forward merge allowed, initiating merge now.";
2648                     return InitiateMerge() &&
2649                            ProcessUpdateStateOnDataWipe(false /* allow_forward_merge */, callback);
2650                 }
2651 
2652                 LOG(ERROR) << "Reverting to old slot since update will be deleted.";
2653                 device_->SetSlotAsUnbootable(slot_number);
2654             } else {
2655                 LOG(INFO) << "Booting from " << slot << " slot, no action is taken.";
2656             }
2657             break;
2658         }
2659         case UpdateState::MergeNeedsReboot:
2660             // We shouldn't get here, because nothing is depending on
2661             // logical partitions.
2662             LOG(ERROR) << "Unexpected merge-needs-reboot state in recovery.";
2663             break;
2664         default:
2665             break;
2666     }
2667     return true;
2668 }
2669 
EnsureNoOverflowSnapshot(LockedFile * lock)2670 bool SnapshotManager::EnsureNoOverflowSnapshot(LockedFile* lock) {
2671     CHECK(lock);
2672 
2673     std::vector<std::string> snapshots;
2674     if (!ListSnapshots(lock, &snapshots)) {
2675         LOG(ERROR) << "Could not list snapshots.";
2676         return false;
2677     }
2678 
2679     auto& dm = DeviceMapper::Instance();
2680     for (const auto& snapshot : snapshots) {
2681         std::vector<DeviceMapper::TargetInfo> targets;
2682         if (!dm.GetTableStatus(snapshot, &targets)) {
2683             LOG(ERROR) << "Could not read snapshot device table: " << snapshot;
2684             return false;
2685         }
2686         if (targets.size() != 1) {
2687             LOG(ERROR) << "Unexpected device-mapper table for snapshot: " << snapshot
2688                        << ", size = " << targets.size();
2689             return false;
2690         }
2691         if (targets[0].IsOverflowSnapshot()) {
2692             LOG(ERROR) << "Detected overflow in snapshot " << snapshot
2693                        << ", CoW device size computation is wrong!";
2694             return false;
2695         }
2696     }
2697 
2698     return true;
2699 }
2700 
RecoveryCreateSnapshotDevices()2701 CreateResult SnapshotManager::RecoveryCreateSnapshotDevices() {
2702     if (!device_->IsRecovery()) {
2703         LOG(ERROR) << __func__ << " is only allowed in recovery.";
2704         return CreateResult::NOT_CREATED;
2705     }
2706 
2707     auto mount = EnsureMetadataMounted();
2708     if (!mount || !mount->HasDevice()) {
2709         LOG(ERROR) << "Couldn't mount Metadata.";
2710         return CreateResult::NOT_CREATED;
2711     }
2712     return RecoveryCreateSnapshotDevices(mount);
2713 }
2714 
RecoveryCreateSnapshotDevices(const std::unique_ptr<AutoDevice> & metadata_device)2715 CreateResult SnapshotManager::RecoveryCreateSnapshotDevices(
2716         const std::unique_ptr<AutoDevice>& metadata_device) {
2717     if (!device_->IsRecovery()) {
2718         LOG(ERROR) << __func__ << " is only allowed in recovery.";
2719         return CreateResult::NOT_CREATED;
2720     }
2721 
2722     if (metadata_device == nullptr || !metadata_device->HasDevice()) {
2723         LOG(ERROR) << "Metadata not mounted.";
2724         return CreateResult::NOT_CREATED;
2725     }
2726 
2727     auto state_file = GetStateFilePath();
2728     if (access(state_file.c_str(), F_OK) != 0 && errno == ENOENT) {
2729         LOG(ERROR) << "Couldn't access state file.";
2730         return CreateResult::NOT_CREATED;
2731     }
2732 
2733     if (!NeedSnapshotsInFirstStageMount()) {
2734         return CreateResult::NOT_CREATED;
2735     }
2736 
2737     auto slot_suffix = device_->GetOtherSlotSuffix();
2738     auto slot_number = SlotNumberForSlotSuffix(slot_suffix);
2739     auto super_path = device_->GetSuperDevice(slot_number);
2740     if (!CreateLogicalAndSnapshotPartitions(super_path)) {
2741         LOG(ERROR) << "Unable to map partitions.";
2742         return CreateResult::ERROR;
2743     }
2744     return CreateResult::CREATED;
2745 }
2746 
UpdateForwardMergeIndicator(bool wipe)2747 bool SnapshotManager::UpdateForwardMergeIndicator(bool wipe) {
2748     auto path = GetForwardMergeIndicatorPath();
2749 
2750     if (!wipe) {
2751         LOG(INFO) << "Wipe is not scheduled. Deleting forward merge indicator.";
2752         return RemoveFileIfExists(path);
2753     }
2754 
2755     // TODO(b/152094219): Don't forward merge if no CoW file is allocated.
2756 
2757     LOG(INFO) << "Wipe will be scheduled. Allowing forward merge of snapshots.";
2758     if (!android::base::WriteStringToFile("1", path)) {
2759         PLOG(ERROR) << "Unable to write forward merge indicator: " << path;
2760         return false;
2761     }
2762 
2763     return true;
2764 }
2765 
GetSnapshotMergeStatsInstance()2766 ISnapshotMergeStats* SnapshotManager::GetSnapshotMergeStatsInstance() {
2767     return SnapshotMergeStats::GetInstance(*this);
2768 }
2769 
GetMappedImageDeviceStringOrPath(const std::string & device_name,std::string * device_string_or_mapped_path)2770 bool SnapshotManager::GetMappedImageDeviceStringOrPath(const std::string& device_name,
2771                                                        std::string* device_string_or_mapped_path) {
2772     auto& dm = DeviceMapper::Instance();
2773     // Try getting the device string if it is a device mapper device.
2774     if (dm.GetState(device_name) != DmDeviceState::INVALID) {
2775         return dm.GetDeviceString(device_name, device_string_or_mapped_path);
2776     }
2777 
2778     // Otherwise, get path from IImageManager.
2779     if (!images_->GetMappedImageDevice(device_name, device_string_or_mapped_path)) {
2780         return false;
2781     }
2782 
2783     LOG(WARNING) << "Calling GetMappedImageDevice with local image manager; device "
2784                  << (device_string_or_mapped_path ? *device_string_or_mapped_path : "(nullptr)")
2785                  << "may not be available in first stage init! ";
2786     return true;
2787 }
2788 
2789 }  // namespace snapshot
2790 }  // namespace android
2791