1 // Copyright (C) 2019 The Android Open Source Project
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include <libsnapshot/snapshot.h>
16
17 #include <dirent.h>
18 #include <math.h>
19 #include <sys/file.h>
20 #include <sys/types.h>
21 #include <sys/unistd.h>
22
23 #include <optional>
24 #include <thread>
25 #include <unordered_set>
26
27 #include <android-base/file.h>
28 #include <android-base/logging.h>
29 #include <android-base/parseint.h>
30 #include <android-base/strings.h>
31 #include <android-base/unique_fd.h>
32 #include <ext4_utils/ext4_utils.h>
33 #include <fs_mgr.h>
34 #include <fs_mgr_dm_linear.h>
35 #include <fstab/fstab.h>
36 #include <libdm/dm.h>
37 #include <libfiemap/image_manager.h>
38 #include <liblp/liblp.h>
39
40 #include <android/snapshot/snapshot.pb.h>
41 #include <libsnapshot/snapshot_stats.h>
42 #include "device_info.h"
43 #include "partition_cow_creator.h"
44 #include "snapshot_metadata_updater.h"
45 #include "utility.h"
46
47 namespace android {
48 namespace snapshot {
49
50 using android::base::unique_fd;
51 using android::dm::DeviceMapper;
52 using android::dm::DmDeviceState;
53 using android::dm::DmTable;
54 using android::dm::DmTargetLinear;
55 using android::dm::DmTargetSnapshot;
56 using android::dm::kSectorSize;
57 using android::dm::SnapshotStorageMode;
58 using android::fiemap::FiemapStatus;
59 using android::fiemap::IImageManager;
60 using android::fs_mgr::CreateDmTable;
61 using android::fs_mgr::CreateLogicalPartition;
62 using android::fs_mgr::CreateLogicalPartitionParams;
63 using android::fs_mgr::GetPartitionGroupName;
64 using android::fs_mgr::GetPartitionName;
65 using android::fs_mgr::LpMetadata;
66 using android::fs_mgr::MetadataBuilder;
67 using android::fs_mgr::SlotNumberForSlotSuffix;
68 using android::hardware::boot::V1_1::MergeStatus;
69 using chromeos_update_engine::DeltaArchiveManifest;
70 using chromeos_update_engine::Extent;
71 using chromeos_update_engine::InstallOperation;
72 template <typename T>
73 using RepeatedPtrField = google::protobuf::RepeatedPtrField<T>;
74 using std::chrono::duration_cast;
75 using namespace std::chrono_literals;
76 using namespace std::string_literals;
77
// Fixed location of the snapshot boot indicator file.
// NOTE(review): the functions visible here go through
// GetSnapshotBootIndicatorPath() instead of this constant - confirm where it
// is still used.
static constexpr char kBootIndicatorPath[] = "/metadata/ota/snapshot-boot";
// Fixed location of the rollback indicator file.
// NOTE(review): the functions visible here go through
// GetRollbackIndicatorPath() instead of this constant - confirm where it is
// still used.
static constexpr char kRollbackIndicatorPath[] = "/metadata/ota/rollback-indicator";
// Delay between two consecutive merge-state polls in ProcessUpdateState().
static constexpr auto kUpdateStateCheckInterval = 2s;
81
82 // Note: IImageManager is an incomplete type in the header, so the default
83 // destructor doesn't work.
~SnapshotManager()84 SnapshotManager::~SnapshotManager() {}
85
New(IDeviceInfo * info)86 std::unique_ptr<SnapshotManager> SnapshotManager::New(IDeviceInfo* info) {
87 if (!info) {
88 info = new DeviceInfo();
89 }
90 return std::unique_ptr<SnapshotManager>(new SnapshotManager(info));
91 }
92
NewForFirstStageMount(IDeviceInfo * info)93 std::unique_ptr<SnapshotManager> SnapshotManager::NewForFirstStageMount(IDeviceInfo* info) {
94 auto sm = New(info);
95 if (!sm || !sm->ForceLocalImageManager()) {
96 return nullptr;
97 }
98 return sm;
99 }
100
SnapshotManager(IDeviceInfo * device)101 SnapshotManager::SnapshotManager(IDeviceInfo* device) : device_(device) {
102 gsid_dir_ = device_->GetGsidDir();
103 metadata_dir_ = device_->GetMetadataDir();
104 }
105
// Returns |snapshot_name| with the "-cow" suffix appended.
static std::string GetCowName(const std::string& snapshot_name) {
    std::string cow_name(snapshot_name);
    cow_name.append("-cow");
    return cow_name;
}
109
// Returns |snapshot_name| with the "-cow-img" suffix appended; this is the
// name under which the COW backing image is created, mapped and deleted.
static std::string GetCowImageDeviceName(const std::string& snapshot_name) {
    std::string image_name(snapshot_name);
    image_name.append("-cow-img");
    return image_name;
}
113
// Returns |partition_name| with the "-base" suffix appended.
static std::string GetBaseDeviceName(const std::string& partition_name) {
    std::string base_name(partition_name);
    base_name.append("-base");
    return base_name;
}
117
// Returns |snapshot_name| with the "-inner" suffix appended; this is the name
// of the inner snapshot device used when a snapshot is stacked under a linear
// device.
static std::string GetSnapshotExtraDeviceName(const std::string& snapshot_name) {
    std::string inner_name(snapshot_name);
    inner_name.append("-inner");
    return inner_name;
}
121
BeginUpdate()122 bool SnapshotManager::BeginUpdate() {
123 bool needs_merge = false;
124 if (!TryCancelUpdate(&needs_merge)) {
125 return false;
126 }
127 if (needs_merge) {
128 LOG(INFO) << "Wait for merge (if any) before beginning a new update.";
129 auto state = ProcessUpdateState();
130 LOG(INFO) << "Merged with state = " << state;
131 }
132
133 auto file = LockExclusive();
134 if (!file) return false;
135
136 // Purge the ImageManager just in case there is a corrupt lp_metadata file
137 // lying around. (NB: no need to return false on an error, we can let the
138 // update try to progress.)
139 if (EnsureImageManager()) {
140 images_->RemoveAllImages();
141 }
142
143 auto state = ReadUpdateState(file.get());
144 if (state != UpdateState::None) {
145 LOG(ERROR) << "An update is already in progress, cannot begin a new update";
146 return false;
147 }
148 return WriteUpdateState(file.get(), UpdateState::Initiated);
149 }
150
CancelUpdate()151 bool SnapshotManager::CancelUpdate() {
152 bool needs_merge = false;
153 if (!TryCancelUpdate(&needs_merge)) {
154 return false;
155 }
156 if (needs_merge) {
157 LOG(ERROR) << "Cannot cancel update after it has completed or started merging";
158 }
159 return !needs_merge;
160 }
161
TryCancelUpdate(bool * needs_merge)162 bool SnapshotManager::TryCancelUpdate(bool* needs_merge) {
163 *needs_merge = false;
164
165 auto file = LockExclusive();
166 if (!file) return false;
167
168 UpdateState state = ReadUpdateState(file.get());
169 if (state == UpdateState::None) return true;
170
171 if (state == UpdateState::Initiated) {
172 LOG(INFO) << "Update has been initiated, now canceling";
173 return RemoveAllUpdateState(file.get());
174 }
175
176 if (state == UpdateState::Unverified) {
177 // We completed an update, but it can still be canceled if we haven't booted into it.
178 auto slot = GetCurrentSlot();
179 if (slot != Slot::Target) {
180 LOG(INFO) << "Canceling previously completed updates (if any)";
181 return RemoveAllUpdateState(file.get());
182 }
183 }
184 *needs_merge = true;
185 return true;
186 }
187
ReadUpdateSourceSlotSuffix()188 std::string SnapshotManager::ReadUpdateSourceSlotSuffix() {
189 auto boot_file = GetSnapshotBootIndicatorPath();
190 std::string contents;
191 if (!android::base::ReadFileToString(boot_file, &contents)) {
192 PLOG(WARNING) << "Cannot read " << boot_file;
193 return {};
194 }
195 return contents;
196 }
197
GetCurrentSlot()198 SnapshotManager::Slot SnapshotManager::GetCurrentSlot() {
199 auto contents = ReadUpdateSourceSlotSuffix();
200 if (contents.empty()) {
201 return Slot::Unknown;
202 }
203 if (device_->GetSlotSuffix() == contents) {
204 return Slot::Source;
205 }
206 return Slot::Target;
207 }
208
RemoveFileIfExists(const std::string & path)209 static bool RemoveFileIfExists(const std::string& path) {
210 std::string message;
211 if (!android::base::RemoveFileIfExists(path, &message)) {
212 LOG(ERROR) << "Remove failed: " << path << ": " << message;
213 return false;
214 }
215 return true;
216 }
217
RemoveAllUpdateState(LockedFile * lock,const std::function<bool ()> & prolog)218 bool SnapshotManager::RemoveAllUpdateState(LockedFile* lock, const std::function<bool()>& prolog) {
219 if (prolog && !prolog()) {
220 LOG(WARNING) << "Can't RemoveAllUpdateState: prolog failed.";
221 return false;
222 }
223
224 LOG(INFO) << "Removing all update state.";
225
226 if (!RemoveAllSnapshots(lock)) {
227 LOG(ERROR) << "Could not remove all snapshots";
228 return false;
229 }
230
231 // It's okay if these fail:
232 // - For SnapshotBoot and Rollback, first-stage init performs a deeper check after
233 // reading the indicator file, so it's not a problem if it still exists
234 // after the update completes.
235 // - For ForwardMerge, FinishedSnapshotWrites asserts that the existence of the indicator
236 // matches the incoming update.
237 std::vector<std::string> files = {
238 GetSnapshotBootIndicatorPath(),
239 GetRollbackIndicatorPath(),
240 GetForwardMergeIndicatorPath(),
241 };
242 for (const auto& file : files) {
243 RemoveFileIfExists(file);
244 }
245
246 // If this fails, we'll keep trying to remove the update state (as the
247 // device reboots or starts a new update) until it finally succeeds.
248 return WriteUpdateState(lock, UpdateState::None);
249 }
250
FinishedSnapshotWrites(bool wipe)251 bool SnapshotManager::FinishedSnapshotWrites(bool wipe) {
252 auto lock = LockExclusive();
253 if (!lock) return false;
254
255 auto update_state = ReadUpdateState(lock.get());
256 if (update_state == UpdateState::Unverified) {
257 LOG(INFO) << "FinishedSnapshotWrites already called before. Ignored.";
258 return true;
259 }
260
261 if (update_state != UpdateState::Initiated) {
262 LOG(ERROR) << "Can only transition to the Unverified state from the Initiated state.";
263 return false;
264 }
265
266 if (!EnsureNoOverflowSnapshot(lock.get())) {
267 LOG(ERROR) << "Cannot ensure there are no overflow snapshots.";
268 return false;
269 }
270
271 if (!UpdateForwardMergeIndicator(wipe)) {
272 return false;
273 }
274
275 // This file is written on boot to detect whether a rollback occurred. It
276 // MUST NOT exist before rebooting, otherwise, we're at risk of deleting
277 // snapshots too early.
278 if (!RemoveFileIfExists(GetRollbackIndicatorPath())) {
279 return false;
280 }
281
282 // This file acts as both a quick indicator for init (it can use access(2)
283 // to decide how to do first-stage mounts), and it stores the old slot, so
284 // we can tell whether or not we performed a rollback.
285 auto contents = device_->GetSlotSuffix();
286 auto boot_file = GetSnapshotBootIndicatorPath();
287 if (!WriteStringToFileAtomic(contents, boot_file)) {
288 PLOG(ERROR) << "write failed: " << boot_file;
289 return false;
290 }
291 return WriteUpdateState(lock.get(), UpdateState::Unverified);
292 }
293
CreateSnapshot(LockedFile * lock,SnapshotStatus * status)294 bool SnapshotManager::CreateSnapshot(LockedFile* lock, SnapshotStatus* status) {
295 CHECK(lock);
296 CHECK(lock->lock_mode() == LOCK_EX);
297 CHECK(status);
298
299 if (status->name().empty()) {
300 LOG(ERROR) << "SnapshotStatus has no name.";
301 return false;
302 }
303 // Check these sizes. Like liblp, we guarantee the partition size is
304 // respected, which means it has to be sector-aligned. (This guarantee is
305 // useful for locating avb footers correctly). The COW file size, however,
306 // can be arbitrarily larger than specified, so we can safely round it up.
307 if (status->device_size() % kSectorSize != 0) {
308 LOG(ERROR) << "Snapshot " << status->name()
309 << " device size is not a multiple of the sector size: "
310 << status->device_size();
311 return false;
312 }
313 if (status->snapshot_size() % kSectorSize != 0) {
314 LOG(ERROR) << "Snapshot " << status->name()
315 << " snapshot size is not a multiple of the sector size: "
316 << status->snapshot_size();
317 return false;
318 }
319 if (status->cow_partition_size() % kSectorSize != 0) {
320 LOG(ERROR) << "Snapshot " << status->name()
321 << " cow partition size is not a multiple of the sector size: "
322 << status->cow_partition_size();
323 return false;
324 }
325 if (status->cow_file_size() % kSectorSize != 0) {
326 LOG(ERROR) << "Snapshot " << status->name()
327 << " cow file size is not a multiple of the sector size: "
328 << status->cow_file_size();
329 return false;
330 }
331
332 status->set_state(SnapshotState::CREATED);
333 status->set_sectors_allocated(0);
334 status->set_metadata_sectors(0);
335
336 if (!WriteSnapshotStatus(lock, *status)) {
337 PLOG(ERROR) << "Could not write snapshot status: " << status->name();
338 return false;
339 }
340 return true;
341 }
342
CreateCowImage(LockedFile * lock,const std::string & name)343 Return SnapshotManager::CreateCowImage(LockedFile* lock, const std::string& name) {
344 CHECK(lock);
345 CHECK(lock->lock_mode() == LOCK_EX);
346 if (!EnsureImageManager()) return Return::Error();
347
348 SnapshotStatus status;
349 if (!ReadSnapshotStatus(lock, name, &status)) {
350 return Return::Error();
351 }
352
353 // The COW file size should have been rounded up to the nearest sector in CreateSnapshot.
354 if (status.cow_file_size() % kSectorSize != 0) {
355 LOG(ERROR) << "Snapshot " << name << " COW file size is not a multiple of the sector size: "
356 << status.cow_file_size();
357 return Return::Error();
358 }
359
360 std::string cow_image_name = GetCowImageDeviceName(name);
361 int cow_flags = IImageManager::CREATE_IMAGE_DEFAULT;
362 return Return(images_->CreateBackingImage(cow_image_name, status.cow_file_size(), cow_flags));
363 }
364
MapSnapshot(LockedFile * lock,const std::string & name,const std::string & base_device,const std::string & cow_device,const std::chrono::milliseconds & timeout_ms,std::string * dev_path)365 bool SnapshotManager::MapSnapshot(LockedFile* lock, const std::string& name,
366 const std::string& base_device, const std::string& cow_device,
367 const std::chrono::milliseconds& timeout_ms,
368 std::string* dev_path) {
369 CHECK(lock);
370
371 SnapshotStatus status;
372 if (!ReadSnapshotStatus(lock, name, &status)) {
373 return false;
374 }
375 if (status.state() == SnapshotState::NONE || status.state() == SnapshotState::MERGE_COMPLETED) {
376 LOG(ERROR) << "Should not create a snapshot device for " << name
377 << " after merging has completed.";
378 return false;
379 }
380
381 // Validate the block device size, as well as the requested snapshot size.
382 // Note that during first-stage init, we don't have the device paths.
383 if (android::base::StartsWith(base_device, "/")) {
384 unique_fd fd(open(base_device.c_str(), O_RDONLY | O_CLOEXEC));
385 if (fd < 0) {
386 PLOG(ERROR) << "open failed: " << base_device;
387 return false;
388 }
389 auto dev_size = get_block_device_size(fd);
390 if (!dev_size) {
391 PLOG(ERROR) << "Could not determine block device size: " << base_device;
392 return false;
393 }
394 if (status.device_size() != dev_size) {
395 LOG(ERROR) << "Block device size for " << base_device << " does not match"
396 << "(expected " << status.device_size() << ", got " << dev_size << ")";
397 return false;
398 }
399 }
400 if (status.device_size() % kSectorSize != 0) {
401 LOG(ERROR) << "invalid blockdev size for " << base_device << ": " << status.device_size();
402 return false;
403 }
404 if (status.snapshot_size() % kSectorSize != 0 ||
405 status.snapshot_size() > status.device_size()) {
406 LOG(ERROR) << "Invalid snapshot size for " << base_device << ": " << status.snapshot_size();
407 return false;
408 }
409 uint64_t snapshot_sectors = status.snapshot_size() / kSectorSize;
410 uint64_t linear_sectors = (status.device_size() - status.snapshot_size()) / kSectorSize;
411
412 auto& dm = DeviceMapper::Instance();
413
414 // Note that merging is a global state. We do track whether individual devices
415 // have completed merging, but the start of the merge process is considered
416 // atomic.
417 SnapshotStorageMode mode;
418 switch (ReadUpdateState(lock)) {
419 case UpdateState::MergeCompleted:
420 case UpdateState::MergeNeedsReboot:
421 LOG(ERROR) << "Should not create a snapshot device for " << name
422 << " after global merging has completed.";
423 return false;
424 case UpdateState::Merging:
425 case UpdateState::MergeFailed:
426 // Note: MergeFailed indicates that a merge is in progress, but
427 // is possibly stalled. We still have to honor the merge.
428 mode = SnapshotStorageMode::Merge;
429 break;
430 default:
431 mode = SnapshotStorageMode::Persistent;
432 break;
433 }
434
435 // The kernel (tested on 4.19) crashes horribly if a device has both a snapshot
436 // and a linear target in the same table. Instead, we stack them, and give the
437 // snapshot device a different name. It is not exposed to the caller in this
438 // case.
439 auto snap_name = (linear_sectors > 0) ? GetSnapshotExtraDeviceName(name) : name;
440
441 DmTable table;
442 table.Emplace<DmTargetSnapshot>(0, snapshot_sectors, base_device, cow_device, mode,
443 kSnapshotChunkSize);
444 if (!dm.CreateDevice(snap_name, table, dev_path, timeout_ms)) {
445 LOG(ERROR) << "Could not create snapshot device: " << snap_name;
446 return false;
447 }
448
449 if (linear_sectors) {
450 std::string snap_dev;
451 if (!dm.GetDeviceString(snap_name, &snap_dev)) {
452 LOG(ERROR) << "Cannot determine major/minor for: " << snap_name;
453 return false;
454 }
455
456 // Our stacking will looks like this:
457 // [linear, linear] ; to snapshot, and non-snapshot region of base device
458 // [snapshot-inner]
459 // [base device] [cow]
460 DmTable table;
461 table.Emplace<DmTargetLinear>(0, snapshot_sectors, snap_dev, 0);
462 table.Emplace<DmTargetLinear>(snapshot_sectors, linear_sectors, base_device,
463 snapshot_sectors);
464 if (!dm.CreateDevice(name, table, dev_path, timeout_ms)) {
465 LOG(ERROR) << "Could not create outer snapshot device: " << name;
466 dm.DeleteDevice(snap_name);
467 return false;
468 }
469 }
470
471 // :TODO: when merging is implemented, we need to add an argument to the
472 // status indicating how much progress is left to merge. (device-mapper
473 // does not retain the initial values, so we can't derive them.)
474 return true;
475 }
476
MapCowImage(const std::string & name,const std::chrono::milliseconds & timeout_ms)477 std::optional<std::string> SnapshotManager::MapCowImage(
478 const std::string& name, const std::chrono::milliseconds& timeout_ms) {
479 if (!EnsureImageManager()) return std::nullopt;
480 auto cow_image_name = GetCowImageDeviceName(name);
481
482 bool ok;
483 std::string cow_dev;
484 if (has_local_image_manager_) {
485 // If we forced a local image manager, it means we don't have binder,
486 // which means first-stage init. We must use device-mapper.
487 const auto& opener = device_->GetPartitionOpener();
488 ok = images_->MapImageWithDeviceMapper(opener, cow_image_name, &cow_dev);
489 } else {
490 ok = images_->MapImageDevice(cow_image_name, timeout_ms, &cow_dev);
491 }
492
493 if (ok) {
494 LOG(INFO) << "Mapped " << cow_image_name << " to " << cow_dev;
495 return cow_dev;
496 }
497 LOG(ERROR) << "Could not map image device: " << cow_image_name;
498 return std::nullopt;
499 }
500
UnmapSnapshot(LockedFile * lock,const std::string & name)501 bool SnapshotManager::UnmapSnapshot(LockedFile* lock, const std::string& name) {
502 CHECK(lock);
503
504 auto& dm = DeviceMapper::Instance();
505 if (!dm.DeleteDeviceIfExists(name)) {
506 LOG(ERROR) << "Could not delete snapshot device: " << name;
507 return false;
508 }
509
510 auto snapshot_extra_device = GetSnapshotExtraDeviceName(name);
511 if (!dm.DeleteDeviceIfExists(snapshot_extra_device)) {
512 LOG(ERROR) << "Could not delete snapshot inner device: " << snapshot_extra_device;
513 return false;
514 }
515
516 return true;
517 }
518
UnmapCowImage(const std::string & name)519 bool SnapshotManager::UnmapCowImage(const std::string& name) {
520 if (!EnsureImageManager()) return false;
521 return images_->UnmapImageIfExists(GetCowImageDeviceName(name));
522 }
523
DeleteSnapshot(LockedFile * lock,const std::string & name)524 bool SnapshotManager::DeleteSnapshot(LockedFile* lock, const std::string& name) {
525 CHECK(lock);
526 CHECK(lock->lock_mode() == LOCK_EX);
527 if (!EnsureImageManager()) return false;
528
529 if (!UnmapCowDevices(lock, name)) {
530 return false;
531 }
532
533 // We can't delete snapshots in recovery. The only way we'd try is it we're
534 // completing or canceling a merge in preparation for a data wipe, in which
535 // case, we don't care if the file sticks around.
536 if (device_->IsRecovery()) {
537 LOG(INFO) << "Skipping delete of snapshot " << name << " in recovery.";
538 return true;
539 }
540
541 auto cow_image_name = GetCowImageDeviceName(name);
542 if (images_->BackingImageExists(cow_image_name)) {
543 if (!images_->DeleteBackingImage(cow_image_name)) {
544 return false;
545 }
546 }
547
548 std::string error;
549 auto file_path = GetSnapshotStatusFilePath(name);
550 if (!android::base::RemoveFileIfExists(file_path, &error)) {
551 LOG(ERROR) << "Failed to remove status file " << file_path << ": " << error;
552 return false;
553 }
554 return true;
555 }
556
InitiateMerge(uint64_t * cow_file_size)557 bool SnapshotManager::InitiateMerge(uint64_t* cow_file_size) {
558 auto lock = LockExclusive();
559 if (!lock) return false;
560
561 UpdateState state = ReadUpdateState(lock.get());
562 if (state != UpdateState::Unverified) {
563 LOG(ERROR) << "Cannot begin a merge if an update has not been verified";
564 return false;
565 }
566
567 auto slot = GetCurrentSlot();
568 if (slot != Slot::Target) {
569 LOG(ERROR) << "Device cannot merge while not booting from new slot";
570 return false;
571 }
572
573 std::vector<std::string> snapshots;
574 if (!ListSnapshots(lock.get(), &snapshots)) {
575 LOG(ERROR) << "Could not list snapshots";
576 return false;
577 }
578
579 auto other_suffix = device_->GetOtherSlotSuffix();
580
581 auto& dm = DeviceMapper::Instance();
582 for (const auto& snapshot : snapshots) {
583 if (android::base::EndsWith(snapshot, other_suffix)) {
584 // Allow the merge to continue, but log this unexpected case.
585 LOG(ERROR) << "Unexpected snapshot found during merge: " << snapshot;
586 continue;
587 }
588
589 // The device has to be mapped, since everything should be merged at
590 // the same time. This is a fairly serious error. We could forcefully
591 // map everything here, but it should have been mapped during first-
592 // stage init.
593 if (dm.GetState(snapshot) == DmDeviceState::INVALID) {
594 LOG(ERROR) << "Cannot begin merge; device " << snapshot << " is not mapped.";
595 return false;
596 }
597 }
598
599 auto metadata = ReadCurrentMetadata();
600 for (auto it = snapshots.begin(); it != snapshots.end();) {
601 switch (GetMetadataPartitionState(*metadata, *it)) {
602 case MetadataPartitionState::Flashed:
603 LOG(WARNING) << "Detected re-flashing for partition " << *it
604 << ". Skip merging it.";
605 [[fallthrough]];
606 case MetadataPartitionState::None: {
607 LOG(WARNING) << "Deleting snapshot for partition " << *it;
608 if (!DeleteSnapshot(lock.get(), *it)) {
609 LOG(WARNING) << "Cannot delete snapshot for partition " << *it
610 << ". Skip merging it anyways.";
611 }
612 it = snapshots.erase(it);
613 } break;
614 case MetadataPartitionState::Updated: {
615 ++it;
616 } break;
617 }
618 }
619
620 uint64_t total_cow_file_size = 0;
621 DmTargetSnapshot::Status initial_target_values = {};
622 for (const auto& snapshot : snapshots) {
623 DmTargetSnapshot::Status current_status;
624 if (!QuerySnapshotStatus(snapshot, nullptr, ¤t_status)) {
625 return false;
626 }
627 initial_target_values.sectors_allocated += current_status.sectors_allocated;
628 initial_target_values.total_sectors += current_status.total_sectors;
629 initial_target_values.metadata_sectors += current_status.metadata_sectors;
630
631 SnapshotStatus snapshot_status;
632 if (!ReadSnapshotStatus(lock.get(), snapshot, &snapshot_status)) {
633 return false;
634 }
635 total_cow_file_size += snapshot_status.cow_file_size();
636 }
637
638 if (cow_file_size) {
639 *cow_file_size = total_cow_file_size;
640 }
641
642 SnapshotUpdateStatus initial_status;
643 initial_status.set_state(UpdateState::Merging);
644 initial_status.set_sectors_allocated(initial_target_values.sectors_allocated);
645 initial_status.set_total_sectors(initial_target_values.total_sectors);
646 initial_status.set_metadata_sectors(initial_target_values.metadata_sectors);
647
648 // Point of no return - mark that we're starting a merge. From now on every
649 // snapshot must be a merge target.
650 if (!WriteSnapshotUpdateStatus(lock.get(), initial_status)) {
651 return false;
652 }
653
654 bool rewrote_all = true;
655 for (const auto& snapshot : snapshots) {
656 // If this fails, we have no choice but to continue. Everything must
657 // be merged. This is not an ideal state to be in, but it is safe,
658 // because we the next boot will try again.
659 if (!SwitchSnapshotToMerge(lock.get(), snapshot)) {
660 LOG(ERROR) << "Failed to switch snapshot to a merge target: " << snapshot;
661 rewrote_all = false;
662 }
663 }
664
665 // If we couldn't switch everything to a merge target, pre-emptively mark
666 // this merge as failed. It will get acknowledged when WaitForMerge() is
667 // called.
668 if (!rewrote_all) {
669 WriteUpdateState(lock.get(), UpdateState::MergeFailed);
670 }
671
672 // Return true no matter what, because a merge was initiated.
673 return true;
674 }
675
SwitchSnapshotToMerge(LockedFile * lock,const std::string & name)676 bool SnapshotManager::SwitchSnapshotToMerge(LockedFile* lock, const std::string& name) {
677 SnapshotStatus status;
678 if (!ReadSnapshotStatus(lock, name, &status)) {
679 return false;
680 }
681 if (status.state() != SnapshotState::CREATED) {
682 LOG(WARNING) << "Snapshot " << name
683 << " has unexpected state: " << SnapshotState_Name(status.state());
684 }
685
686 // After this, we return true because we technically did switch to a merge
687 // target. Everything else we do here is just informational.
688 auto dm_name = GetSnapshotDeviceName(name, status);
689 if (!RewriteSnapshotDeviceTable(dm_name)) {
690 return false;
691 }
692
693 status.set_state(SnapshotState::MERGING);
694
695 DmTargetSnapshot::Status dm_status;
696 if (!QuerySnapshotStatus(dm_name, nullptr, &dm_status)) {
697 LOG(ERROR) << "Could not query merge status for snapshot: " << dm_name;
698 }
699 status.set_sectors_allocated(dm_status.sectors_allocated);
700 status.set_metadata_sectors(dm_status.metadata_sectors);
701 if (!WriteSnapshotStatus(lock, status)) {
702 LOG(ERROR) << "Could not update status file for snapshot: " << name;
703 }
704 return true;
705 }
706
RewriteSnapshotDeviceTable(const std::string & dm_name)707 bool SnapshotManager::RewriteSnapshotDeviceTable(const std::string& dm_name) {
708 auto& dm = DeviceMapper::Instance();
709
710 std::vector<DeviceMapper::TargetInfo> old_targets;
711 if (!dm.GetTableInfo(dm_name, &old_targets)) {
712 LOG(ERROR) << "Could not read snapshot device table: " << dm_name;
713 return false;
714 }
715 if (old_targets.size() != 1 || DeviceMapper::GetTargetType(old_targets[0].spec) != "snapshot") {
716 LOG(ERROR) << "Unexpected device-mapper table for snapshot: " << dm_name;
717 return false;
718 }
719
720 std::string base_device, cow_device;
721 if (!DmTargetSnapshot::GetDevicesFromParams(old_targets[0].data, &base_device, &cow_device)) {
722 LOG(ERROR) << "Could not derive underlying devices for snapshot: " << dm_name;
723 return false;
724 }
725
726 DmTable table;
727 table.Emplace<DmTargetSnapshot>(0, old_targets[0].spec.length, base_device, cow_device,
728 SnapshotStorageMode::Merge, kSnapshotChunkSize);
729 if (!dm.LoadTableAndActivate(dm_name, table)) {
730 LOG(ERROR) << "Could not swap device-mapper tables on snapshot device " << dm_name;
731 return false;
732 }
733 LOG(INFO) << "Successfully switched snapshot device to a merge target: " << dm_name;
734 return true;
735 }
736
// Selects which device-mapper query GetSingleTarget() issues for a device.
enum class TableQuery {
    // Query via DeviceMapper::GetTableInfo().
    Table,
    // Query via DeviceMapper::GetTableStatus().
    Status,
};
741
GetSingleTarget(const std::string & dm_name,TableQuery query,DeviceMapper::TargetInfo * target)742 static bool GetSingleTarget(const std::string& dm_name, TableQuery query,
743 DeviceMapper::TargetInfo* target) {
744 auto& dm = DeviceMapper::Instance();
745 if (dm.GetState(dm_name) == DmDeviceState::INVALID) {
746 return false;
747 }
748
749 std::vector<DeviceMapper::TargetInfo> targets;
750 bool result;
751 if (query == TableQuery::Status) {
752 result = dm.GetTableStatus(dm_name, &targets);
753 } else {
754 result = dm.GetTableInfo(dm_name, &targets);
755 }
756 if (!result) {
757 LOG(ERROR) << "Could not query device: " << dm_name;
758 return false;
759 }
760 if (targets.size() != 1) {
761 return false;
762 }
763
764 *target = std::move(targets[0]);
765 return true;
766 }
767
IsSnapshotDevice(const std::string & dm_name,TargetInfo * target)768 bool SnapshotManager::IsSnapshotDevice(const std::string& dm_name, TargetInfo* target) {
769 DeviceMapper::TargetInfo snap_target;
770 if (!GetSingleTarget(dm_name, TableQuery::Status, &snap_target)) {
771 return false;
772 }
773 auto type = DeviceMapper::GetTargetType(snap_target.spec);
774 if (type != "snapshot" && type != "snapshot-merge") {
775 return false;
776 }
777 if (target) {
778 *target = std::move(snap_target);
779 }
780 return true;
781 }
782
QuerySnapshotStatus(const std::string & dm_name,std::string * target_type,DmTargetSnapshot::Status * status)783 bool SnapshotManager::QuerySnapshotStatus(const std::string& dm_name, std::string* target_type,
784 DmTargetSnapshot::Status* status) {
785 DeviceMapper::TargetInfo target;
786 if (!IsSnapshotDevice(dm_name, &target)) {
787 LOG(ERROR) << "Device " << dm_name << " is not a snapshot or snapshot-merge device";
788 return false;
789 }
790 if (!DmTargetSnapshot::ParseStatusText(target.data, status)) {
791 LOG(ERROR) << "Could not parse snapshot status text: " << dm_name;
792 return false;
793 }
794 if (target_type) {
795 *target_type = DeviceMapper::GetTargetType(target.spec);
796 }
797 return true;
798 }
799
800 // Note that when a merge fails, we will *always* try again to complete the
801 // merge each time the device boots. There is no harm in doing so, and if
802 // the problem was transient, we might manage to get a new outcome.
ProcessUpdateState(const std::function<bool ()> & callback,const std::function<bool ()> & before_cancel)803 UpdateState SnapshotManager::ProcessUpdateState(const std::function<bool()>& callback,
804 const std::function<bool()>& before_cancel) {
805 while (true) {
806 UpdateState state = CheckMergeState(before_cancel);
807 if (state == UpdateState::MergeFailed) {
808 AcknowledgeMergeFailure();
809 }
810 if (state != UpdateState::Merging) {
811 // Either there is no merge, or the merge was finished, so no need
812 // to keep waiting.
813 return state;
814 }
815
816 if (callback && !callback()) {
817 return state;
818 }
819
820 // This wait is not super time sensitive, so we have a relatively
821 // low polling frequency.
822 std::this_thread::sleep_for(kUpdateStateCheckInterval);
823 }
824 }
825
CheckMergeState(const std::function<bool ()> & before_cancel)826 UpdateState SnapshotManager::CheckMergeState(const std::function<bool()>& before_cancel) {
827 auto lock = LockExclusive();
828 if (!lock) {
829 return UpdateState::MergeFailed;
830 }
831
832 UpdateState state = CheckMergeState(lock.get(), before_cancel);
833 if (state == UpdateState::MergeCompleted) {
834 // Do this inside the same lock. Failures get acknowledged without the
835 // lock, because flock() might have failed.
836 AcknowledgeMergeSuccess(lock.get());
837 } else if (state == UpdateState::Cancelled) {
838 if (!RemoveAllUpdateState(lock.get(), before_cancel)) {
839 return ReadSnapshotUpdateStatus(lock.get()).state();
840 }
841 }
842 return state;
843 }
844
CheckMergeState(LockedFile * lock,const std::function<bool ()> & before_cancel)845 UpdateState SnapshotManager::CheckMergeState(LockedFile* lock,
846 const std::function<bool()>& before_cancel) {
847 UpdateState state = ReadUpdateState(lock);
848 switch (state) {
849 case UpdateState::None:
850 case UpdateState::MergeCompleted:
851 // Harmless races are allowed between two callers of WaitForMerge,
852 // so in both of these cases we just propagate the state.
853 return state;
854
855 case UpdateState::Merging:
856 case UpdateState::MergeNeedsReboot:
857 case UpdateState::MergeFailed:
858 // We'll poll each snapshot below. Note that for the NeedsReboot
859 // case, we always poll once to give cleanup another opportunity to
860 // run.
861 break;
862
863 case UpdateState::Unverified:
864 // This is an edge case. Normally cancelled updates are detected
865 // via the merge poll below, but if we never started a merge, we
866 // need to also check here.
867 if (HandleCancelledUpdate(lock, before_cancel)) {
868 return UpdateState::Cancelled;
869 }
870 return state;
871
872 default:
873 return state;
874 }
875
876 std::vector<std::string> snapshots;
877 if (!ListSnapshots(lock, &snapshots)) {
878 return UpdateState::MergeFailed;
879 }
880
881 bool cancelled = false;
882 bool failed = false;
883 bool merging = false;
884 bool needs_reboot = false;
885 for (const auto& snapshot : snapshots) {
886 UpdateState snapshot_state = CheckTargetMergeState(lock, snapshot);
887 switch (snapshot_state) {
888 case UpdateState::MergeFailed:
889 failed = true;
890 break;
891 case UpdateState::Merging:
892 merging = true;
893 break;
894 case UpdateState::MergeNeedsReboot:
895 needs_reboot = true;
896 break;
897 case UpdateState::MergeCompleted:
898 break;
899 case UpdateState::Cancelled:
900 cancelled = true;
901 break;
902 default:
903 LOG(ERROR) << "Unknown merge status for \"" << snapshot << "\": "
904 << "\"" << snapshot_state << "\"";
905 failed = true;
906 break;
907 }
908 }
909
910 if (merging) {
911 // Note that we handle "Merging" before we handle anything else. We
912 // want to poll until *nothing* is merging if we can, so everything has
913 // a chance to get marked as completed or failed.
914 return UpdateState::Merging;
915 }
916 if (failed) {
917 // Note: since there are many drop-out cases for failure, we acknowledge
918 // it in WaitForMerge rather than here and elsewhere.
919 return UpdateState::MergeFailed;
920 }
921 if (needs_reboot) {
922 WriteUpdateState(lock, UpdateState::MergeNeedsReboot);
923 return UpdateState::MergeNeedsReboot;
924 }
925 if (cancelled) {
926 // This is an edge case, that we handle as correctly as we sensibly can.
927 // The underlying partition has changed behind update_engine, and we've
928 // removed the snapshot as a result. The exact state of the update is
929 // undefined now, but this can only happen on an unlocked device where
930 // partitions can be flashed without wiping userdata.
931 return UpdateState::Cancelled;
932 }
933 return UpdateState::MergeCompleted;
934 }
935
CheckTargetMergeState(LockedFile * lock,const std::string & name)936 UpdateState SnapshotManager::CheckTargetMergeState(LockedFile* lock, const std::string& name) {
937 SnapshotStatus snapshot_status;
938 if (!ReadSnapshotStatus(lock, name, &snapshot_status)) {
939 return UpdateState::MergeFailed;
940 }
941
942 std::string dm_name = GetSnapshotDeviceName(name, snapshot_status);
943
944 std::unique_ptr<LpMetadata> current_metadata;
945
946 if (!IsSnapshotDevice(dm_name)) {
947 if (!current_metadata) {
948 current_metadata = ReadCurrentMetadata();
949 }
950
951 if (!current_metadata ||
952 GetMetadataPartitionState(*current_metadata, name) != MetadataPartitionState::Updated) {
953 DeleteSnapshot(lock, name);
954 return UpdateState::Cancelled;
955 }
956
957 // During a check, we decided the merge was complete, but we were unable to
958 // collapse the device-mapper stack and perform COW cleanup. If we haven't
959 // rebooted after this check, the device will still be a snapshot-merge
960 // target. If the have rebooted, the device will now be a linear target,
961 // and we can try cleanup again.
962 if (snapshot_status.state() == SnapshotState::MERGE_COMPLETED) {
963 // NB: It's okay if this fails now, we gave cleanup our best effort.
964 OnSnapshotMergeComplete(lock, name, snapshot_status);
965 return UpdateState::MergeCompleted;
966 }
967
968 LOG(ERROR) << "Expected snapshot or snapshot-merge for device: " << dm_name;
969 return UpdateState::MergeFailed;
970 }
971
972 // This check is expensive so it is only enabled for debugging.
973 DCHECK((current_metadata = ReadCurrentMetadata()) &&
974 GetMetadataPartitionState(*current_metadata, name) == MetadataPartitionState::Updated);
975
976 std::string target_type;
977 DmTargetSnapshot::Status status;
978 if (!QuerySnapshotStatus(dm_name, &target_type, &status)) {
979 return UpdateState::MergeFailed;
980 }
981 if (target_type != "snapshot-merge") {
982 // We can get here if we failed to rewrite the target type in
983 // InitiateMerge(). If we failed to create the target in first-stage
984 // init, boot would not succeed.
985 LOG(ERROR) << "Snapshot " << name << " has incorrect target type: " << target_type;
986 return UpdateState::MergeFailed;
987 }
988
989 // These two values are equal when merging is complete.
990 if (status.sectors_allocated != status.metadata_sectors) {
991 if (snapshot_status.state() == SnapshotState::MERGE_COMPLETED) {
992 LOG(ERROR) << "Snapshot " << name << " is merging after being marked merge-complete.";
993 return UpdateState::MergeFailed;
994 }
995 return UpdateState::Merging;
996 }
997
998 // Merging is done. First, update the status file to indicate the merge
999 // is complete. We do this before calling OnSnapshotMergeComplete, even
1000 // though this means the write is potentially wasted work (since in the
1001 // ideal case we'll immediately delete the file).
1002 //
1003 // This makes it simpler to reason about the next reboot: no matter what
1004 // part of cleanup failed, first-stage init won't try to create another
1005 // snapshot device for this partition.
1006 snapshot_status.set_state(SnapshotState::MERGE_COMPLETED);
1007 if (!WriteSnapshotStatus(lock, snapshot_status)) {
1008 return UpdateState::MergeFailed;
1009 }
1010 if (!OnSnapshotMergeComplete(lock, name, snapshot_status)) {
1011 return UpdateState::MergeNeedsReboot;
1012 }
1013 return UpdateState::MergeCompleted;
1014 }
1015
GetSnapshotBootIndicatorPath()1016 std::string SnapshotManager::GetSnapshotBootIndicatorPath() {
1017 return metadata_dir_ + "/" + android::base::Basename(kBootIndicatorPath);
1018 }
1019
GetRollbackIndicatorPath()1020 std::string SnapshotManager::GetRollbackIndicatorPath() {
1021 return metadata_dir_ + "/" + android::base::Basename(kRollbackIndicatorPath);
1022 }
1023
GetForwardMergeIndicatorPath()1024 std::string SnapshotManager::GetForwardMergeIndicatorPath() {
1025 return metadata_dir_ + "/allow-forward-merge";
1026 }
1027
AcknowledgeMergeSuccess(LockedFile * lock)1028 void SnapshotManager::AcknowledgeMergeSuccess(LockedFile* lock) {
1029 // It's not possible to remove update state in recovery, so write an
1030 // indicator that cleanup is needed on reboot. If a factory data reset
1031 // was requested, it doesn't matter, everything will get wiped anyway.
1032 // To make testing easier we consider a /data wipe as cleaned up.
1033 if (device_->IsRecovery() && !in_factory_data_reset_) {
1034 WriteUpdateState(lock, UpdateState::MergeCompleted);
1035 return;
1036 }
1037
1038 RemoveAllUpdateState(lock);
1039 }
1040
AcknowledgeMergeFailure()1041 void SnapshotManager::AcknowledgeMergeFailure() {
1042 // Log first, so worst case, we always have a record of why the calls below
1043 // were being made.
1044 LOG(ERROR) << "Merge could not be completed and will be marked as failed.";
1045
1046 auto lock = LockExclusive();
1047 if (!lock) return;
1048
1049 // Since we released the lock in between WaitForMerge and here, it's
1050 // possible (1) the merge successfully completed or (2) was already
1051 // marked as a failure. So make sure to check the state again, and
1052 // only mark as a failure if appropriate.
1053 UpdateState state = ReadUpdateState(lock.get());
1054 if (state != UpdateState::Merging && state != UpdateState::MergeNeedsReboot) {
1055 return;
1056 }
1057
1058 WriteUpdateState(lock.get(), UpdateState::MergeFailed);
1059 }
1060
OnSnapshotMergeComplete(LockedFile * lock,const std::string & name,const SnapshotStatus & status)1061 bool SnapshotManager::OnSnapshotMergeComplete(LockedFile* lock, const std::string& name,
1062 const SnapshotStatus& status) {
1063 auto dm_name = GetSnapshotDeviceName(name, status);
1064 if (IsSnapshotDevice(dm_name)) {
1065 // We are extra-cautious here, to avoid deleting the wrong table.
1066 std::string target_type;
1067 DmTargetSnapshot::Status dm_status;
1068 if (!QuerySnapshotStatus(dm_name, &target_type, &dm_status)) {
1069 return false;
1070 }
1071 if (target_type != "snapshot-merge") {
1072 LOG(ERROR) << "Unexpected target type " << target_type
1073 << " for snapshot device: " << dm_name;
1074 return false;
1075 }
1076 if (dm_status.sectors_allocated != dm_status.metadata_sectors) {
1077 LOG(ERROR) << "Merge is unexpectedly incomplete for device " << dm_name;
1078 return false;
1079 }
1080 if (!CollapseSnapshotDevice(name, status)) {
1081 LOG(ERROR) << "Unable to collapse snapshot: " << name;
1082 return false;
1083 }
1084 // Note that collapsing is implicitly an Unmap, so we don't need to
1085 // unmap the snapshot.
1086 }
1087
1088 if (!DeleteSnapshot(lock, name)) {
1089 LOG(ERROR) << "Could not delete snapshot: " << name;
1090 return false;
1091 }
1092 return true;
1093 }
1094
CollapseSnapshotDevice(const std::string & name,const SnapshotStatus & status)1095 bool SnapshotManager::CollapseSnapshotDevice(const std::string& name,
1096 const SnapshotStatus& status) {
1097 auto& dm = DeviceMapper::Instance();
1098 auto dm_name = GetSnapshotDeviceName(name, status);
1099
1100 // Verify we have a snapshot-merge device.
1101 DeviceMapper::TargetInfo target;
1102 if (!GetSingleTarget(dm_name, TableQuery::Table, &target)) {
1103 return false;
1104 }
1105 if (DeviceMapper::GetTargetType(target.spec) != "snapshot-merge") {
1106 // This should be impossible, it was checked earlier.
1107 LOG(ERROR) << "Snapshot device has invalid target type: " << dm_name;
1108 return false;
1109 }
1110
1111 std::string base_device, cow_device;
1112 if (!DmTargetSnapshot::GetDevicesFromParams(target.data, &base_device, &cow_device)) {
1113 LOG(ERROR) << "Could not parse snapshot device " << dm_name
1114 << " parameters: " << target.data;
1115 return false;
1116 }
1117
1118 uint64_t snapshot_sectors = status.snapshot_size() / kSectorSize;
1119 if (snapshot_sectors * kSectorSize != status.snapshot_size()) {
1120 LOG(ERROR) << "Snapshot " << name
1121 << " size is not sector aligned: " << status.snapshot_size();
1122 return false;
1123 }
1124
1125 if (dm_name != name) {
1126 // We've derived the base device, but we actually need to replace the
1127 // table of the outermost device. Do a quick verification that this
1128 // device looks like we expect it to.
1129 std::vector<DeviceMapper::TargetInfo> outer_table;
1130 if (!dm.GetTableInfo(name, &outer_table)) {
1131 LOG(ERROR) << "Could not validate outer snapshot table: " << name;
1132 return false;
1133 }
1134 if (outer_table.size() != 2) {
1135 LOG(ERROR) << "Expected 2 dm-linear targets for table " << name
1136 << ", got: " << outer_table.size();
1137 return false;
1138 }
1139 for (const auto& target : outer_table) {
1140 auto target_type = DeviceMapper::GetTargetType(target.spec);
1141 if (target_type != "linear") {
1142 LOG(ERROR) << "Outer snapshot table may only contain linear targets, but " << name
1143 << " has target: " << target_type;
1144 return false;
1145 }
1146 }
1147 if (outer_table[0].spec.length != snapshot_sectors) {
1148 LOG(ERROR) << "dm-snapshot " << name << " should have " << snapshot_sectors
1149 << " sectors, got: " << outer_table[0].spec.length;
1150 return false;
1151 }
1152 uint64_t expected_device_sectors = status.device_size() / kSectorSize;
1153 uint64_t actual_device_sectors = outer_table[0].spec.length + outer_table[1].spec.length;
1154 if (expected_device_sectors != actual_device_sectors) {
1155 LOG(ERROR) << "Outer device " << name << " should have " << expected_device_sectors
1156 << " sectors, got: " << actual_device_sectors;
1157 return false;
1158 }
1159 }
1160
1161 uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
1162 // Create a DmTable that is identical to the base device.
1163 CreateLogicalPartitionParams base_device_params{
1164 .block_device = device_->GetSuperDevice(slot),
1165 .metadata_slot = slot,
1166 .partition_name = name,
1167 .partition_opener = &device_->GetPartitionOpener(),
1168 };
1169 DmTable table;
1170 if (!CreateDmTable(base_device_params, &table)) {
1171 LOG(ERROR) << "Could not create a DmTable for partition: " << name;
1172 return false;
1173 }
1174
1175 // Note: we are replacing the *outer* table here, so we do not use dm_name.
1176 if (!dm.LoadTableAndActivate(name, table)) {
1177 return false;
1178 }
1179
1180 // Attempt to delete the snapshot device if one still exists. Nothing
1181 // should be depending on the device, and device-mapper should have
1182 // flushed remaining I/O. We could in theory replace with dm-zero (or
1183 // re-use the table above), but for now it's better to know why this
1184 // would fail.
1185 if (dm_name != name && !dm.DeleteDeviceIfExists(dm_name)) {
1186 LOG(ERROR) << "Unable to delete snapshot device " << dm_name << ", COW cannot be "
1187 << "reclaimed until after reboot.";
1188 return false;
1189 }
1190
1191 // Cleanup the base device as well, since it is no longer used. This does
1192 // not block cleanup.
1193 auto base_name = GetBaseDeviceName(name);
1194 if (!dm.DeleteDeviceIfExists(base_name)) {
1195 LOG(ERROR) << "Unable to delete base device for snapshot: " << base_name;
1196 }
1197 return true;
1198 }
1199
HandleCancelledUpdate(LockedFile * lock,const std::function<bool ()> & before_cancel)1200 bool SnapshotManager::HandleCancelledUpdate(LockedFile* lock,
1201 const std::function<bool()>& before_cancel) {
1202 auto slot = GetCurrentSlot();
1203 if (slot == Slot::Unknown) {
1204 return false;
1205 }
1206
1207 // If all snapshots were reflashed, then cancel the entire update.
1208 if (AreAllSnapshotsCancelled(lock)) {
1209 LOG(WARNING) << "Detected re-flashing, cancelling unverified update.";
1210 return RemoveAllUpdateState(lock, before_cancel);
1211 }
1212
1213 // If update has been rolled back, then cancel the entire update.
1214 // Client (update_engine) is responsible for doing additional cleanup work on its own states
1215 // when ProcessUpdateState() returns UpdateState::Cancelled.
1216 auto current_slot = GetCurrentSlot();
1217 if (current_slot != Slot::Source) {
1218 LOG(INFO) << "Update state is being processed while booting at " << current_slot
1219 << " slot, taking no action.";
1220 return false;
1221 }
1222
1223 // current_slot == Source. Attempt to detect rollbacks.
1224 if (access(GetRollbackIndicatorPath().c_str(), F_OK) != 0) {
1225 // This unverified update is not attempted. Take no action.
1226 PLOG(INFO) << "Rollback indicator not detected. "
1227 << "Update state is being processed before reboot, taking no action.";
1228 return false;
1229 }
1230
1231 LOG(WARNING) << "Detected rollback, cancelling unverified update.";
1232 return RemoveAllUpdateState(lock, before_cancel);
1233 }
1234
ReadCurrentMetadata()1235 std::unique_ptr<LpMetadata> SnapshotManager::ReadCurrentMetadata() {
1236 const auto& opener = device_->GetPartitionOpener();
1237 uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
1238 auto super_device = device_->GetSuperDevice(slot);
1239 auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot);
1240 if (!metadata) {
1241 LOG(ERROR) << "Could not read dynamic partition metadata for device: " << super_device;
1242 return nullptr;
1243 }
1244 return metadata;
1245 }
1246
GetMetadataPartitionState(const LpMetadata & metadata,const std::string & name)1247 SnapshotManager::MetadataPartitionState SnapshotManager::GetMetadataPartitionState(
1248 const LpMetadata& metadata, const std::string& name) {
1249 auto partition = android::fs_mgr::FindPartition(metadata, name);
1250 if (!partition) return MetadataPartitionState::None;
1251 if (partition->attributes & LP_PARTITION_ATTR_UPDATED) {
1252 return MetadataPartitionState::Updated;
1253 }
1254 return MetadataPartitionState::Flashed;
1255 }
1256
AreAllSnapshotsCancelled(LockedFile * lock)1257 bool SnapshotManager::AreAllSnapshotsCancelled(LockedFile* lock) {
1258 std::vector<std::string> snapshots;
1259 if (!ListSnapshots(lock, &snapshots)) {
1260 LOG(WARNING) << "Failed to list snapshots to determine whether device has been flashed "
1261 << "after applying an update. Assuming no snapshots.";
1262 // Let HandleCancelledUpdate resets UpdateState.
1263 return true;
1264 }
1265
1266 std::map<std::string, bool> flashing_status;
1267
1268 if (!GetSnapshotFlashingStatus(lock, snapshots, &flashing_status)) {
1269 LOG(WARNING) << "Failed to determine whether partitions have been flashed. Not"
1270 << "removing update states.";
1271 return false;
1272 }
1273
1274 bool all_snapshots_cancelled = std::all_of(flashing_status.begin(), flashing_status.end(),
1275 [](const auto& pair) { return pair.second; });
1276
1277 if (all_snapshots_cancelled) {
1278 LOG(WARNING) << "All partitions are re-flashed after update, removing all update states.";
1279 }
1280 return all_snapshots_cancelled;
1281 }
1282
GetSnapshotFlashingStatus(LockedFile * lock,const std::vector<std::string> & snapshots,std::map<std::string,bool> * out)1283 bool SnapshotManager::GetSnapshotFlashingStatus(LockedFile* lock,
1284 const std::vector<std::string>& snapshots,
1285 std::map<std::string, bool>* out) {
1286 CHECK(lock);
1287
1288 auto source_slot_suffix = ReadUpdateSourceSlotSuffix();
1289 if (source_slot_suffix.empty()) {
1290 return false;
1291 }
1292 uint32_t source_slot = SlotNumberForSlotSuffix(source_slot_suffix);
1293 uint32_t target_slot = (source_slot == 0) ? 1 : 0;
1294
1295 // Attempt to detect re-flashing on each partition.
1296 // - If all partitions are re-flashed, we can proceed to cancel the whole update.
1297 // - If only some of the partitions are re-flashed, snapshots for re-flashed partitions are
1298 // deleted. Caller is responsible for merging the rest of the snapshots.
1299 // - If none of the partitions are re-flashed, caller is responsible for merging the snapshots.
1300 //
1301 // Note that we use target slot metadata, since if an OTA has been applied
1302 // to the target slot, we can detect the UPDATED flag. Any kind of flash
1303 // operation against dynamic partitions ensures that all copies of the
1304 // metadata are in sync, so flashing all partitions on the source slot will
1305 // remove the UPDATED flag on the target slot as well.
1306 const auto& opener = device_->GetPartitionOpener();
1307 auto super_device = device_->GetSuperDevice(target_slot);
1308 auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, target_slot);
1309 if (!metadata) {
1310 return false;
1311 }
1312
1313 for (const auto& snapshot_name : snapshots) {
1314 if (GetMetadataPartitionState(*metadata, snapshot_name) ==
1315 MetadataPartitionState::Updated) {
1316 out->emplace(snapshot_name, false);
1317 } else {
1318 // Delete snapshots for partitions that are re-flashed after the update.
1319 LOG(WARNING) << "Detected re-flashing of partition " << snapshot_name << ".";
1320 out->emplace(snapshot_name, true);
1321 }
1322 }
1323 return true;
1324 }
1325
RemoveAllSnapshots(LockedFile * lock)1326 bool SnapshotManager::RemoveAllSnapshots(LockedFile* lock) {
1327 std::vector<std::string> snapshots;
1328 if (!ListSnapshots(lock, &snapshots)) {
1329 LOG(ERROR) << "Could not list snapshots";
1330 return false;
1331 }
1332
1333 std::map<std::string, bool> flashing_status;
1334 if (!GetSnapshotFlashingStatus(lock, snapshots, &flashing_status)) {
1335 LOG(WARNING) << "Failed to get flashing status";
1336 }
1337
1338 auto current_slot = GetCurrentSlot();
1339 bool ok = true;
1340 bool has_mapped_cow_images = false;
1341 for (const auto& name : snapshots) {
1342 // If booting off source slot, it is okay to unmap and delete all the snapshots.
1343 // If boot indicator is missing, update state is None or Initiated, so
1344 // it is also okay to unmap and delete all the snapshots.
1345 // If booting off target slot,
1346 // - should not unmap because:
1347 // - In Android mode, snapshots are not mapped, but
1348 // filesystems are mounting off dm-linear targets directly.
1349 // - In recovery mode, assume nothing is mapped, so it is optional to unmap.
1350 // - If partition is flashed or unknown, it is okay to delete snapshots.
1351 // Otherwise (UPDATED flag), only delete snapshots if they are not mapped
1352 // as dm-snapshot (for example, after merge completes).
1353 bool should_unmap = current_slot != Slot::Target;
1354 bool should_delete = ShouldDeleteSnapshot(lock, flashing_status, current_slot, name);
1355
1356 bool partition_ok = true;
1357 if (should_unmap && !UnmapPartitionWithSnapshot(lock, name)) {
1358 partition_ok = false;
1359 }
1360 if (partition_ok && should_delete && !DeleteSnapshot(lock, name)) {
1361 partition_ok = false;
1362 }
1363
1364 if (!partition_ok) {
1365 // Remember whether or not we were able to unmap the cow image.
1366 auto cow_image_device = GetCowImageDeviceName(name);
1367 has_mapped_cow_images |=
1368 (EnsureImageManager() && images_->IsImageMapped(cow_image_device));
1369
1370 ok = false;
1371 }
1372 }
1373
1374 if (ok || !has_mapped_cow_images) {
1375 // Delete any image artifacts as a precaution, in case an update is
1376 // being cancelled due to some corrupted state in an lp_metadata file.
1377 // Note that we do not do this if some cow images are still mapped,
1378 // since we must not remove backing storage if it's in use.
1379 if (!EnsureImageManager() || !images_->RemoveAllImages()) {
1380 LOG(ERROR) << "Could not remove all snapshot artifacts";
1381 return false;
1382 }
1383 }
1384 return ok;
1385 }
1386
1387 // See comments in RemoveAllSnapshots().
ShouldDeleteSnapshot(LockedFile * lock,const std::map<std::string,bool> & flashing_status,Slot current_slot,const std::string & name)1388 bool SnapshotManager::ShouldDeleteSnapshot(LockedFile* lock,
1389 const std::map<std::string, bool>& flashing_status,
1390 Slot current_slot, const std::string& name) {
1391 if (current_slot != Slot::Target) {
1392 return true;
1393 }
1394 auto it = flashing_status.find(name);
1395 if (it == flashing_status.end()) {
1396 LOG(WARNING) << "Can't determine flashing status for " << name;
1397 return true;
1398 }
1399 if (it->second) {
1400 // partition flashed, okay to delete obsolete snapshots
1401 return true;
1402 }
1403 // partition updated, only delete if not dm-snapshot
1404 SnapshotStatus status;
1405 if (!ReadSnapshotStatus(lock, name, &status)) {
1406 LOG(WARNING) << "Unable to read snapshot status for " << name
1407 << ", guessing snapshot device name";
1408 auto extra_name = GetSnapshotExtraDeviceName(name);
1409 return !IsSnapshotDevice(name) && !IsSnapshotDevice(extra_name);
1410 }
1411 auto dm_name = GetSnapshotDeviceName(name, status);
1412 return !IsSnapshotDevice(dm_name);
1413 }
1414
GetUpdateState(double * progress)1415 UpdateState SnapshotManager::GetUpdateState(double* progress) {
1416 // If we've never started an update, the state file won't exist.
1417 auto state_file = GetStateFilePath();
1418 if (access(state_file.c_str(), F_OK) != 0 && errno == ENOENT) {
1419 return UpdateState::None;
1420 }
1421
1422 auto lock = LockShared();
1423 if (!lock) {
1424 return UpdateState::None;
1425 }
1426
1427 SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock.get());
1428 auto state = update_status.state();
1429 if (progress == nullptr) {
1430 return state;
1431 }
1432
1433 if (state == UpdateState::MergeCompleted) {
1434 *progress = 100.0;
1435 return state;
1436 }
1437
1438 *progress = 0.0;
1439 if (state != UpdateState::Merging) {
1440 return state;
1441 }
1442
1443 // Sum all the snapshot states as if the system consists of a single huge
1444 // snapshots device, then compute the merge completion percentage of that
1445 // device.
1446 std::vector<std::string> snapshots;
1447 if (!ListSnapshots(lock.get(), &snapshots)) {
1448 LOG(ERROR) << "Could not list snapshots";
1449 return state;
1450 }
1451
1452 DmTargetSnapshot::Status fake_snapshots_status = {};
1453 for (const auto& snapshot : snapshots) {
1454 DmTargetSnapshot::Status current_status;
1455
1456 if (!QuerySnapshotStatus(snapshot, nullptr, ¤t_status)) continue;
1457
1458 fake_snapshots_status.sectors_allocated += current_status.sectors_allocated;
1459 fake_snapshots_status.total_sectors += current_status.total_sectors;
1460 fake_snapshots_status.metadata_sectors += current_status.metadata_sectors;
1461 }
1462
1463 *progress = DmTargetSnapshot::MergePercent(fake_snapshots_status,
1464 update_status.sectors_allocated());
1465
1466 return state;
1467 }
1468
ListSnapshots(LockedFile * lock,std::vector<std::string> * snapshots)1469 bool SnapshotManager::ListSnapshots(LockedFile* lock, std::vector<std::string>* snapshots) {
1470 CHECK(lock);
1471
1472 auto dir_path = metadata_dir_ + "/snapshots"s;
1473 std::unique_ptr<DIR, decltype(&closedir)> dir(opendir(dir_path.c_str()), closedir);
1474 if (!dir) {
1475 PLOG(ERROR) << "opendir failed: " << dir_path;
1476 return false;
1477 }
1478
1479 struct dirent* dp;
1480 while ((dp = readdir(dir.get())) != nullptr) {
1481 if (dp->d_type != DT_REG) continue;
1482 snapshots->emplace_back(dp->d_name);
1483 }
1484 return true;
1485 }
1486
IsSnapshotManagerNeeded()1487 bool SnapshotManager::IsSnapshotManagerNeeded() {
1488 return access(kBootIndicatorPath, F_OK) == 0;
1489 }
1490
GetGlobalRollbackIndicatorPath()1491 std::string SnapshotManager::GetGlobalRollbackIndicatorPath() {
1492 return kRollbackIndicatorPath;
1493 }
1494
NeedSnapshotsInFirstStageMount()1495 bool SnapshotManager::NeedSnapshotsInFirstStageMount() {
1496 // If we fail to read, we'll wind up using CreateLogicalPartitions, which
1497 // will create devices that look like the old slot, except with extra
1498 // content at the end of each device. This will confuse dm-verity, and
1499 // ultimately we'll fail to boot. Why not make it a fatal error and have
1500 // the reason be clearer? Because the indicator file still exists, and
1501 // if this was FATAL, reverting to the old slot would be broken.
1502 auto slot = GetCurrentSlot();
1503
1504 if (slot != Slot::Target) {
1505 if (slot == Slot::Source) {
1506 // Device is rebooting into the original slot, so mark this as a
1507 // rollback.
1508 auto path = GetRollbackIndicatorPath();
1509 if (!android::base::WriteStringToFile("1", path)) {
1510 PLOG(ERROR) << "Unable to write rollback indicator: " << path;
1511 } else {
1512 LOG(INFO) << "Rollback detected, writing rollback indicator to " << path;
1513 }
1514 }
1515 LOG(INFO) << "Not booting from new slot. Will not mount snapshots.";
1516 return false;
1517 }
1518
1519 // If we can't read the update state, it's unlikely anything else will
1520 // succeed, so this is a fatal error. We'll eventually exhaust boot
1521 // attempts and revert to the old slot.
1522 auto lock = LockShared();
1523 if (!lock) {
1524 LOG(FATAL) << "Could not read update state to determine snapshot status";
1525 return false;
1526 }
1527 switch (ReadUpdateState(lock.get())) {
1528 case UpdateState::Unverified:
1529 case UpdateState::Merging:
1530 case UpdateState::MergeFailed:
1531 return true;
1532 default:
1533 return false;
1534 }
1535 }
1536
CreateLogicalAndSnapshotPartitions(const std::string & super_device,const std::chrono::milliseconds & timeout_ms)1537 bool SnapshotManager::CreateLogicalAndSnapshotPartitions(
1538 const std::string& super_device, const std::chrono::milliseconds& timeout_ms) {
1539 LOG(INFO) << "Creating logical partitions with snapshots as needed";
1540
1541 auto lock = LockExclusive();
1542 if (!lock) return false;
1543
1544 const auto& opener = device_->GetPartitionOpener();
1545 uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
1546 auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot);
1547 if (!metadata) {
1548 LOG(ERROR) << "Could not read dynamic partition metadata for device: " << super_device;
1549 return false;
1550 }
1551
1552 for (const auto& partition : metadata->partitions) {
1553 if (GetPartitionGroupName(metadata->groups[partition.group_index]) == kCowGroupName) {
1554 LOG(INFO) << "Skip mapping partition " << GetPartitionName(partition) << " in group "
1555 << kCowGroupName;
1556 continue;
1557 }
1558
1559 CreateLogicalPartitionParams params = {
1560 .block_device = super_device,
1561 .metadata = metadata.get(),
1562 .partition = &partition,
1563 .partition_opener = &opener,
1564 .timeout_ms = timeout_ms,
1565 };
1566 std::string ignore_path;
1567 if (!MapPartitionWithSnapshot(lock.get(), std::move(params), &ignore_path)) {
1568 return false;
1569 }
1570 }
1571
1572 LOG(INFO) << "Created logical partitions with snapshot.";
1573 return true;
1574 }
1575
GetRemainingTime(const std::chrono::milliseconds & timeout,const std::chrono::time_point<std::chrono::steady_clock> & begin)1576 static std::chrono::milliseconds GetRemainingTime(
1577 const std::chrono::milliseconds& timeout,
1578 const std::chrono::time_point<std::chrono::steady_clock>& begin) {
1579 // If no timeout is specified, execute all commands without specifying any timeout.
1580 if (timeout.count() == 0) return std::chrono::milliseconds(0);
1581 auto passed_time = std::chrono::steady_clock::now() - begin;
1582 auto remaining_time = timeout - duration_cast<std::chrono::milliseconds>(passed_time);
1583 if (remaining_time.count() <= 0) {
1584 LOG(ERROR) << "MapPartitionWithSnapshot has reached timeout " << timeout.count() << "ms ("
1585 << remaining_time.count() << "ms remaining)";
1586 // Return min() instead of remaining_time here because 0 is treated as a special value for
1587 // no timeout, where the rest of the commands will still be executed.
1588 return std::chrono::milliseconds::min();
1589 }
1590 return remaining_time;
1591 }
1592
MapPartitionWithSnapshot(LockedFile * lock,CreateLogicalPartitionParams params,std::string * path)1593 bool SnapshotManager::MapPartitionWithSnapshot(LockedFile* lock,
1594 CreateLogicalPartitionParams params,
1595 std::string* path) {
1596 auto begin = std::chrono::steady_clock::now();
1597
1598 CHECK(lock);
1599 path->clear();
1600
1601 if (params.GetPartitionName() != params.GetDeviceName()) {
1602 LOG(ERROR) << "Mapping snapshot with a different name is unsupported: partition_name = "
1603 << params.GetPartitionName() << ", device_name = " << params.GetDeviceName();
1604 return false;
1605 }
1606
1607 // Fill out fields in CreateLogicalPartitionParams so that we have more information (e.g. by
1608 // reading super partition metadata).
1609 CreateLogicalPartitionParams::OwnedData params_owned_data;
1610 if (!params.InitDefaults(¶ms_owned_data)) {
1611 return false;
1612 }
1613
1614 if (!params.partition->num_extents) {
1615 LOG(INFO) << "Skipping zero-length logical partition: " << params.GetPartitionName();
1616 return true; // leave path empty to indicate that nothing is mapped.
1617 }
1618
1619 // Determine if there is a live snapshot for the SnapshotStatus of the partition; i.e. if the
1620 // partition still has a snapshot that needs to be mapped. If no live snapshot or merge
1621 // completed, live_snapshot_status is set to nullopt.
1622 std::optional<SnapshotStatus> live_snapshot_status;
1623 do {
1624 if (!(params.partition->attributes & LP_PARTITION_ATTR_UPDATED)) {
1625 LOG(INFO) << "Detected re-flashing of partition, will skip snapshot: "
1626 << params.GetPartitionName();
1627 break;
1628 }
1629 auto file_path = GetSnapshotStatusFilePath(params.GetPartitionName());
1630 if (access(file_path.c_str(), F_OK) != 0) {
1631 if (errno != ENOENT) {
1632 PLOG(INFO) << "Can't map snapshot for " << params.GetPartitionName()
1633 << ": Can't access " << file_path;
1634 return false;
1635 }
1636 break;
1637 }
1638 live_snapshot_status = std::make_optional<SnapshotStatus>();
1639 if (!ReadSnapshotStatus(lock, params.GetPartitionName(), &*live_snapshot_status)) {
1640 return false;
1641 }
1642 // No live snapshot if merge is completed.
1643 if (live_snapshot_status->state() == SnapshotState::MERGE_COMPLETED) {
1644 live_snapshot_status.reset();
1645 }
1646
1647 if (live_snapshot_status->state() == SnapshotState::NONE ||
1648 live_snapshot_status->cow_partition_size() + live_snapshot_status->cow_file_size() ==
1649 0) {
1650 LOG(WARNING) << "Snapshot status for " << params.GetPartitionName()
1651 << " is invalid, ignoring: state = "
1652 << SnapshotState_Name(live_snapshot_status->state())
1653 << ", cow_partition_size = " << live_snapshot_status->cow_partition_size()
1654 << ", cow_file_size = " << live_snapshot_status->cow_file_size();
1655 live_snapshot_status.reset();
1656 }
1657 } while (0);
1658
1659 if (live_snapshot_status.has_value()) {
1660 // dm-snapshot requires the base device to be writable.
1661 params.force_writable = true;
1662 // Map the base device with a different name to avoid collision.
1663 params.device_name = GetBaseDeviceName(params.GetPartitionName());
1664 }
1665
1666 AutoDeviceList created_devices;
1667
1668 // Create the base device for the snapshot, or if there is no snapshot, the
1669 // device itself. This device consists of the real blocks in the super
1670 // partition that this logical partition occupies.
1671 auto& dm = DeviceMapper::Instance();
1672 std::string base_path;
1673 if (!CreateLogicalPartition(params, &base_path)) {
1674 LOG(ERROR) << "Could not create logical partition " << params.GetPartitionName()
1675 << " as device " << params.GetDeviceName();
1676 return false;
1677 }
1678 created_devices.EmplaceBack<AutoUnmapDevice>(&dm, params.GetDeviceName());
1679
1680 if (!live_snapshot_status.has_value()) {
1681 *path = base_path;
1682 created_devices.Release();
1683 return true;
1684 }
1685
1686 // We don't have ueventd in first-stage init, so use device major:minor
1687 // strings instead.
1688 std::string base_device;
1689 if (!dm.GetDeviceString(params.GetDeviceName(), &base_device)) {
1690 LOG(ERROR) << "Could not determine major/minor for: " << params.GetDeviceName();
1691 return false;
1692 }
1693
1694 auto remaining_time = GetRemainingTime(params.timeout_ms, begin);
1695 if (remaining_time.count() < 0) return false;
1696
1697 std::string cow_name;
1698 CreateLogicalPartitionParams cow_params = params;
1699 cow_params.timeout_ms = remaining_time;
1700 if (!MapCowDevices(lock, cow_params, *live_snapshot_status, &created_devices, &cow_name)) {
1701 return false;
1702 }
1703 std::string cow_device;
1704 if (!GetMappedImageDeviceStringOrPath(cow_name, &cow_device)) {
1705 LOG(ERROR) << "Could not determine major/minor for: " << cow_name;
1706 return false;
1707 }
1708
1709 remaining_time = GetRemainingTime(params.timeout_ms, begin);
1710 if (remaining_time.count() < 0) return false;
1711
1712 if (!MapSnapshot(lock, params.GetPartitionName(), base_device, cow_device, remaining_time,
1713 path)) {
1714 LOG(ERROR) << "Could not map snapshot for partition: " << params.GetPartitionName();
1715 return false;
1716 }
1717 // No need to add params.GetPartitionName() to created_devices since it is immediately released.
1718
1719 created_devices.Release();
1720
1721 LOG(INFO) << "Mapped " << params.GetPartitionName() << " as snapshot device at " << *path;
1722
1723 return true;
1724 }
1725
UnmapPartitionWithSnapshot(LockedFile * lock,const std::string & target_partition_name)1726 bool SnapshotManager::UnmapPartitionWithSnapshot(LockedFile* lock,
1727 const std::string& target_partition_name) {
1728 CHECK(lock);
1729
1730 if (!UnmapSnapshot(lock, target_partition_name)) {
1731 return false;
1732 }
1733
1734 if (!UnmapCowDevices(lock, target_partition_name)) {
1735 return false;
1736 }
1737
1738 auto& dm = DeviceMapper::Instance();
1739 std::string base_name = GetBaseDeviceName(target_partition_name);
1740 if (!dm.DeleteDeviceIfExists(base_name)) {
1741 LOG(ERROR) << "Cannot delete base device: " << base_name;
1742 return false;
1743 }
1744
1745 LOG(INFO) << "Successfully unmapped snapshot " << target_partition_name;
1746
1747 return true;
1748 }
1749
MapCowDevices(LockedFile * lock,const CreateLogicalPartitionParams & params,const SnapshotStatus & snapshot_status,AutoDeviceList * created_devices,std::string * cow_name)1750 bool SnapshotManager::MapCowDevices(LockedFile* lock, const CreateLogicalPartitionParams& params,
1751 const SnapshotStatus& snapshot_status,
1752 AutoDeviceList* created_devices, std::string* cow_name) {
1753 CHECK(lock);
1754 CHECK(snapshot_status.cow_partition_size() + snapshot_status.cow_file_size() > 0);
1755 auto begin = std::chrono::steady_clock::now();
1756
1757 std::string partition_name = params.GetPartitionName();
1758 std::string cow_image_name = GetCowImageDeviceName(partition_name);
1759 *cow_name = GetCowName(partition_name);
1760
1761 auto& dm = DeviceMapper::Instance();
1762
1763 // Map COW image if necessary.
1764 if (snapshot_status.cow_file_size() > 0) {
1765 if (!EnsureImageManager()) return false;
1766 auto remaining_time = GetRemainingTime(params.timeout_ms, begin);
1767 if (remaining_time.count() < 0) return false;
1768
1769 if (!MapCowImage(partition_name, remaining_time).has_value()) {
1770 LOG(ERROR) << "Could not map cow image for partition: " << partition_name;
1771 return false;
1772 }
1773 created_devices->EmplaceBack<AutoUnmapImage>(images_.get(), cow_image_name);
1774
1775 // If no COW partition exists, just return the image alone.
1776 if (snapshot_status.cow_partition_size() == 0) {
1777 *cow_name = std::move(cow_image_name);
1778 LOG(INFO) << "Mapped COW image for " << partition_name << " at " << *cow_name;
1779 return true;
1780 }
1781 }
1782
1783 auto remaining_time = GetRemainingTime(params.timeout_ms, begin);
1784 if (remaining_time.count() < 0) return false;
1785
1786 CHECK(snapshot_status.cow_partition_size() > 0);
1787
1788 // Create the DmTable for the COW device. It is the DmTable of the COW partition plus
1789 // COW image device as the last extent.
1790 CreateLogicalPartitionParams cow_partition_params = params;
1791 cow_partition_params.partition = nullptr;
1792 cow_partition_params.partition_name = *cow_name;
1793 cow_partition_params.device_name.clear();
1794 DmTable table;
1795 if (!CreateDmTable(cow_partition_params, &table)) {
1796 return false;
1797 }
1798 // If the COW image exists, append it as the last extent.
1799 if (snapshot_status.cow_file_size() > 0) {
1800 std::string cow_image_device;
1801 if (!GetMappedImageDeviceStringOrPath(cow_image_name, &cow_image_device)) {
1802 LOG(ERROR) << "Cannot determine major/minor for: " << cow_image_name;
1803 return false;
1804 }
1805 auto cow_partition_sectors = snapshot_status.cow_partition_size() / kSectorSize;
1806 auto cow_image_sectors = snapshot_status.cow_file_size() / kSectorSize;
1807 table.Emplace<DmTargetLinear>(cow_partition_sectors, cow_image_sectors, cow_image_device,
1808 0);
1809 }
1810
1811 // We have created the DmTable now. Map it.
1812 std::string cow_path;
1813 if (!dm.CreateDevice(*cow_name, table, &cow_path, remaining_time)) {
1814 LOG(ERROR) << "Could not create COW device: " << *cow_name;
1815 return false;
1816 }
1817 created_devices->EmplaceBack<AutoUnmapDevice>(&dm, *cow_name);
1818 LOG(INFO) << "Mapped COW device for " << params.GetPartitionName() << " at " << cow_path;
1819 return true;
1820 }
1821
UnmapCowDevices(LockedFile * lock,const std::string & name)1822 bool SnapshotManager::UnmapCowDevices(LockedFile* lock, const std::string& name) {
1823 CHECK(lock);
1824 if (!EnsureImageManager()) return false;
1825
1826 auto& dm = DeviceMapper::Instance();
1827 auto cow_name = GetCowName(name);
1828 if (!dm.DeleteDeviceIfExists(cow_name)) {
1829 LOG(ERROR) << "Cannot unmap " << cow_name;
1830 return false;
1831 }
1832
1833 std::string cow_image_name = GetCowImageDeviceName(name);
1834 if (!images_->UnmapImageIfExists(cow_image_name)) {
1835 LOG(ERROR) << "Cannot unmap image " << cow_image_name;
1836 return false;
1837 }
1838 return true;
1839 }
1840
OpenFile(const std::string & file,int lock_flags)1841 auto SnapshotManager::OpenFile(const std::string& file, int lock_flags)
1842 -> std::unique_ptr<LockedFile> {
1843 unique_fd fd(open(file.c_str(), O_RDONLY | O_CLOEXEC | O_NOFOLLOW));
1844 if (fd < 0) {
1845 PLOG(ERROR) << "Open failed: " << file;
1846 return nullptr;
1847 }
1848 if (lock_flags != 0 && TEMP_FAILURE_RETRY(flock(fd, lock_flags)) < 0) {
1849 PLOG(ERROR) << "Acquire flock failed: " << file;
1850 return nullptr;
1851 }
1852 // For simplicity, we want to CHECK that lock_mode == LOCK_EX, in some
1853 // calls, so strip extra flags.
1854 int lock_mode = lock_flags & (LOCK_EX | LOCK_SH);
1855 return std::make_unique<LockedFile>(file, std::move(fd), lock_mode);
1856 }
1857
~LockedFile()1858 SnapshotManager::LockedFile::~LockedFile() {
1859 if (TEMP_FAILURE_RETRY(flock(fd_, LOCK_UN)) < 0) {
1860 PLOG(ERROR) << "Failed to unlock file: " << path_;
1861 }
1862 }
1863
GetStateFilePath() const1864 std::string SnapshotManager::GetStateFilePath() const {
1865 return metadata_dir_ + "/state"s;
1866 }
1867
GetMergeStateFilePath() const1868 std::string SnapshotManager::GetMergeStateFilePath() const {
1869 return metadata_dir_ + "/merge_state"s;
1870 }
1871
GetLockPath() const1872 std::string SnapshotManager::GetLockPath() const {
1873 return metadata_dir_;
1874 }
1875
OpenLock(int lock_flags)1876 std::unique_ptr<SnapshotManager::LockedFile> SnapshotManager::OpenLock(int lock_flags) {
1877 auto lock_file = GetLockPath();
1878 return OpenFile(lock_file, lock_flags);
1879 }
1880
LockShared()1881 std::unique_ptr<SnapshotManager::LockedFile> SnapshotManager::LockShared() {
1882 return OpenLock(LOCK_SH);
1883 }
1884
LockExclusive()1885 std::unique_ptr<SnapshotManager::LockedFile> SnapshotManager::LockExclusive() {
1886 return OpenLock(LOCK_EX);
1887 }
1888
UpdateStateFromString(const std::string & contents)1889 static UpdateState UpdateStateFromString(const std::string& contents) {
1890 if (contents.empty() || contents == "none") {
1891 return UpdateState::None;
1892 } else if (contents == "initiated") {
1893 return UpdateState::Initiated;
1894 } else if (contents == "unverified") {
1895 return UpdateState::Unverified;
1896 } else if (contents == "merging") {
1897 return UpdateState::Merging;
1898 } else if (contents == "merge-completed") {
1899 return UpdateState::MergeCompleted;
1900 } else if (contents == "merge-needs-reboot") {
1901 return UpdateState::MergeNeedsReboot;
1902 } else if (contents == "merge-failed") {
1903 return UpdateState::MergeFailed;
1904 } else if (contents == "cancelled") {
1905 return UpdateState::Cancelled;
1906 } else {
1907 LOG(ERROR) << "Unknown merge state in update state file: \"" << contents << "\"";
1908 return UpdateState::None;
1909 }
1910 }
1911
operator <<(std::ostream & os,UpdateState state)1912 std::ostream& operator<<(std::ostream& os, UpdateState state) {
1913 switch (state) {
1914 case UpdateState::None:
1915 return os << "none";
1916 case UpdateState::Initiated:
1917 return os << "initiated";
1918 case UpdateState::Unverified:
1919 return os << "unverified";
1920 case UpdateState::Merging:
1921 return os << "merging";
1922 case UpdateState::MergeCompleted:
1923 return os << "merge-completed";
1924 case UpdateState::MergeNeedsReboot:
1925 return os << "merge-needs-reboot";
1926 case UpdateState::MergeFailed:
1927 return os << "merge-failed";
1928 case UpdateState::Cancelled:
1929 return os << "cancelled";
1930 default:
1931 LOG(ERROR) << "Unknown update state: " << static_cast<uint32_t>(state);
1932 return os;
1933 }
1934 }
1935
ReadUpdateState(LockedFile * lock)1936 UpdateState SnapshotManager::ReadUpdateState(LockedFile* lock) {
1937 SnapshotUpdateStatus status = ReadSnapshotUpdateStatus(lock);
1938 return status.state();
1939 }
1940
ReadSnapshotUpdateStatus(LockedFile * lock)1941 SnapshotUpdateStatus SnapshotManager::ReadSnapshotUpdateStatus(LockedFile* lock) {
1942 CHECK(lock);
1943
1944 SnapshotUpdateStatus status = {};
1945 std::string contents;
1946 if (!android::base::ReadFileToString(GetStateFilePath(), &contents)) {
1947 PLOG(ERROR) << "Read state file failed";
1948 status.set_state(UpdateState::None);
1949 return status;
1950 }
1951
1952 if (!status.ParseFromString(contents)) {
1953 LOG(WARNING) << "Unable to parse state file as SnapshotUpdateStatus, using the old format";
1954
1955 // Try to rollback to legacy file to support devices that are
1956 // currently using the old file format.
1957 // TODO(b/147409432)
1958 status.set_state(UpdateStateFromString(contents));
1959 }
1960
1961 return status;
1962 }
1963
WriteUpdateState(LockedFile * lock,UpdateState state)1964 bool SnapshotManager::WriteUpdateState(LockedFile* lock, UpdateState state) {
1965 SnapshotUpdateStatus status = {};
1966 status.set_state(state);
1967 return WriteSnapshotUpdateStatus(lock, status);
1968 }
1969
WriteSnapshotUpdateStatus(LockedFile * lock,const SnapshotUpdateStatus & status)1970 bool SnapshotManager::WriteSnapshotUpdateStatus(LockedFile* lock,
1971 const SnapshotUpdateStatus& status) {
1972 CHECK(lock);
1973 CHECK(lock->lock_mode() == LOCK_EX);
1974
1975 std::string contents;
1976 if (!status.SerializeToString(&contents)) {
1977 LOG(ERROR) << "Unable to serialize SnapshotUpdateStatus.";
1978 return false;
1979 }
1980
1981 #ifdef LIBSNAPSHOT_USE_HAL
1982 auto merge_status = MergeStatus::UNKNOWN;
1983 switch (status.state()) {
1984 // The needs-reboot and completed cases imply that /data and /metadata
1985 // can be safely wiped, so we don't report a merge status.
1986 case UpdateState::None:
1987 case UpdateState::MergeNeedsReboot:
1988 case UpdateState::MergeCompleted:
1989 case UpdateState::Initiated:
1990 merge_status = MergeStatus::NONE;
1991 break;
1992 case UpdateState::Unverified:
1993 merge_status = MergeStatus::SNAPSHOTTED;
1994 break;
1995 case UpdateState::Merging:
1996 case UpdateState::MergeFailed:
1997 merge_status = MergeStatus::MERGING;
1998 break;
1999 default:
2000 // Note that Cancelled flows to here - it is never written, since
2001 // it only communicates a transient state to the caller.
2002 LOG(ERROR) << "Unexpected update status: " << status.state();
2003 break;
2004 }
2005
2006 bool set_before_write =
2007 merge_status == MergeStatus::SNAPSHOTTED || merge_status == MergeStatus::MERGING;
2008 if (set_before_write && !device_->SetBootControlMergeStatus(merge_status)) {
2009 return false;
2010 }
2011 #endif
2012
2013 if (!WriteStringToFileAtomic(contents, GetStateFilePath())) {
2014 PLOG(ERROR) << "Could not write to state file";
2015 return false;
2016 }
2017
2018 #ifdef LIBSNAPSHOT_USE_HAL
2019 if (!set_before_write && !device_->SetBootControlMergeStatus(merge_status)) {
2020 return false;
2021 }
2022 #endif
2023 return true;
2024 }
2025
GetSnapshotStatusFilePath(const std::string & name)2026 std::string SnapshotManager::GetSnapshotStatusFilePath(const std::string& name) {
2027 auto file = metadata_dir_ + "/snapshots/"s + name;
2028 return file;
2029 }
2030
ReadSnapshotStatus(LockedFile * lock,const std::string & name,SnapshotStatus * status)2031 bool SnapshotManager::ReadSnapshotStatus(LockedFile* lock, const std::string& name,
2032 SnapshotStatus* status) {
2033 CHECK(lock);
2034 auto path = GetSnapshotStatusFilePath(name);
2035
2036 unique_fd fd(open(path.c_str(), O_RDONLY | O_CLOEXEC | O_NOFOLLOW));
2037 if (fd < 0) {
2038 PLOG(ERROR) << "Open failed: " << path;
2039 return false;
2040 }
2041
2042 if (!status->ParseFromFileDescriptor(fd.get())) {
2043 PLOG(ERROR) << "Unable to parse " << path << " as SnapshotStatus";
2044 return false;
2045 }
2046
2047 if (status->name() != name) {
2048 LOG(WARNING) << "Found snapshot status named " << status->name() << " in " << path;
2049 status->set_name(name);
2050 }
2051
2052 return true;
2053 }
2054
WriteSnapshotStatus(LockedFile * lock,const SnapshotStatus & status)2055 bool SnapshotManager::WriteSnapshotStatus(LockedFile* lock, const SnapshotStatus& status) {
2056 // The caller must take an exclusive lock to modify snapshots.
2057 CHECK(lock);
2058 CHECK(lock->lock_mode() == LOCK_EX);
2059 CHECK(!status.name().empty());
2060
2061 auto path = GetSnapshotStatusFilePath(status.name());
2062
2063 std::string content;
2064 if (!status.SerializeToString(&content)) {
2065 LOG(ERROR) << "Unable to serialize SnapshotStatus for " << status.name();
2066 return false;
2067 }
2068
2069 if (!WriteStringToFileAtomic(content, path)) {
2070 PLOG(ERROR) << "Unable to write SnapshotStatus to " << path;
2071 return false;
2072 }
2073
2074 return true;
2075 }
2076
GetSnapshotDeviceName(const std::string & snapshot_name,const SnapshotStatus & status)2077 std::string SnapshotManager::GetSnapshotDeviceName(const std::string& snapshot_name,
2078 const SnapshotStatus& status) {
2079 if (status.device_size() != status.snapshot_size()) {
2080 return GetSnapshotExtraDeviceName(snapshot_name);
2081 }
2082 return snapshot_name;
2083 }
2084
EnsureImageManager()2085 bool SnapshotManager::EnsureImageManager() {
2086 if (images_) return true;
2087
2088 // For now, use a preset timeout.
2089 images_ = android::fiemap::IImageManager::Open(gsid_dir_, 15000ms);
2090 if (!images_) {
2091 LOG(ERROR) << "Could not open ImageManager";
2092 return false;
2093 }
2094 return true;
2095 }
2096
ForceLocalImageManager()2097 bool SnapshotManager::ForceLocalImageManager() {
2098 images_ = android::fiemap::ImageManager::Open(gsid_dir_);
2099 if (!images_) {
2100 LOG(ERROR) << "Could not open ImageManager";
2101 return false;
2102 }
2103 has_local_image_manager_ = true;
2104 return true;
2105 }
2106
UnmapAndDeleteCowPartition(MetadataBuilder * current_metadata)2107 static void UnmapAndDeleteCowPartition(MetadataBuilder* current_metadata) {
2108 auto& dm = DeviceMapper::Instance();
2109 std::vector<std::string> to_delete;
2110 for (auto* existing_cow_partition : current_metadata->ListPartitionsInGroup(kCowGroupName)) {
2111 if (!dm.DeleteDeviceIfExists(existing_cow_partition->name())) {
2112 LOG(WARNING) << existing_cow_partition->name()
2113 << " cannot be unmapped and its space cannot be reclaimed";
2114 continue;
2115 }
2116 to_delete.push_back(existing_cow_partition->name());
2117 }
2118 for (const auto& name : to_delete) {
2119 current_metadata->RemovePartition(name);
2120 }
2121 }
2122
AddRequiredSpace(Return orig,const std::map<std::string,SnapshotStatus> & all_snapshot_status)2123 static Return AddRequiredSpace(Return orig,
2124 const std::map<std::string, SnapshotStatus>& all_snapshot_status) {
2125 if (orig.error_code() != Return::ErrorCode::NO_SPACE) {
2126 return orig;
2127 }
2128 uint64_t sum = 0;
2129 for (auto&& [name, status] : all_snapshot_status) {
2130 sum += status.cow_file_size();
2131 }
2132 return Return::NoSpace(sum);
2133 }
2134
CreateUpdateSnapshots(const DeltaArchiveManifest & manifest)2135 Return SnapshotManager::CreateUpdateSnapshots(const DeltaArchiveManifest& manifest) {
2136 auto lock = LockExclusive();
2137 if (!lock) return Return::Error();
2138
2139 // TODO(b/134949511): remove this check. Right now, with overlayfs mounted, the scratch
2140 // partition takes up a big chunk of space in super, causing COW images to be created on
2141 // retrofit Virtual A/B devices.
2142 if (device_->IsOverlayfsSetup()) {
2143 LOG(ERROR) << "Cannot create update snapshots with overlayfs setup. Run `adb enable-verity`"
2144 << ", reboot, then try again.";
2145 return Return::Error();
2146 }
2147
2148 const auto& opener = device_->GetPartitionOpener();
2149 auto current_suffix = device_->GetSlotSuffix();
2150 uint32_t current_slot = SlotNumberForSlotSuffix(current_suffix);
2151 auto target_suffix = device_->GetOtherSlotSuffix();
2152 uint32_t target_slot = SlotNumberForSlotSuffix(target_suffix);
2153 auto current_super = device_->GetSuperDevice(current_slot);
2154
2155 auto current_metadata = MetadataBuilder::New(opener, current_super, current_slot);
2156 if (current_metadata == nullptr) {
2157 LOG(ERROR) << "Cannot create metadata builder.";
2158 return Return::Error();
2159 }
2160
2161 auto target_metadata =
2162 MetadataBuilder::NewForUpdate(opener, current_super, current_slot, target_slot);
2163 if (target_metadata == nullptr) {
2164 LOG(ERROR) << "Cannot create target metadata builder.";
2165 return Return::Error();
2166 }
2167
2168 // Delete partitions with target suffix in |current_metadata|. Otherwise,
2169 // partition_cow_creator recognizes these left-over partitions as used space.
2170 for (const auto& group_name : current_metadata->ListGroups()) {
2171 if (android::base::EndsWith(group_name, target_suffix)) {
2172 current_metadata->RemoveGroupAndPartitions(group_name);
2173 }
2174 }
2175
2176 SnapshotMetadataUpdater metadata_updater(target_metadata.get(), target_slot, manifest);
2177 if (!metadata_updater.Update()) {
2178 LOG(ERROR) << "Cannot calculate new metadata.";
2179 return Return::Error();
2180 }
2181
2182 // Delete previous COW partitions in current_metadata so that PartitionCowCreator marks those as
2183 // free regions.
2184 UnmapAndDeleteCowPartition(current_metadata.get());
2185
2186 // Check that all these metadata is not retrofit dynamic partitions. Snapshots on
2187 // devices with retrofit dynamic partitions does not make sense.
2188 // This ensures that current_metadata->GetFreeRegions() uses the same device
2189 // indices as target_metadata (i.e. 0 -> "super").
2190 // This is also assumed in MapCowDevices() call below.
2191 CHECK(current_metadata->GetBlockDevicePartitionName(0) == LP_METADATA_DEFAULT_PARTITION_NAME &&
2192 target_metadata->GetBlockDevicePartitionName(0) == LP_METADATA_DEFAULT_PARTITION_NAME);
2193
2194 std::map<std::string, SnapshotStatus> all_snapshot_status;
2195
2196 // In case of error, automatically delete devices that are created along the way.
2197 // Note that "lock" is destroyed after "created_devices", so it is safe to use |lock| for
2198 // these devices.
2199 AutoDeviceList created_devices;
2200
2201 PartitionCowCreator cow_creator{
2202 .target_metadata = target_metadata.get(),
2203 .target_suffix = target_suffix,
2204 .target_partition = nullptr,
2205 .current_metadata = current_metadata.get(),
2206 .current_suffix = current_suffix,
2207 .operations = nullptr,
2208 .extra_extents = {},
2209 };
2210
2211 auto ret = CreateUpdateSnapshotsInternal(lock.get(), manifest, &cow_creator, &created_devices,
2212 &all_snapshot_status);
2213 if (!ret.is_ok()) return ret;
2214
2215 auto exported_target_metadata = target_metadata->Export();
2216 if (exported_target_metadata == nullptr) {
2217 LOG(ERROR) << "Cannot export target metadata";
2218 return Return::Error();
2219 }
2220
2221 ret = InitializeUpdateSnapshots(lock.get(), target_metadata.get(),
2222 exported_target_metadata.get(), target_suffix,
2223 all_snapshot_status);
2224 if (!ret.is_ok()) return ret;
2225
2226 if (!UpdatePartitionTable(opener, device_->GetSuperDevice(target_slot),
2227 *exported_target_metadata, target_slot)) {
2228 LOG(ERROR) << "Cannot write target metadata";
2229 return Return::Error();
2230 }
2231
2232 created_devices.Release();
2233 LOG(INFO) << "Successfully created all snapshots for target slot " << target_suffix;
2234
2235 return Return::Ok();
2236 }
2237
CreateUpdateSnapshotsInternal(LockedFile * lock,const DeltaArchiveManifest & manifest,PartitionCowCreator * cow_creator,AutoDeviceList * created_devices,std::map<std::string,SnapshotStatus> * all_snapshot_status)2238 Return SnapshotManager::CreateUpdateSnapshotsInternal(
2239 LockedFile* lock, const DeltaArchiveManifest& manifest, PartitionCowCreator* cow_creator,
2240 AutoDeviceList* created_devices,
2241 std::map<std::string, SnapshotStatus>* all_snapshot_status) {
2242 CHECK(lock);
2243
2244 auto* target_metadata = cow_creator->target_metadata;
2245 const auto& target_suffix = cow_creator->target_suffix;
2246
2247 if (!target_metadata->AddGroup(kCowGroupName, 0)) {
2248 LOG(ERROR) << "Cannot add group " << kCowGroupName;
2249 return Return::Error();
2250 }
2251
2252 std::map<std::string, const RepeatedPtrField<InstallOperation>*> install_operation_map;
2253 std::map<std::string, std::vector<Extent>> extra_extents_map;
2254 for (const auto& partition_update : manifest.partitions()) {
2255 auto suffixed_name = partition_update.partition_name() + target_suffix;
2256 auto&& [it, inserted] =
2257 install_operation_map.emplace(suffixed_name, &partition_update.operations());
2258 if (!inserted) {
2259 LOG(ERROR) << "Duplicated partition " << partition_update.partition_name()
2260 << " in update manifest.";
2261 return Return::Error();
2262 }
2263
2264 auto& extra_extents = extra_extents_map[suffixed_name];
2265 if (partition_update.has_hash_tree_extent()) {
2266 extra_extents.push_back(partition_update.hash_tree_extent());
2267 }
2268 if (partition_update.has_fec_extent()) {
2269 extra_extents.push_back(partition_update.fec_extent());
2270 }
2271 }
2272
2273 for (auto* target_partition : ListPartitionsWithSuffix(target_metadata, target_suffix)) {
2274 cow_creator->target_partition = target_partition;
2275 cow_creator->operations = nullptr;
2276 auto operations_it = install_operation_map.find(target_partition->name());
2277 if (operations_it != install_operation_map.end()) {
2278 cow_creator->operations = operations_it->second;
2279 } else {
2280 LOG(INFO) << target_partition->name()
2281 << " isn't included in the payload, skipping the cow creation.";
2282 continue;
2283 }
2284
2285 cow_creator->extra_extents.clear();
2286 auto extra_extents_it = extra_extents_map.find(target_partition->name());
2287 if (extra_extents_it != extra_extents_map.end()) {
2288 cow_creator->extra_extents = std::move(extra_extents_it->second);
2289 }
2290
2291 // Compute the device sizes for the partition.
2292 auto cow_creator_ret = cow_creator->Run();
2293 if (!cow_creator_ret.has_value()) {
2294 return Return::Error();
2295 }
2296
2297 LOG(INFO) << "For partition " << target_partition->name()
2298 << ", device size = " << cow_creator_ret->snapshot_status.device_size()
2299 << ", snapshot size = " << cow_creator_ret->snapshot_status.snapshot_size()
2300 << ", cow partition size = "
2301 << cow_creator_ret->snapshot_status.cow_partition_size()
2302 << ", cow file size = " << cow_creator_ret->snapshot_status.cow_file_size();
2303
2304 // Delete any existing snapshot before re-creating one.
2305 if (!DeleteSnapshot(lock, target_partition->name())) {
2306 LOG(ERROR) << "Cannot delete existing snapshot before creating a new one for partition "
2307 << target_partition->name();
2308 return Return::Error();
2309 }
2310
2311 // It is possible that the whole partition uses free space in super, and snapshot / COW
2312 // would not be needed. In this case, skip the partition.
2313 bool needs_snapshot = cow_creator_ret->snapshot_status.snapshot_size() > 0;
2314 bool needs_cow = (cow_creator_ret->snapshot_status.cow_partition_size() +
2315 cow_creator_ret->snapshot_status.cow_file_size()) > 0;
2316 CHECK(needs_snapshot == needs_cow);
2317
2318 if (!needs_snapshot) {
2319 LOG(INFO) << "Skip creating snapshot for partition " << target_partition->name()
2320 << "because nothing needs to be snapshotted.";
2321 continue;
2322 }
2323
2324 // Store these device sizes to snapshot status file.
2325 if (!CreateSnapshot(lock, &cow_creator_ret->snapshot_status)) {
2326 return Return::Error();
2327 }
2328 created_devices->EmplaceBack<AutoDeleteSnapshot>(this, lock, target_partition->name());
2329
2330 // Create the COW partition. That is, use any remaining free space in super partition before
2331 // creating the COW images.
2332 if (cow_creator_ret->snapshot_status.cow_partition_size() > 0) {
2333 CHECK(cow_creator_ret->snapshot_status.cow_partition_size() % kSectorSize == 0)
2334 << "cow_partition_size == "
2335 << cow_creator_ret->snapshot_status.cow_partition_size()
2336 << " is not a multiple of sector size " << kSectorSize;
2337 auto cow_partition = target_metadata->AddPartition(GetCowName(target_partition->name()),
2338 kCowGroupName, 0 /* flags */);
2339 if (cow_partition == nullptr) {
2340 return Return::Error();
2341 }
2342
2343 if (!target_metadata->ResizePartition(
2344 cow_partition, cow_creator_ret->snapshot_status.cow_partition_size(),
2345 cow_creator_ret->cow_partition_usable_regions)) {
2346 LOG(ERROR) << "Cannot create COW partition on metadata with size "
2347 << cow_creator_ret->snapshot_status.cow_partition_size();
2348 return Return::Error();
2349 }
2350 // Only the in-memory target_metadata is modified; nothing to clean up if there is an
2351 // error in the future.
2352 }
2353
2354 all_snapshot_status->emplace(target_partition->name(),
2355 std::move(cow_creator_ret->snapshot_status));
2356
2357 LOG(INFO) << "Successfully created snapshot partition for " << target_partition->name();
2358 }
2359
2360 LOG(INFO) << "Allocating CoW images.";
2361
2362 for (auto&& [name, snapshot_status] : *all_snapshot_status) {
2363 // Create the backing COW image if necessary.
2364 if (snapshot_status.cow_file_size() > 0) {
2365 auto ret = CreateCowImage(lock, name);
2366 if (!ret.is_ok()) return AddRequiredSpace(ret, *all_snapshot_status);
2367 }
2368
2369 LOG(INFO) << "Successfully created snapshot for " << name;
2370 }
2371
2372 return Return::Ok();
2373 }
2374
InitializeUpdateSnapshots(LockedFile * lock,MetadataBuilder * target_metadata,const LpMetadata * exported_target_metadata,const std::string & target_suffix,const std::map<std::string,SnapshotStatus> & all_snapshot_status)2375 Return SnapshotManager::InitializeUpdateSnapshots(
2376 LockedFile* lock, MetadataBuilder* target_metadata,
2377 const LpMetadata* exported_target_metadata, const std::string& target_suffix,
2378 const std::map<std::string, SnapshotStatus>& all_snapshot_status) {
2379 CHECK(lock);
2380
2381 CreateLogicalPartitionParams cow_params{
2382 .block_device = LP_METADATA_DEFAULT_PARTITION_NAME,
2383 .metadata = exported_target_metadata,
2384 .timeout_ms = std::chrono::milliseconds::max(),
2385 .partition_opener = &device_->GetPartitionOpener(),
2386 };
2387 for (auto* target_partition : ListPartitionsWithSuffix(target_metadata, target_suffix)) {
2388 AutoDeviceList created_devices_for_cow;
2389
2390 if (!UnmapPartitionWithSnapshot(lock, target_partition->name())) {
2391 LOG(ERROR) << "Cannot unmap existing COW devices before re-mapping them for zero-fill: "
2392 << target_partition->name();
2393 return Return::Error();
2394 }
2395
2396 auto it = all_snapshot_status.find(target_partition->name());
2397 if (it == all_snapshot_status.end()) continue;
2398 cow_params.partition_name = target_partition->name();
2399 std::string cow_name;
2400 if (!MapCowDevices(lock, cow_params, it->second, &created_devices_for_cow, &cow_name)) {
2401 return Return::Error();
2402 }
2403
2404 std::string cow_path;
2405 if (!images_->GetMappedImageDevice(cow_name, &cow_path)) {
2406 LOG(ERROR) << "Cannot determine path for " << cow_name;
2407 return Return::Error();
2408 }
2409
2410 auto ret = InitializeCow(cow_path);
2411 if (!ret.is_ok()) {
2412 LOG(ERROR) << "Can't zero-fill COW device for " << target_partition->name() << ": "
2413 << cow_path;
2414 return AddRequiredSpace(ret, all_snapshot_status);
2415 }
2416 // Let destructor of created_devices_for_cow to unmap the COW devices.
2417 };
2418 return Return::Ok();
2419 }
2420
MapUpdateSnapshot(const CreateLogicalPartitionParams & params,std::string * snapshot_path)2421 bool SnapshotManager::MapUpdateSnapshot(const CreateLogicalPartitionParams& params,
2422 std::string* snapshot_path) {
2423 auto lock = LockShared();
2424 if (!lock) return false;
2425 if (!UnmapPartitionWithSnapshot(lock.get(), params.GetPartitionName())) {
2426 LOG(ERROR) << "Cannot unmap existing snapshot before re-mapping it: "
2427 << params.GetPartitionName();
2428 return false;
2429 }
2430 return MapPartitionWithSnapshot(lock.get(), params, snapshot_path);
2431 }
2432
UnmapUpdateSnapshot(const std::string & target_partition_name)2433 bool SnapshotManager::UnmapUpdateSnapshot(const std::string& target_partition_name) {
2434 auto lock = LockShared();
2435 if (!lock) return false;
2436 return UnmapPartitionWithSnapshot(lock.get(), target_partition_name);
2437 }
2438
UnmapAllPartitions()2439 bool SnapshotManager::UnmapAllPartitions() {
2440 auto lock = LockExclusive();
2441 if (!lock) return false;
2442
2443 const auto& opener = device_->GetPartitionOpener();
2444 uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
2445 auto super_device = device_->GetSuperDevice(slot);
2446 auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot);
2447 if (!metadata) {
2448 LOG(ERROR) << "Could not read dynamic partition metadata for device: " << super_device;
2449 return false;
2450 }
2451
2452 bool ok = true;
2453 for (const auto& partition : metadata->partitions) {
2454 auto partition_name = GetPartitionName(partition);
2455 ok &= UnmapPartitionWithSnapshot(lock.get(), partition_name);
2456 }
2457 return ok;
2458 }
2459
operator <<(std::ostream & os,SnapshotManager::Slot slot)2460 std::ostream& operator<<(std::ostream& os, SnapshotManager::Slot slot) {
2461 switch (slot) {
2462 case SnapshotManager::Slot::Unknown:
2463 return os << "unknown";
2464 case SnapshotManager::Slot::Source:
2465 return os << "source";
2466 case SnapshotManager::Slot::Target:
2467 return os << "target";
2468 }
2469 }
2470
Dump(std::ostream & os)2471 bool SnapshotManager::Dump(std::ostream& os) {
2472 // Don't actually lock. Dump() is for debugging purposes only, so it is okay
2473 // if it is racy.
2474 auto file = OpenLock(0 /* lock flag */);
2475 if (!file) return false;
2476
2477 std::stringstream ss;
2478
2479 ss << "Update state: " << ReadUpdateState(file.get()) << std::endl;
2480
2481 ss << "Current slot: " << device_->GetSlotSuffix() << std::endl;
2482 ss << "Boot indicator: booting from " << GetCurrentSlot() << " slot" << std::endl;
2483 ss << "Rollback indicator: "
2484 << (access(GetRollbackIndicatorPath().c_str(), F_OK) == 0 ? "exists" : strerror(errno))
2485 << std::endl;
2486 ss << "Forward merge indicator: "
2487 << (access(GetForwardMergeIndicatorPath().c_str(), F_OK) == 0 ? "exists" : strerror(errno))
2488 << std::endl;
2489
2490 bool ok = true;
2491 std::vector<std::string> snapshots;
2492 if (!ListSnapshots(file.get(), &snapshots)) {
2493 LOG(ERROR) << "Could not list snapshots";
2494 snapshots.clear();
2495 ok = false;
2496 }
2497 for (const auto& name : snapshots) {
2498 ss << "Snapshot: " << name << std::endl;
2499 SnapshotStatus status;
2500 if (!ReadSnapshotStatus(file.get(), name, &status)) {
2501 ok = false;
2502 continue;
2503 }
2504 ss << " state: " << SnapshotState_Name(status.state()) << std::endl;
2505 ss << " device size (bytes): " << status.device_size() << std::endl;
2506 ss << " snapshot size (bytes): " << status.snapshot_size() << std::endl;
2507 ss << " cow partition size (bytes): " << status.cow_partition_size() << std::endl;
2508 ss << " cow file size (bytes): " << status.cow_file_size() << std::endl;
2509 ss << " allocated sectors: " << status.sectors_allocated() << std::endl;
2510 ss << " metadata sectors: " << status.metadata_sectors() << std::endl;
2511 }
2512 os << ss.rdbuf();
2513 return ok;
2514 }
2515
EnsureMetadataMounted()2516 std::unique_ptr<AutoDevice> SnapshotManager::EnsureMetadataMounted() {
2517 if (!device_->IsRecovery()) {
2518 // No need to mount anything in recovery.
2519 LOG(INFO) << "EnsureMetadataMounted does nothing in Android mode.";
2520 return std::unique_ptr<AutoUnmountDevice>(new AutoUnmountDevice());
2521 }
2522 auto ret = AutoUnmountDevice::New(device_->GetMetadataDir());
2523 if (ret == nullptr) return nullptr;
2524
2525 // In rescue mode, it is possible to erase and format metadata, but /metadata/ota is not
2526 // created to execute snapshot updates. Hence, subsequent calls is likely to fail because
2527 // Lock*() fails. By failing early and returning nullptr here, update_engine_sideload can
2528 // treat this case as if /metadata is not mounted.
2529 if (!LockShared()) {
2530 LOG(WARNING) << "/metadata is mounted, but errors occur when acquiring a shared lock. "
2531 "Subsequent calls to SnapshotManager will fail. Unmounting /metadata now.";
2532 return nullptr;
2533 }
2534 return ret;
2535 }
2536
HandleImminentDataWipe(const std::function<void ()> & callback)2537 bool SnapshotManager::HandleImminentDataWipe(const std::function<void()>& callback) {
2538 if (!device_->IsRecovery()) {
2539 LOG(ERROR) << "Data wipes are only allowed in recovery.";
2540 return false;
2541 }
2542
2543 auto mount = EnsureMetadataMounted();
2544 if (!mount || !mount->HasDevice()) {
2545 // We allow the wipe to continue, because if we can't mount /metadata,
2546 // it is unlikely the device would have booted anyway. If there is no
2547 // metadata partition, then the device predates Virtual A/B.
2548 return true;
2549 }
2550
2551 // Check this early, so we don't accidentally start trying to populate
2552 // the state file in recovery. Note we don't call GetUpdateState since
2553 // we want errors in acquiring the lock to be propagated, instead of
2554 // returning UpdateState::None.
2555 auto state_file = GetStateFilePath();
2556 if (access(state_file.c_str(), F_OK) != 0 && errno == ENOENT) {
2557 return true;
2558 }
2559
2560 auto slot_number = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
2561 auto super_path = device_->GetSuperDevice(slot_number);
2562 if (!CreateLogicalAndSnapshotPartitions(super_path)) {
2563 LOG(ERROR) << "Unable to map partitions to complete merge.";
2564 return false;
2565 }
2566
2567 auto process_callback = [&]() -> bool {
2568 if (callback) {
2569 callback();
2570 }
2571 return true;
2572 };
2573
2574 in_factory_data_reset_ = true;
2575 bool ok = ProcessUpdateStateOnDataWipe(true /* allow_forward_merge */, process_callback);
2576 in_factory_data_reset_ = false;
2577
2578 if (!ok) {
2579 return false;
2580 }
2581
2582 // Nothing should be depending on partitions now, so unmap them all.
2583 if (!UnmapAllPartitions()) {
2584 LOG(ERROR) << "Unable to unmap all partitions; fastboot may fail to flash.";
2585 }
2586 return true;
2587 }
2588
FinishMergeInRecovery()2589 bool SnapshotManager::FinishMergeInRecovery() {
2590 if (!device_->IsRecovery()) {
2591 LOG(ERROR) << "Data wipes are only allowed in recovery.";
2592 return false;
2593 }
2594
2595 auto mount = EnsureMetadataMounted();
2596 if (!mount || !mount->HasDevice()) {
2597 return false;
2598 }
2599
2600 auto slot_number = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
2601 auto super_path = device_->GetSuperDevice(slot_number);
2602 if (!CreateLogicalAndSnapshotPartitions(super_path)) {
2603 LOG(ERROR) << "Unable to map partitions to complete merge.";
2604 return false;
2605 }
2606
2607 UpdateState state = ProcessUpdateState();
2608 if (state != UpdateState::MergeCompleted) {
2609 LOG(ERROR) << "Merge returned unexpected status: " << state;
2610 return false;
2611 }
2612
2613 // Nothing should be depending on partitions now, so unmap them all.
2614 if (!UnmapAllPartitions()) {
2615 LOG(ERROR) << "Unable to unmap all partitions; fastboot may fail to flash.";
2616 }
2617 return true;
2618 }
2619
ProcessUpdateStateOnDataWipe(bool allow_forward_merge,const std::function<bool ()> & callback)2620 bool SnapshotManager::ProcessUpdateStateOnDataWipe(bool allow_forward_merge,
2621 const std::function<bool()>& callback) {
2622 auto slot_number = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
2623 UpdateState state = ProcessUpdateState(callback);
2624 LOG(INFO) << "Update state in recovery: " << state;
2625 switch (state) {
2626 case UpdateState::MergeFailed:
2627 LOG(ERROR) << "Unrecoverable merge failure detected.";
2628 return false;
2629 case UpdateState::Unverified: {
2630 // If an OTA was just applied but has not yet started merging:
2631 //
2632 // - if forward merge is allowed, initiate merge and call
2633 // ProcessUpdateState again.
2634 //
2635 // - if forward merge is not allowed, we
2636 // have no choice but to revert slots, because the current slot will
2637 // immediately become unbootable. Rather than wait for the device
2638 // to reboot N times until a rollback, we proactively disable the
2639 // new slot instead.
2640 //
2641 // Since the rollback is inevitable, we don't treat a HAL failure
2642 // as an error here.
2643 auto slot = GetCurrentSlot();
2644 if (slot == Slot::Target) {
2645 if (allow_forward_merge &&
2646 access(GetForwardMergeIndicatorPath().c_str(), F_OK) == 0) {
2647 LOG(INFO) << "Forward merge allowed, initiating merge now.";
2648 return InitiateMerge() &&
2649 ProcessUpdateStateOnDataWipe(false /* allow_forward_merge */, callback);
2650 }
2651
2652 LOG(ERROR) << "Reverting to old slot since update will be deleted.";
2653 device_->SetSlotAsUnbootable(slot_number);
2654 } else {
2655 LOG(INFO) << "Booting from " << slot << " slot, no action is taken.";
2656 }
2657 break;
2658 }
2659 case UpdateState::MergeNeedsReboot:
2660 // We shouldn't get here, because nothing is depending on
2661 // logical partitions.
2662 LOG(ERROR) << "Unexpected merge-needs-reboot state in recovery.";
2663 break;
2664 default:
2665 break;
2666 }
2667 return true;
2668 }
2669
EnsureNoOverflowSnapshot(LockedFile * lock)2670 bool SnapshotManager::EnsureNoOverflowSnapshot(LockedFile* lock) {
2671 CHECK(lock);
2672
2673 std::vector<std::string> snapshots;
2674 if (!ListSnapshots(lock, &snapshots)) {
2675 LOG(ERROR) << "Could not list snapshots.";
2676 return false;
2677 }
2678
2679 auto& dm = DeviceMapper::Instance();
2680 for (const auto& snapshot : snapshots) {
2681 std::vector<DeviceMapper::TargetInfo> targets;
2682 if (!dm.GetTableStatus(snapshot, &targets)) {
2683 LOG(ERROR) << "Could not read snapshot device table: " << snapshot;
2684 return false;
2685 }
2686 if (targets.size() != 1) {
2687 LOG(ERROR) << "Unexpected device-mapper table for snapshot: " << snapshot
2688 << ", size = " << targets.size();
2689 return false;
2690 }
2691 if (targets[0].IsOverflowSnapshot()) {
2692 LOG(ERROR) << "Detected overflow in snapshot " << snapshot
2693 << ", CoW device size computation is wrong!";
2694 return false;
2695 }
2696 }
2697
2698 return true;
2699 }
2700
RecoveryCreateSnapshotDevices()2701 CreateResult SnapshotManager::RecoveryCreateSnapshotDevices() {
2702 if (!device_->IsRecovery()) {
2703 LOG(ERROR) << __func__ << " is only allowed in recovery.";
2704 return CreateResult::NOT_CREATED;
2705 }
2706
2707 auto mount = EnsureMetadataMounted();
2708 if (!mount || !mount->HasDevice()) {
2709 LOG(ERROR) << "Couldn't mount Metadata.";
2710 return CreateResult::NOT_CREATED;
2711 }
2712 return RecoveryCreateSnapshotDevices(mount);
2713 }
2714
RecoveryCreateSnapshotDevices(const std::unique_ptr<AutoDevice> & metadata_device)2715 CreateResult SnapshotManager::RecoveryCreateSnapshotDevices(
2716 const std::unique_ptr<AutoDevice>& metadata_device) {
2717 if (!device_->IsRecovery()) {
2718 LOG(ERROR) << __func__ << " is only allowed in recovery.";
2719 return CreateResult::NOT_CREATED;
2720 }
2721
2722 if (metadata_device == nullptr || !metadata_device->HasDevice()) {
2723 LOG(ERROR) << "Metadata not mounted.";
2724 return CreateResult::NOT_CREATED;
2725 }
2726
2727 auto state_file = GetStateFilePath();
2728 if (access(state_file.c_str(), F_OK) != 0 && errno == ENOENT) {
2729 LOG(ERROR) << "Couldn't access state file.";
2730 return CreateResult::NOT_CREATED;
2731 }
2732
2733 if (!NeedSnapshotsInFirstStageMount()) {
2734 return CreateResult::NOT_CREATED;
2735 }
2736
2737 auto slot_suffix = device_->GetOtherSlotSuffix();
2738 auto slot_number = SlotNumberForSlotSuffix(slot_suffix);
2739 auto super_path = device_->GetSuperDevice(slot_number);
2740 if (!CreateLogicalAndSnapshotPartitions(super_path)) {
2741 LOG(ERROR) << "Unable to map partitions.";
2742 return CreateResult::ERROR;
2743 }
2744 return CreateResult::CREATED;
2745 }
2746
UpdateForwardMergeIndicator(bool wipe)2747 bool SnapshotManager::UpdateForwardMergeIndicator(bool wipe) {
2748 auto path = GetForwardMergeIndicatorPath();
2749
2750 if (!wipe) {
2751 LOG(INFO) << "Wipe is not scheduled. Deleting forward merge indicator.";
2752 return RemoveFileIfExists(path);
2753 }
2754
2755 // TODO(b/152094219): Don't forward merge if no CoW file is allocated.
2756
2757 LOG(INFO) << "Wipe will be scheduled. Allowing forward merge of snapshots.";
2758 if (!android::base::WriteStringToFile("1", path)) {
2759 PLOG(ERROR) << "Unable to write forward merge indicator: " << path;
2760 return false;
2761 }
2762
2763 return true;
2764 }
2765
GetSnapshotMergeStatsInstance()2766 ISnapshotMergeStats* SnapshotManager::GetSnapshotMergeStatsInstance() {
2767 return SnapshotMergeStats::GetInstance(*this);
2768 }
2769
GetMappedImageDeviceStringOrPath(const std::string & device_name,std::string * device_string_or_mapped_path)2770 bool SnapshotManager::GetMappedImageDeviceStringOrPath(const std::string& device_name,
2771 std::string* device_string_or_mapped_path) {
2772 auto& dm = DeviceMapper::Instance();
2773 // Try getting the device string if it is a device mapper device.
2774 if (dm.GetState(device_name) != DmDeviceState::INVALID) {
2775 return dm.GetDeviceString(device_name, device_string_or_mapped_path);
2776 }
2777
2778 // Otherwise, get path from IImageManager.
2779 if (!images_->GetMappedImageDevice(device_name, device_string_or_mapped_path)) {
2780 return false;
2781 }
2782
2783 LOG(WARNING) << "Calling GetMappedImageDevice with local image manager; device "
2784 << (device_string_or_mapped_path ? *device_string_or_mapped_path : "(nullptr)")
2785 << "may not be available in first stage init! ";
2786 return true;
2787 }
2788
2789 } // namespace snapshot
2790 } // namespace android
2791