# Copyright 2019 - The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A client that manages Cuttlefish Virtual Device on compute engine.

** CvdComputeClient **

CvdComputeClient derives from AndroidComputeClient. It manages a google
compute engine project that is setup for running Cuttlefish Virtual Devices.
It knows how to create a host instance from Cuttlefish Stable Host Image, fetch
Android build, and start Android within the host instance.

** Class hierarchy **

  base_cloud_client.BaseCloudApiClient
                ^
                |
       gcompute_client.ComputeClient
                ^
                |
       android_compute_client.AndroidComputeClient
                ^
                |
       cvd_compute_client_multi_stage.CvdComputeClient

"""

import logging
import os
import shutil
import ssl
import stat
import subprocess
import tempfile
import time

from acloud import errors
from acloud.internal import constants
from acloud.internal.lib import android_build_client
from acloud.internal.lib import android_compute_client
from acloud.internal.lib import gcompute_client
from acloud.internal.lib import utils
from acloud.internal.lib.ssh import Ssh
from acloud.pull import pull


logger = logging.getLogger(__name__)

_DECOMPRESS_KERNEL_ARG = "-decompress_kernel=true"
_GPU_ARG = "-gpu_mode=drm_virgl"
_AGREEMENT_PROMPT_ARGS = ["-undefok=report_anonymous_usage_stats",
                          "-report_anonymous_usage_stats=y"]
_NUM_AVDS_ARG = "-num_instances=%(num_AVD)s"
_DEFAULT_BRANCH = "aosp-master"
_FETCHER_BUILD_TARGET = "aosp_cf_x86_phone-userdebug"
_FETCHER_NAME = "fetch_cvd"
# Time info to write in report.
_FETCH_ARTIFACT = "fetch_artifact_time"
_GCE_CREATE = "gce_create_time"
_LAUNCH_CVD = "launch_cvd_time"
# WebRTC args for launching AVD
_GUEST_ENFORCE_SECURITY_FALSE = "--guest_enforce_security=false"
_START_WEBRTC = "--start_webrtc"
_VM_MANAGER = "--vm_manager=crosvm"
_WEBRTC_ARGS = [_GUEST_ENFORCE_SECURITY_FALSE, _START_WEBRTC, _VM_MANAGER]
_NO_RETRY = 0
_MAX_RETRY = 3
_RETRY_SLEEP_SECS = 3
# Launch cvd command for acloud report
_LAUNCH_CVD_COMMAND = "launch_cvd_command"


def _ProcessBuild(build_id=None, branch=None, build_target=None):
    """Create a Cuttlefish fetch_cvd build string.

    Args:
        build_id: A specific build number to load from. Takes precedence over `branch`.
        branch: A manifest-branch at which to get the latest build.
        build_target: A particular device to load at the desired build.

    Returns:
        A string, used in the fetch_cvd cmd or None if all args are None.
        Without a build target the result is just the build id (or branch);
        with one it is "<build_id or branch>/<build_target>", where the branch
        falls back to _DEFAULT_BRANCH when neither id nor branch is given.
    """
    if not build_target:
        return build_id or branch
    return (build_id or branch or _DEFAULT_BRANCH) + "/" + build_target


class CvdComputeClient(android_compute_client.AndroidComputeClient):
    """Client that manages Android Virtual Device."""

    DATA_POLICY_CREATE_IF_MISSING = "create_if_missing"

    def __init__(self,
                 acloud_config,
                 oauth2_credentials,
                 boot_timeout_secs=None,
                 ins_timeout_secs=None,
                 report_internal_ip=None,
                 gpu=None):
        """Initialize.

        Args:
            acloud_config: An AcloudConfig object.
            oauth2_credentials: An oauth2client.OAuth2Credentials instance.
            boot_timeout_secs: Integer, the maximum time to wait for the AVD
                               to boot up.
            ins_timeout_secs: Integer, the maximum time to wait for the
                              instance ready.
            report_internal_ip: Boolean to report the internal ip instead of
                                external ip.
            gpu: String, GPU to attach to the device.
        """
        super(CvdComputeClient, self).__init__(acloud_config, oauth2_credentials)

        self._fetch_cvd_version = acloud_config.fetch_cvd_version
        self._build_api = (
            android_build_client.AndroidBuildClient(oauth2_credentials))
        self._ssh_private_key_path = acloud_config.ssh_private_key_path
        self._boot_timeout_secs = boot_timeout_secs
        self._ins_timeout_secs = ins_timeout_secs
        self._report_internal_ip = report_internal_ip
        self._gpu = gpu
        # Store all failures result when creating one or multiple instances.
        self._all_failures = dict()
        self._extra_args_ssh_tunnel = acloud_config.extra_args_ssh_tunnel
        # Populated by InitRemoteHost() or CreateInstance().
        self._ssh = None
        self._ip = None
        self._user = constants.GCE_USER
        # Seconds spent in each stage; filled in by the stage methods.
        self._execution_time = {_FETCH_ARTIFACT: 0, _GCE_CREATE: 0, _LAUNCH_CVD: 0}

    def InitRemoteHost(self, ssh, ip, user):
        """Init remote host.

        Check if we can ssh to the remote host, stop any cf instances running
        on it, and remove existing files.

        Args:
            ssh: Ssh object.
            ip: namedtuple (internal, external) that holds IP address of the
                remote host, e.g. "external:140.110.20.1, internal:10.0.0.1"
            user: String of user log in to the instance.
        """
        self._ssh = ssh
        self._ip = ip
        self._user = user
        self._ssh.WaitForSsh(timeout=self._ins_timeout_secs)
        self.StopCvd()
        self.CleanUp()

    # pylint: disable=arguments-differ,too-many-locals,broad-except
    def CreateInstance(self, instance, image_name, image_project,
                       build_target=None, branch=None, build_id=None,
                       kernel_branch=None, kernel_build_id=None,
                       kernel_build_target=None, blank_data_disk_size_gb=None,
                       avd_spec=None, extra_scopes=None,
                       system_build_target=None, system_branch=None,
                       system_build_id=None):

        """Create/Reuse a single configured cuttlefish device.
        1. Prepare GCE instance.
           Create a new instance or get IP address for reusing the specific instance.
        2. Put fetch_cvd on the instance.
        3. Invoke fetch_cvd to fetch and run the instance.

        When avd_spec is given, this method returns once ssh is ready (after
        stopping/cleaning a reused instance); steps 2 and 3 are only run here
        when avd_spec is not given.

        Any exception raised after the Ssh object is created is recorded in
        all_failures under the instance name instead of being propagated.

        Args:
            instance: instance name.
            image_name: A string, the name of the GCE image.
            image_project: A string, name of the project where the image lives.
                           Assume the default project if None.
            build_target: Target name, e.g. "aosp_cf_x86_phone-userdebug"
            branch: Branch name, e.g. "aosp-master"
            build_id: Build id, a string, e.g. "2263051", "P2804227"
            kernel_branch: Kernel branch name, e.g. "kernel-common-android-4.14"
            kernel_build_id: Kernel build id, a string, e.g. "223051", "P280427"
            kernel_build_target: String, Kernel build target name.
            blank_data_disk_size_gb: Size of the blank data disk in GB.
                                     None is treated as 0 when sizing the
                                     boot disk.
            avd_spec: An AVDSpec instance.
            extra_scopes: A list of extra scopes to be passed to the instance.
            system_build_target: Target name for the system image,
                                 e.g. "cf_x86_phone-userdebug"
            system_branch: A String, branch name for the system image.
            system_build_id: A string, build id for the system image.

        Returns:
            A string, representing instance name.
        """

        # A blank data disk would be created on the host. Make sure the size of
        # the boot disk is large enough to hold it. The parameter defaults to
        # None, so fall back to 0 rather than failing on `int + None`.
        boot_disk_size_gb = (
            int(self.GetImage(image_name, image_project)["diskSizeGb"]) +
            (blank_data_disk_size_gb or 0))

        if avd_spec and avd_spec.instance_name_to_reuse:
            self._ip = self._ReusingGceInstance(avd_spec)
        else:
            self._VerifyZoneByQuota()
            self._ip = self._CreateGceInstance(instance, image_name, image_project,
                                               extra_scopes, boot_disk_size_gb,
                                               avd_spec)
        self._ssh = Ssh(ip=self._ip,
                        user=constants.GCE_USER,
                        ssh_private_key_path=self._ssh_private_key_path,
                        extra_args_ssh_tunnel=self._extra_args_ssh_tunnel,
                        report_internal_ip=self._report_internal_ip)
        try:
            self._ssh.WaitForSsh(timeout=self._ins_timeout_secs)
            if avd_spec:
                if avd_spec.instance_name_to_reuse:
                    # A reused host may still have a device running and
                    # leftover files from the previous AVD.
                    self.StopCvd()
                    self.CleanUp()
                return instance

            # TODO: Remove following code after create_cf deprecated.
            self.UpdateFetchCvd()

            self.FetchBuild(build_id, branch, build_target, system_build_id,
                            system_branch, system_build_target, kernel_build_id,
                            kernel_branch, kernel_build_target)
            kernel_build = self.GetKernelBuild(kernel_build_id,
                                               kernel_branch,
                                               kernel_build_target)
            self.LaunchCvd(instance,
                           blank_data_disk_size_gb=blank_data_disk_size_gb,
                           kernel_build=kernel_build,
                           boot_timeout_secs=self._boot_timeout_secs)

            return instance
        except Exception as e:
            # Deliberately broad: the failure is reported through
            # all_failures and the instance name is still returned.
            self._all_failures[instance] = e
            return instance

    def _GetLaunchCvdArgs(self, avd_spec=None, blank_data_disk_size_gb=None,
                          kernel_build=None, decompress_kernel=None):
        """Get launch_cvd args.

        Args:
            avd_spec: An AVDSpec instance.
            blank_data_disk_size_gb: Size of the blank data disk in GB.
            kernel_build: String, kernel build info.
            decompress_kernel: Boolean, if true decompress the kernel.

        Returns:
            List of strings, args of launch_cvd.
        """
        launch_cvd_args = []
        if blank_data_disk_size_gb and blank_data_disk_size_gb > 0:
            # Policy 'create_if_missing' would create a blank userdata disk if
            # missing. If already exist, reuse the disk.
            launch_cvd_args.append(
                "-data_policy=" + self.DATA_POLICY_CREATE_IF_MISSING)
            launch_cvd_args.append(
                "-blank_data_image_mb=%d" % (blank_data_disk_size_gb * 1024))
        if avd_spec:
            launch_cvd_args.append(
                "-x_res=" + avd_spec.hw_property[constants.HW_X_RES])
            launch_cvd_args.append(
                "-y_res=" + avd_spec.hw_property[constants.HW_Y_RES])
            launch_cvd_args.append(
                "-dpi=" + avd_spec.hw_property[constants.HW_ALIAS_DPI])
            if constants.HW_ALIAS_DISK in avd_spec.hw_property:
                launch_cvd_args.append(
                    "-data_policy=" + self.DATA_POLICY_CREATE_IF_MISSING)
                launch_cvd_args.append(
                    "-blank_data_image_mb="
                    + avd_spec.hw_property[constants.HW_ALIAS_DISK])
            if constants.HW_ALIAS_CPUS in avd_spec.hw_property:
                launch_cvd_args.append(
                    "-cpus=%s" % avd_spec.hw_property[constants.HW_ALIAS_CPUS])
            if constants.HW_ALIAS_MEMORY in avd_spec.hw_property:
                launch_cvd_args.append(
                    "-memory_mb=%s" % avd_spec.hw_property[constants.HW_ALIAS_MEMORY])
            if avd_spec.connect_webrtc:
                launch_cvd_args.extend(_WEBRTC_ARGS)
            if avd_spec.num_avds_per_instance > 1:
                launch_cvd_args.append(
                    _NUM_AVDS_ARG % {"num_AVD": avd_spec.num_avds_per_instance})
        else:
            # Without an avd_spec, fall back to the "x"-separated resolution
            # string held by the client; fields 0, 1 and 3 are used as
            # x_res, y_res and dpi.
            resolution = self._resolution.split("x")
            launch_cvd_args.append("-x_res=" + resolution[0])
            launch_cvd_args.append("-y_res=" + resolution[1])
            launch_cvd_args.append("-dpi=" + resolution[3])

        if kernel_build:
            launch_cvd_args.append("-kernel_path=kernel")

        if self._launch_args:
            launch_cvd_args.append(self._launch_args)

        if decompress_kernel:
            launch_cvd_args.append(_DECOMPRESS_KERNEL_ARG)

        if self._gpu:
            launch_cvd_args.append(_GPU_ARG)

        launch_cvd_args.extend(_AGREEMENT_PROMPT_ARGS)
        return launch_cvd_args

    @staticmethod
    def GetKernelBuild(kernel_build_id, kernel_branch, kernel_build_target):
        """Get kernel build args for fetch_cvd.

        Args:
            kernel_branch: Kernel branch name, e.g. "kernel-common-android-4.14"
            kernel_build_id: Kernel build id, a string, e.g. "223051", "P280427"
            kernel_build_target: String, Kernel build target name.

        Returns:
            String of kernel build args for fetch_cvd.
            If no kernel build then return None.
        """
        # kernel_target have default value "kernel". If user provide kernel_build_id
        # or kernel_branch, then start to process kernel image.
        if kernel_build_id or kernel_branch:
            return _ProcessBuild(kernel_build_id, kernel_branch, kernel_build_target)
        return None

    def StopCvd(self):
        """Stop CVD.

        If stop_cvd fails, assume that it's because there was no previously
        running device.
        """
        ssh_command = "./bin/stop_cvd"
        try:
            self._ssh.Run(ssh_command)
        except subprocess.CalledProcessError as e:
            logger.debug("Failed to stop_cvd (possibly no running device): %s", e)

    def CleanUp(self):
        """Clean up the files/folders on the existing instance.

        If previous AVD have these files/folders, reusing the instance may have
        side effects if not cleaned. The path in the instance is /home/vsoc-01/*
        if the GCE user is vsoc-01.
        """

        ssh_command = "'/bin/rm -rf /home/%s/*'" % self._user
        try:
            self._ssh.Run(ssh_command)
        except subprocess.CalledProcessError as e:
            logger.debug("Failed to clean up the files/folders: %s", e)

    @utils.TimeExecute(function_description="Launching AVD(s) and waiting for boot up",
                       result_evaluator=utils.BootEvaluator)
    def LaunchCvd(self, instance, avd_spec=None,
                  blank_data_disk_size_gb=None, kernel_build=None,
                  decompress_kernel=None,
                  boot_timeout_secs=None):
        """Launch CVD.

        Launch AVD with launch_cvd. If the process is failed, acloud would show
        error messages and auto download log files from remote instance.

        Args:
            instance: String, instance name.
            avd_spec: An AVDSpec instance.
            blank_data_disk_size_gb: Size of the blank data disk in GB.
            kernel_build: String, kernel build info.
            decompress_kernel: Boolean, if true decompress the kernel.
            boot_timeout_secs: Integer, the maximum time to wait for the
                               command to respond.

        Returns:
            dict of failures, return this dict for BootEvaluator to handle
            LaunchCvd success or fail messages.
        """
        timestart = time.time()
        error_msg = ""
        launch_cvd_args = self._GetLaunchCvdArgs(avd_spec,
                                                 blank_data_disk_size_gb,
                                                 kernel_build,
                                                 decompress_kernel)
        boot_timeout_secs = boot_timeout_secs or constants.DEFAULT_CF_BOOT_TIMEOUT
        ssh_command = "./bin/launch_cvd -daemon " + " ".join(launch_cvd_args)
        try:
            self.ExtendReportData(_LAUNCH_CVD_COMMAND, ssh_command)
            self._ssh.Run(ssh_command, boot_timeout_secs, retry=_NO_RETRY)
        except (subprocess.CalledProcessError, errors.DeviceConnectionError) as e:
            # TODO(b/140475060): Distinguish the error is command return error
            # or timeout error.
            error_msg = ("Device %s did not finish on boot within timeout (%s secs)"
                         % (instance, boot_timeout_secs))
            self._all_failures[instance] = error_msg
            utils.PrintColorString(str(e), utils.TextColors.FAIL)
            if avd_spec and not avd_spec.no_pull_log:
                self._PullAllLogFiles(instance)

        self._execution_time[_LAUNCH_CVD] = round(time.time() - timestart, 2)
        return {instance: error_msg} if error_msg else {}

    def _PullAllLogFiles(self, instance):
        """Pull all log files from instance.

        1. Download log files to temp folder.
        2. Show messages about the download folder for users.

        Args:
            instance: String, instance name.
        """
        log_files = pull.GetAllLogFilePaths(self._ssh)
        error_log_folder = pull.GetDownloadLogFolder(instance)
        pull.PullLogs(self._ssh, log_files, error_log_folder)
        self.ExtendReportData(constants.ERROR_LOG_FOLDER, error_log_folder)

    @utils.TimeExecute(function_description="Reusing GCE instance")
    def _ReusingGceInstance(self, avd_spec):
        """Reusing a cuttlefish existing instance.

        Args:
            avd_spec: An AVDSpec instance.

        Returns:
            ssh.IP object, that stores internal and external ip of the instance.
        """
        gcompute_client.ComputeClient.AddSshRsaInstanceMetadata(
            self, constants.GCE_USER, avd_spec.cfg.ssh_public_key_path,
            avd_spec.instance_name_to_reuse)
        ip = gcompute_client.ComputeClient.GetInstanceIP(
            self, instance=avd_spec.instance_name_to_reuse, zone=self._zone)

        return ip

    @utils.TimeExecute(function_description="Creating GCE instance")
    def _CreateGceInstance(self, instance, image_name, image_project,
                           extra_scopes, boot_disk_size_gb, avd_spec):
        """Create a single configured cuttlefish device.

        Override method from parent class.
        Args:
            instance: String, instance name.
            image_name: String, the name of the GCE image.
            image_project: String, the name of the project where the image.
            extra_scopes: A list of extra scopes to be passed to the instance.
            boot_disk_size_gb: Integer, size of the boot disk in GB.
            avd_spec: An AVDSpec instance.

        Returns:
            ssh.IP object, that stores internal and external ip of the instance.
        """
        timestart = time.time()
        # Work on a copy so the per-instance keys below do not leak into the
        # client's shared metadata.
        metadata = self._metadata.copy()

        if avd_spec:
            metadata[constants.INS_KEY_AVD_TYPE] = avd_spec.avd_type
            metadata[constants.INS_KEY_AVD_FLAVOR] = avd_spec.flavor
            metadata[constants.INS_KEY_DISPLAY] = ("%sx%s (%s)" % (
                avd_spec.hw_property[constants.HW_X_RES],
                avd_spec.hw_property[constants.HW_Y_RES],
                avd_spec.hw_property[constants.HW_ALIAS_DPI]))

        disk_args = self._GetDiskArgs(
            instance, image_name, image_project, boot_disk_size_gb)
        gcompute_client.ComputeClient.CreateInstance(
            self,
            instance=instance,
            image_name=image_name,
            image_project=image_project,
            disk_args=disk_args,
            metadata=metadata,
            machine_type=self._machine_type,
            network=self._network,
            zone=self._zone,
            gpu=self._gpu,
            extra_scopes=extra_scopes,
            tags=["appstreaming"] if (
                avd_spec and avd_spec.connect_webrtc) else None)
        ip = gcompute_client.ComputeClient.GetInstanceIP(
            self, instance=instance, zone=self._zone)
        logger.debug("'instance_ip': %s", ip.internal
                     if self._report_internal_ip else ip.external)

        self._execution_time[_GCE_CREATE] = round(time.time() - timestart, 2)
        return ip

    @utils.TimeExecute(function_description="Uploading build fetcher to instance")
    def UpdateFetchCvd(self):
        """Download fetch_cvd from the Build API, and upload it to a remote instance.

        The version of fetch_cvd to use is retrieved from the configuration file. Once fetch_cvd
        is on the instance, future commands can use it to download relevant Cuttlefish files from
        the Build API on the instance itself.

        The local temporary download directory is removed whether or not the
        download/upload succeeds.
        """
        # TODO(schuffelen): Support fetch_cvd_version="latest" when there is
        # stronger automated testing on it.
        download_dir = tempfile.mkdtemp()
        try:
            download_target = os.path.join(download_dir, _FETCHER_NAME)
            utils.RetryExceptionType(
                exception_types=ssl.SSLError,
                max_retries=_MAX_RETRY,
                functor=self._build_api.DownloadArtifact,
                sleep_multiplier=_RETRY_SLEEP_SECS,
                retry_backoff_factor=utils.DEFAULT_RETRY_BACKOFF_FACTOR,
                build_target=_FETCHER_BUILD_TARGET,
                build_id=self._fetch_cvd_version,
                resource_id=_FETCHER_NAME,
                local_dest=download_target,
                attempt_id="latest")
            # Make the downloaded fetcher executable before pushing it.
            fetch_cvd_stat = os.stat(download_target)
            os.chmod(download_target, fetch_cvd_stat.st_mode | stat.S_IEXEC)
            self._ssh.ScpPushFile(src_file=download_target, dst_file=_FETCHER_NAME)
        finally:
            # Always drop the temp dir so a failed download or scp does not
            # leave it behind on the local machine.
            shutil.rmtree(download_dir, ignore_errors=True)

    @utils.TimeExecute(function_description="Downloading build on instance")
    def FetchBuild(self, build_id, branch, build_target, system_build_id,
                   system_branch, system_build_target, kernel_build_id,
                   kernel_branch, kernel_build_target):
        """Execute fetch_cvd on the remote instance to get Cuttlefish runtime files.

        Each (build_id, branch, build_target) triple is turned into a build
        string and passed to fetch_cvd as -default_build, -system_build or
        -kernel_build; a triple that yields no build string is omitted.

        Args:
            build_id: Build id, a string, e.g. "2263051", "P2804227"
            branch: Branch name, e.g. "aosp-master"
            build_target: Target name, e.g. "aosp_cf_x86_phone-userdebug"
            system_build_id: A string, build id for the system image.
            system_branch: A String, branch name for the system image.
            system_build_target: Target name for the system image,
                                 e.g. "cf_x86_phone-userdebug"
            kernel_build_id: Kernel build id, a string, e.g. "223051", "P280427"
            kernel_branch: Kernel branch name, e.g. "kernel-common-android-4.14"
            kernel_build_target: String, Kernel build target name.
        """
        timestart = time.time()
        fetch_cvd_args = ["-credential_source=gce"]

        default_build = _ProcessBuild(build_id, branch, build_target)
        if default_build:
            fetch_cvd_args.append("-default_build=" + default_build)
        system_build = _ProcessBuild(system_build_id, system_branch, system_build_target)
        if system_build:
            fetch_cvd_args.append("-system_build=" + system_build)
        kernel_build = self.GetKernelBuild(kernel_build_id,
                                           kernel_branch,
                                           kernel_build_target)
        if kernel_build:
            fetch_cvd_args.append("-kernel_build=" + kernel_build)

        self._ssh.Run("./fetch_cvd " + " ".join(fetch_cvd_args),
                      timeout=constants.DEFAULT_SSH_TIMEOUT)
        self._execution_time[_FETCH_ARTIFACT] = round(time.time() - timestart, 2)

    def GetInstanceIP(self, instance=None):
        """Override method from parent class.

        It need to get the IP address in the common_operation. If the class
        already defined the ip address, return the ip address.

        Args:
            instance: String, representing instance name.

        Returns:
            ssh.IP object, that stores internal and external ip of the instance.
        """
        if self._ip:
            return self._ip
        return gcompute_client.ComputeClient.GetInstanceIP(
            self, instance=instance, zone=self._zone)

    def GetHostImageName(self, stable_image_name, image_family, image_project):
        """Get host image name.

        Args:
            stable_image_name: String of stable host image name.
            image_family: String of image family.
            image_project: String of image project.

        Returns:
            String of stable host image name.

        Raises:
            errors.ConfigError: There is no host image name in config file.
        """
        if stable_image_name:
            return stable_image_name

        if image_family:
            image_name = gcompute_client.ComputeClient.GetImageFromFamily(
                self, image_family, image_project)["name"]
            logger.debug("Get the host image name from image family: %s", image_name)
            return image_name

        raise errors.ConfigError(
            "Please specify 'stable_host_image_name' or 'stable_host_image_family'"
            " in config.")

    @property
    def all_failures(self):
        """Return all_failures"""
        return self._all_failures

    @property
    def execution_time(self):
        """Return execution_time"""
        return self._execution_time