1# Copyright 2019 - The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14"""A client that manages Cuttlefish Virtual Device on compute engine.
15
16** CvdComputeClient **
17
CvdComputeClient derives from AndroidComputeClient. It manages a Google
Compute Engine project that is set up for running Cuttlefish Virtual Devices.
20It knows how to create a host instance from Cuttlefish Stable Host Image, fetch
21Android build, and start Android within the host instance.
22
23** Class hierarchy **
24
25  base_cloud_client.BaseCloudApiClient
26                ^
27                |
28       gcompute_client.ComputeClient
29                ^
30                |
31       android_compute_client.AndroidComputeClient
32                ^
33                |
34       cvd_compute_client_multi_stage.CvdComputeClient
35
36"""
37
38import logging
39import os
40import ssl
41import stat
42import subprocess
43import tempfile
44import time
45
46from acloud import errors
47from acloud.internal import constants
48from acloud.internal.lib import android_build_client
49from acloud.internal.lib import android_compute_client
50from acloud.internal.lib import gcompute_client
51from acloud.internal.lib import utils
52from acloud.internal.lib.ssh import Ssh
53from acloud.pull import pull
54
55
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Optional launch_cvd flags; appended by CvdComputeClient._GetLaunchCvdArgs
# when kernel decompression is requested / a GPU is configured.
_DECOMPRESS_KERNEL_ARG = "-decompress_kernel=true"
_GPU_ARG = "-gpu_mode=drm_virgl"
# Always appended to the launch_cvd arguments by _GetLaunchCvdArgs.
# NOTE(review): presumably pre-answers the usage-stats prompt so the launch
# does not block on input -- confirm against launch_cvd documentation.
_AGREEMENT_PROMPT_ARGS = ["-undefok=report_anonymous_usage_stats",
                          "-report_anonymous_usage_stats=y"]
# Template filled with avd_spec.num_avds_per_instance when it is > 1.
_NUM_AVDS_ARG = "-num_instances=%(num_AVD)s"
# Branch used by _ProcessBuild when a build target is given without a branch.
_DEFAULT_BRANCH = "aosp-master"
# Build target and artifact name used by UpdateFetchCvd to download fetch_cvd.
_FETCHER_BUILD_TARGET = "aosp_cf_x86_phone-userdebug"
_FETCHER_NAME = "fetch_cvd"
# Time info to write in report.
# These are the keys of CvdComputeClient._execution_time.
_FETCH_ARTIFACT = "fetch_artifact_time"
_GCE_CREATE = "gce_create_time"
_LAUNCH_CVD = "launch_cvd_time"
# WebRTC args for launching AVD
# (added to the launch_cvd arguments when avd_spec.connect_webrtc is set).
_GUEST_ENFORCE_SECURITY_FALSE = "--guest_enforce_security=false"
_START_WEBRTC = "--start_webrtc"
_VM_MANAGER = "--vm_manager=crosvm"
_WEBRTC_ARGS = [_GUEST_ENFORCE_SECURITY_FALSE, _START_WEBRTC, _VM_MANAGER]
# _NO_RETRY is passed as retry= to the ssh Run call in LaunchCvd.
_NO_RETRY = 0
# _MAX_RETRY / _RETRY_SLEEP_SECS are passed to utils.RetryExceptionType as
# max_retries / sleep_multiplier when downloading fetch_cvd.
_MAX_RETRY = 3
_RETRY_SLEEP_SECS = 3
# Launch cvd command for acloud report
# (key passed to ExtendReportData together with the full command line).
_LAUNCH_CVD_COMMAND = "launch_cvd_command"
80
81
def _ProcessBuild(build_id=None, branch=None, build_target=None):
    """Compose the build specifier string handed to fetch_cvd.

    Args:
        build_id: A specific build number to load from. Takes precedence
                  over `branch`.
        branch: A manifest-branch at which to get the latest build.
        build_target: A particular device to load at the desired build.

    Returns:
        When build_target is given: "<build_id or branch>/<build_target>",
        where an empty branch falls back to _DEFAULT_BRANCH.
        Otherwise: build_id if set, else branch (which may be None when
        neither was supplied).
    """
    if build_target:
        # build_id wins over branch; the default branch is the last resort.
        build_source = build_id or branch or _DEFAULT_BRANCH
        return build_source + "/" + build_target
    return build_id or branch
98
99
class CvdComputeClient(android_compute_client.AndroidComputeClient):
    """Client that manages Android Virtual Device.

    Extends AndroidComputeClient with the steps needed to prepare a GCE
    host over ssh, fetch a Cuttlefish build onto it with fetch_cvd and
    start the device with launch_cvd.
    """

    # Value used for launch_cvd's -data_policy flag in _GetLaunchCvdArgs.
    DATA_POLICY_CREATE_IF_MISSING = "create_if_missing"
104
105    def __init__(self,
106                 acloud_config,
107                 oauth2_credentials,
108                 boot_timeout_secs=None,
109                 ins_timeout_secs=None,
110                 report_internal_ip=None,
111                 gpu=None):
112        """Initialize.
113
114        Args:
115            acloud_config: An AcloudConfig object.
116            oauth2_credentials: An oauth2client.OAuth2Credentials instance.
117            boot_timeout_secs: Integer, the maximum time to wait for the AVD
118                               to boot up.
119            ins_timeout_secs: Integer, the maximum time to wait for the
120                              instance ready.
121            report_internal_ip: Boolean to report the internal ip instead of
122                                external ip.
123            gpu: String, GPU to attach to the device.
124        """
125        super(CvdComputeClient, self).__init__(acloud_config, oauth2_credentials)
126
127        self._fetch_cvd_version = acloud_config.fetch_cvd_version
128        self._build_api = (
129            android_build_client.AndroidBuildClient(oauth2_credentials))
130        self._ssh_private_key_path = acloud_config.ssh_private_key_path
131        self._boot_timeout_secs = boot_timeout_secs
132        self._ins_timeout_secs = ins_timeout_secs
133        self._report_internal_ip = report_internal_ip
134        self._gpu = gpu
135        # Store all failures result when creating one or multiple instances.
136        self._all_failures = dict()
137        self._extra_args_ssh_tunnel = acloud_config.extra_args_ssh_tunnel
138        self._ssh = None
139        self._ip = None
140        self._user = constants.GCE_USER
141        self._execution_time = {_FETCH_ARTIFACT: 0, _GCE_CREATE: 0, _LAUNCH_CVD: 0}
142
143    def InitRemoteHost(self, ssh, ip, user):
144        """Init remote host.
145
146        Check if we can ssh to the remote host, stop any cf instances running
147        on it, and remove existing files.
148
149        Args:
150            ssh: Ssh object.
151            ip: namedtuple (internal, external) that holds IP address of the
152                remote host, e.g. "external:140.110.20.1, internal:10.0.0.1"
153            user: String of user log in to the instance.
154        """
155        self._ssh = ssh
156        self._ip = ip
157        self._user = user
158        self._ssh.WaitForSsh(timeout=self._ins_timeout_secs)
159        self.StopCvd()
160        self.CleanUp()
161
162    # pylint: disable=arguments-differ,too-many-locals,broad-except
163    def CreateInstance(self, instance, image_name, image_project,
164                       build_target=None, branch=None, build_id=None,
165                       kernel_branch=None, kernel_build_id=None,
166                       kernel_build_target=None, blank_data_disk_size_gb=None,
167                       avd_spec=None, extra_scopes=None,
168                       system_build_target=None, system_branch=None,
169                       system_build_id=None):
170
171        """Create/Reuse a single configured cuttlefish device.
172        1. Prepare GCE instance.
173           Create a new instnace or get IP address for reusing the specific instance.
174        2. Put fetch_cvd on the instance.
175        3. Invoke fetch_cvd to fetch and run the instance.
176
177        Args:
178            instance: instance name.
179            image_name: A string, the name of the GCE image.
180            image_project: A string, name of the project where the image lives.
181                           Assume the default project if None.
182            build_target: Target name, e.g. "aosp_cf_x86_phone-userdebug"
183            branch: Branch name, e.g. "aosp-master"
184            build_id: Build id, a string, e.g. "2263051", "P2804227"
185            kernel_branch: Kernel branch name, e.g. "kernel-common-android-4.14"
186            kernel_build_id: Kernel build id, a string, e.g. "223051", "P280427"
187            kernel_build_target: String, Kernel build target name.
188            blank_data_disk_size_gb: Size of the blank data disk in GB.
189            avd_spec: An AVDSpec instance.
190            extra_scopes: A list of extra scopes to be passed to the instance.
191            system_build_target: Target name for the system image,
192                                e.g. "cf_x86_phone-userdebug"
193            system_branch: A String, branch name for the system image.
194            system_build_id: A string, build id for the system image.
195
196        Returns:
197            A string, representing instance name.
198        """
199
200        # A blank data disk would be created on the host. Make sure the size of
201        # the boot disk is large enough to hold it.
202        boot_disk_size_gb = (
203            int(self.GetImage(image_name, image_project)["diskSizeGb"]) +
204            blank_data_disk_size_gb)
205
206        if avd_spec and avd_spec.instance_name_to_reuse:
207            self._ip = self._ReusingGceInstance(avd_spec)
208        else:
209            self._VerifyZoneByQuota()
210            self._ip = self._CreateGceInstance(instance, image_name, image_project,
211                                               extra_scopes, boot_disk_size_gb,
212                                               avd_spec)
213        self._ssh = Ssh(ip=self._ip,
214                        user=constants.GCE_USER,
215                        ssh_private_key_path=self._ssh_private_key_path,
216                        extra_args_ssh_tunnel=self._extra_args_ssh_tunnel,
217                        report_internal_ip=self._report_internal_ip)
218        try:
219            self._ssh.WaitForSsh(timeout=self._ins_timeout_secs)
220            if avd_spec:
221                if avd_spec.instance_name_to_reuse:
222                    self.StopCvd()
223                    self.CleanUp()
224                return instance
225
226            # TODO: Remove following code after create_cf deprecated.
227            self.UpdateFetchCvd()
228
229            self.FetchBuild(build_id, branch, build_target, system_build_id,
230                            system_branch, system_build_target, kernel_build_id,
231                            kernel_branch, kernel_build_target)
232            kernel_build = self.GetKernelBuild(kernel_build_id,
233                                               kernel_branch,
234                                               kernel_build_target)
235            self.LaunchCvd(instance,
236                           blank_data_disk_size_gb=blank_data_disk_size_gb,
237                           kernel_build=kernel_build,
238                           boot_timeout_secs=self._boot_timeout_secs)
239
240            return instance
241        except Exception as e:
242            self._all_failures[instance] = e
243            return instance
244
245    def _GetLaunchCvdArgs(self, avd_spec=None, blank_data_disk_size_gb=None,
246                          kernel_build=None, decompress_kernel=None):
247        """Get launch_cvd args.
248
249        Args:
250            avd_spec: An AVDSpec instance.
251            blank_data_disk_size_gb: Size of the blank data disk in GB.
252            kernel_build: String, kernel build info.
253            decompress_kernel: Boolean, if true decompress the kernel.
254
255        Returns:
256            String, args of launch_cvd.
257        """
258        launch_cvd_args = []
259        if blank_data_disk_size_gb and blank_data_disk_size_gb > 0:
260            # Policy 'create_if_missing' would create a blank userdata disk if
261            # missing. If already exist, reuse the disk.
262            launch_cvd_args.append(
263                "-data_policy=" + self.DATA_POLICY_CREATE_IF_MISSING)
264            launch_cvd_args.append(
265                "-blank_data_image_mb=%d" % (blank_data_disk_size_gb * 1024))
266        if avd_spec:
267            launch_cvd_args.append(
268                "-x_res=" + avd_spec.hw_property[constants.HW_X_RES])
269            launch_cvd_args.append(
270                "-y_res=" + avd_spec.hw_property[constants.HW_Y_RES])
271            launch_cvd_args.append(
272                "-dpi=" + avd_spec.hw_property[constants.HW_ALIAS_DPI])
273            if constants.HW_ALIAS_DISK in avd_spec.hw_property:
274                launch_cvd_args.append(
275                    "-data_policy=" + self.DATA_POLICY_CREATE_IF_MISSING)
276                launch_cvd_args.append(
277                    "-blank_data_image_mb="
278                    + avd_spec.hw_property[constants.HW_ALIAS_DISK])
279            if constants.HW_ALIAS_CPUS in avd_spec.hw_property:
280                launch_cvd_args.append(
281                    "-cpus=%s" % avd_spec.hw_property[constants.HW_ALIAS_CPUS])
282            if constants.HW_ALIAS_MEMORY in avd_spec.hw_property:
283                launch_cvd_args.append(
284                    "-memory_mb=%s" % avd_spec.hw_property[constants.HW_ALIAS_MEMORY])
285            if avd_spec.connect_webrtc:
286                launch_cvd_args.extend(_WEBRTC_ARGS)
287            if avd_spec.num_avds_per_instance > 1:
288                launch_cvd_args.append(
289                    _NUM_AVDS_ARG % {"num_AVD": avd_spec.num_avds_per_instance})
290        else:
291            resolution = self._resolution.split("x")
292            launch_cvd_args.append("-x_res=" + resolution[0])
293            launch_cvd_args.append("-y_res=" + resolution[1])
294            launch_cvd_args.append("-dpi=" + resolution[3])
295
296        if kernel_build:
297            launch_cvd_args.append("-kernel_path=kernel")
298
299        if self._launch_args:
300            launch_cvd_args.append(self._launch_args)
301
302        if decompress_kernel:
303            launch_cvd_args.append(_DECOMPRESS_KERNEL_ARG)
304
305        if self._gpu:
306            launch_cvd_args.append(_GPU_ARG)
307
308        launch_cvd_args.extend(_AGREEMENT_PROMPT_ARGS)
309        return launch_cvd_args
310
311    @staticmethod
312    def GetKernelBuild(kernel_build_id, kernel_branch, kernel_build_target):
313        """Get kernel build args for fetch_cvd.
314
315        Args:
316            kernel_branch: Kernel branch name, e.g. "kernel-common-android-4.14"
317            kernel_build_id: Kernel build id, a string, e.g. "223051", "P280427"
318            kernel_build_target: String, Kernel build target name.
319
320        Returns:
321            String of kernel build args for fetch_cvd.
322            If no kernel build then return None.
323        """
324        # kernel_target have default value "kernel". If user provide kernel_build_id
325        # or kernel_branch, then start to process kernel image.
326        if kernel_build_id or kernel_branch:
327            return _ProcessBuild(kernel_build_id, kernel_branch, kernel_build_target)
328        return None
329
330    def StopCvd(self):
331        """Stop CVD.
332
333        If stop_cvd fails, assume that it's because there was no previously
334        running device.
335        """
336        ssh_command = "./bin/stop_cvd"
337        try:
338            self._ssh.Run(ssh_command)
339        except subprocess.CalledProcessError as e:
340            logger.debug("Failed to stop_cvd (possibly no running device): %s", e)
341
342    def CleanUp(self):
343        """Clean up the files/folders on the existing instance.
344
345        If previous AVD have these files/folders, reusing the instance may have
346        side effects if not cleaned. The path in the instance is /home/vsoc-01/*
347        if the GCE user is vsoc-01.
348        """
349
350        ssh_command = "'/bin/rm -rf /home/%s/*'" % self._user
351        try:
352            self._ssh.Run(ssh_command)
353        except subprocess.CalledProcessError as e:
354            logger.debug("Failed to clean up the files/folders: %s", e)
355
356    @utils.TimeExecute(function_description="Launching AVD(s) and waiting for boot up",
357                       result_evaluator=utils.BootEvaluator)
358    def LaunchCvd(self, instance, avd_spec=None,
359                  blank_data_disk_size_gb=None, kernel_build=None,
360                  decompress_kernel=None,
361                  boot_timeout_secs=None):
362        """Launch CVD.
363
364        Launch AVD with launch_cvd. If the process is failed, acloud would show
365        error messages and auto download log files from remote instance.
366
367        Args:
368            instance: String, instance name.
369            avd_spec: An AVDSpec instance.
370            blank_data_disk_size_gb: Size of the blank data disk in GB.
371            kernel_build: String, kernel build info.
372            decompress_kernel: Boolean, if true decompress the kernel.
373            boot_timeout_secs: Integer, the maximum time to wait for the
374                               command to respond.
375
376        Returns:
377           dict of faliures, return this dict for BootEvaluator to handle
378           LaunchCvd success or fail messages.
379        """
380        timestart = time.time()
381        error_msg = ""
382        launch_cvd_args = self._GetLaunchCvdArgs(avd_spec,
383                                                 blank_data_disk_size_gb,
384                                                 kernel_build,
385                                                 decompress_kernel)
386        boot_timeout_secs = boot_timeout_secs or constants.DEFAULT_CF_BOOT_TIMEOUT
387        ssh_command = "./bin/launch_cvd -daemon " + " ".join(launch_cvd_args)
388        try:
389            self.ExtendReportData(_LAUNCH_CVD_COMMAND, ssh_command)
390            self._ssh.Run(ssh_command, boot_timeout_secs, retry=_NO_RETRY)
391        except (subprocess.CalledProcessError, errors.DeviceConnectionError) as e:
392            # TODO(b/140475060): Distinguish the error is command return error
393            # or timeout error.
394            error_msg = ("Device %s did not finish on boot within timeout (%s secs)"
395                         % (instance, boot_timeout_secs))
396            self._all_failures[instance] = error_msg
397            utils.PrintColorString(str(e), utils.TextColors.FAIL)
398            if avd_spec and not avd_spec.no_pull_log:
399                self._PullAllLogFiles(instance)
400
401        self._execution_time[_LAUNCH_CVD] = round(time.time() - timestart, 2)
402        return {instance: error_msg} if error_msg else {}
403
404    def _PullAllLogFiles(self, instance):
405        """Pull all log files from instance.
406
407        1. Download log files to temp folder.
408        2. Show messages about the download folder for users.
409
410        Args:
411            instance: String, instance name.
412        """
413        log_files = pull.GetAllLogFilePaths(self._ssh)
414        error_log_folder = pull.GetDownloadLogFolder(instance)
415        pull.PullLogs(self._ssh, log_files, error_log_folder)
416        self.ExtendReportData(constants.ERROR_LOG_FOLDER, error_log_folder)
417
418    @utils.TimeExecute(function_description="Reusing GCE instance")
419    def _ReusingGceInstance(self, avd_spec):
420        """Reusing a cuttlefish existing instance.
421
422        Args:
423            avd_spec: An AVDSpec instance.
424
425        Returns:
426            ssh.IP object, that stores internal and external ip of the instance.
427        """
428        gcompute_client.ComputeClient.AddSshRsaInstanceMetadata(
429            self, constants.GCE_USER, avd_spec.cfg.ssh_public_key_path,
430            avd_spec.instance_name_to_reuse)
431        ip = gcompute_client.ComputeClient.GetInstanceIP(
432            self, instance=avd_spec.instance_name_to_reuse, zone=self._zone)
433
434        return ip
435
436    @utils.TimeExecute(function_description="Creating GCE instance")
437    def _CreateGceInstance(self, instance, image_name, image_project,
438                           extra_scopes, boot_disk_size_gb, avd_spec):
439        """Create a single configured cuttlefish device.
440
441        Override method from parent class.
442        Args:
443            instance: String, instance name.
444            image_name: String, the name of the GCE image.
445            image_project: String, the name of the project where the image.
446            extra_scopes: A list of extra scopes to be passed to the instance.
447            boot_disk_size_gb: Integer, size of the boot disk in GB.
448            avd_spec: An AVDSpec instance.
449
450        Returns:
451            ssh.IP object, that stores internal and external ip of the instance.
452        """
453        timestart = time.time()
454        metadata = self._metadata.copy()
455
456        if avd_spec:
457            metadata[constants.INS_KEY_AVD_TYPE] = avd_spec.avd_type
458            metadata[constants.INS_KEY_AVD_FLAVOR] = avd_spec.flavor
459            metadata[constants.INS_KEY_DISPLAY] = ("%sx%s (%s)" % (
460                avd_spec.hw_property[constants.HW_X_RES],
461                avd_spec.hw_property[constants.HW_Y_RES],
462                avd_spec.hw_property[constants.HW_ALIAS_DPI]))
463
464        disk_args = self._GetDiskArgs(
465            instance, image_name, image_project, boot_disk_size_gb)
466        gcompute_client.ComputeClient.CreateInstance(
467            self,
468            instance=instance,
469            image_name=image_name,
470            image_project=image_project,
471            disk_args=disk_args,
472            metadata=metadata,
473            machine_type=self._machine_type,
474            network=self._network,
475            zone=self._zone,
476            gpu=self._gpu,
477            extra_scopes=extra_scopes,
478            tags=["appstreaming"] if (
479                avd_spec and avd_spec.connect_webrtc) else None)
480        ip = gcompute_client.ComputeClient.GetInstanceIP(
481            self, instance=instance, zone=self._zone)
482        logger.debug("'instance_ip': %s", ip.internal
483                     if self._report_internal_ip else ip.external)
484
485        self._execution_time[_GCE_CREATE] = round(time.time() - timestart, 2)
486        return ip
487
488    @utils.TimeExecute(function_description="Uploading build fetcher to instance")
489    def UpdateFetchCvd(self):
490        """Download fetch_cvd from the Build API, and upload it to a remote instance.
491
492        The version of fetch_cvd to use is retrieved from the configuration file. Once fetch_cvd
493        is on the instance, future commands can use it to download relevant Cuttlefish files from
494        the Build API on the instance itself.
495        """
496        # TODO(schuffelen): Support fetch_cvd_version="latest" when there is
497        # stronger automated testing on it.
498        download_dir = tempfile.mkdtemp()
499        download_target = os.path.join(download_dir, _FETCHER_NAME)
500        utils.RetryExceptionType(
501            exception_types=ssl.SSLError,
502            max_retries=_MAX_RETRY,
503            functor=self._build_api.DownloadArtifact,
504            sleep_multiplier=_RETRY_SLEEP_SECS,
505            retry_backoff_factor=utils.DEFAULT_RETRY_BACKOFF_FACTOR,
506            build_target=_FETCHER_BUILD_TARGET,
507            build_id=self._fetch_cvd_version,
508            resource_id=_FETCHER_NAME,
509            local_dest=download_target,
510            attempt_id="latest")
511        fetch_cvd_stat = os.stat(download_target)
512        os.chmod(download_target, fetch_cvd_stat.st_mode | stat.S_IEXEC)
513        self._ssh.ScpPushFile(src_file=download_target, dst_file=_FETCHER_NAME)
514        os.remove(download_target)
515        os.rmdir(download_dir)
516
517    @utils.TimeExecute(function_description="Downloading build on instance")
518    def FetchBuild(self, build_id, branch, build_target, system_build_id,
519                   system_branch, system_build_target, kernel_build_id,
520                   kernel_branch, kernel_build_target):
521        """Execute fetch_cvd on the remote instance to get Cuttlefish runtime files.
522
523        Args:
524            fetch_args: String of arguments to pass to fetch_cvd.
525        """
526        timestart = time.time()
527        fetch_cvd_args = ["-credential_source=gce"]
528
529        default_build = _ProcessBuild(build_id, branch, build_target)
530        if default_build:
531            fetch_cvd_args.append("-default_build=" + default_build)
532        system_build = _ProcessBuild(system_build_id, system_branch, system_build_target)
533        if system_build:
534            fetch_cvd_args.append("-system_build=" + system_build)
535        kernel_build = self.GetKernelBuild(kernel_build_id,
536                                           kernel_branch,
537                                           kernel_build_target)
538        if kernel_build:
539            fetch_cvd_args.append("-kernel_build=" + kernel_build)
540
541        self._ssh.Run("./fetch_cvd " + " ".join(fetch_cvd_args),
542                      timeout=constants.DEFAULT_SSH_TIMEOUT)
543        self._execution_time[_FETCH_ARTIFACT] = round(time.time() - timestart, 2)
544
545    def GetInstanceIP(self, instance=None):
546        """Override method from parent class.
547
548        It need to get the IP address in the common_operation. If the class
549        already defind the ip address, return the ip address.
550
551        Args:
552            instance: String, representing instance name.
553
554        Returns:
555            ssh.IP object, that stores internal and external ip of the instance.
556        """
557        if self._ip:
558            return self._ip
559        return gcompute_client.ComputeClient.GetInstanceIP(
560            self, instance=instance, zone=self._zone)
561
562    def GetHostImageName(self, stable_image_name, image_family, image_project):
563        """Get host image name.
564
565        Args:
566            stable_image_name: String of stable host image name.
567            image_family: String of image family.
568            image_project: String of image project.
569
570        Returns:
571            String of stable host image name.
572
573        Raises:
574            errors.ConfigError: There is no host image name in config file.
575        """
576        if stable_image_name:
577            return stable_image_name
578
579        if image_family:
580            image_name = gcompute_client.ComputeClient.GetImageFromFamily(
581                self, image_family, image_project)["name"]
582            logger.debug("Get the host image name from image family: %s", image_name)
583            return image_name
584
585        raise errors.ConfigError(
586            "Please specify 'stable_host_image_name' or 'stable_host_image_family'"
587            " in config.")
588
    @property
    def all_failures(self):
        """Return the failures recorded so far, keyed by instance name.

        Values are either the exception stored by CreateInstance or the
        error message string stored by LaunchCvd. The internal dict itself
        is returned, not a copy.
        """
        return self._all_failures
593
    @property
    def execution_time(self):
        """Return the dict of per-stage execution times.

        Keys are the fetch-artifact, GCE-create and launch-cvd report keys;
        values start at 0 and are set to the elapsed seconds (rounded to two
        decimals) by the corresponding stage. The internal dict itself is
        returned, not a copy.
        """
        return self._execution_time
598