#!/usr/bin/env python
#
# Copyright 2016 - The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A client that manages Google Compute Engine.

** ComputeClient **

ComputeClient is a wrapper around Google Compute Engine APIs.
It provides a set of methods for managing a google compute engine project,
such as creating images, creating instances, etc.

Design philosophy: We tried to make ComputeClient as stateless as possible,
and it only keeps states about authentication. ComputeClient should be very
generic, and only knows how to talk to Compute Engine APIs.
"""
# pylint: disable=too-many-lines
import copy
import functools
import getpass
import logging
import os
import re

import six

from acloud import errors
from acloud.internal import constants
from acloud.internal.lib import base_cloud_client
from acloud.internal.lib import utils
from acloud.internal.lib.ssh import IP


logger = logging.getLogger(__name__)

# Retry budget handed to utils.RetryOnException for HTTP 412 conflicts.
_MAX_RETRIES_ON_FINGERPRINT_CONFLICT = 10
# Dictionary keys used when reading/writing resource metadata.
_METADATA_KEY = "key"
_METADATA_KEY_VALUE = "value"
_SSH_KEYS_NAME = "sshKeys"
_ITEMS = "items"
_METADATA = "metadata"
# Matches strings of the form "zones/<zone>" and captures the zone part.
_ZONE_RE = re.compile(r"^zones/(?P<zone>.+)")
# Quota metrics
_METRIC_CPUS = "CPUS"
_METRIC_DISKS_GB = "DISKS_TOTAL_GB"
_METRICS = [_METRIC_CPUS, _METRIC_DISKS_GB]
# Keys of a single quota entry.
_USAGE = "usage"
_LIMIT = "limit"
# The minimum requirement to create an instance.
61_REQUIRE_METRICS = {_METRIC_CPUS: 8, _METRIC_DISKS_GB: 1000} 62 63BASE_DISK_ARGS = { 64 "type": "PERSISTENT", 65 "boot": True, 66 "mode": "READ_WRITE", 67 "autoDelete": True, 68 "initializeParams": {}, 69} 70 71 72class OperationScope(object): 73 """Represents operation scope enum.""" 74 ZONE = "zone" 75 REGION = "region" 76 GLOBAL = "global" 77 78 79class PersistentDiskType(object): 80 """Represents different persistent disk types. 81 82 pd-standard for regular hard disk. 83 pd-ssd for solid state disk. 84 """ 85 STANDARD = "pd-standard" 86 SSD = "pd-ssd" 87 88 89class ImageStatus(object): 90 """Represents the status of an image.""" 91 PENDING = "PENDING" 92 READY = "READY" 93 FAILED = "FAILED" 94 95 96def _IsFingerPrintError(exc): 97 """Determine if the exception is a HTTP error with code 412. 98 99 Args: 100 exc: Exception instance. 101 102 Returns: 103 Boolean. True if the exception is a "Precondition Failed" error. 104 """ 105 return isinstance(exc, errors.HttpError) and exc.code == 412 106 107 108# pylint: disable=too-many-public-methods 109class ComputeClient(base_cloud_client.BaseCloudApiClient): 110 """Client that manages GCE.""" 111 112 # API settings, used by BaseCloudApiClient. 113 API_NAME = "compute" 114 API_VERSION = "v1" 115 SCOPE = " ".join([ 116 "https://www.googleapis.com/auth/compute", 117 "https://www.googleapis.com/auth/devstorage.read_write" 118 ]) 119 # Default settings for gce operations 120 DEFAULT_INSTANCE_SCOPE = [ 121 "https://www.googleapis.com/auth/androidbuild.internal", 122 "https://www.googleapis.com/auth/devstorage.read_only", 123 "https://www.googleapis.com/auth/logging.write" 124 ] 125 OPERATION_TIMEOUT_SECS = 30 * 60 # 30 mins 126 OPERATION_POLL_INTERVAL_SECS = 20 127 MACHINE_SIZE_METRICS = ["guestCpus", "memoryMb"] 128 ACCESS_DENIED_CODE = 403 129 130 def __init__(self, acloud_config, oauth2_credentials): 131 """Initialize. 132 133 Args: 134 acloud_config: An AcloudConfig object. 
135 oauth2_credentials: An oauth2client.OAuth2Credentials instance. 136 """ 137 super(ComputeClient, self).__init__(oauth2_credentials) 138 self._project = acloud_config.project 139 140 def _GetOperationStatus(self, operation, operation_scope, scope_name=None): 141 """Get status of an operation. 142 143 Args: 144 operation: An Operation resource in the format of json. 145 operation_scope: A value from OperationScope, "zone", "region", 146 or "global". 147 scope_name: If operation_scope is "zone" or "region", this should be 148 the name of the zone or region, e.g. "us-central1-f". 149 150 Returns: 151 Status of the operation, one of "DONE", "PENDING", "RUNNING". 152 153 Raises: 154 errors.DriverError: if the operation fails. 155 """ 156 operation_name = operation["name"] 157 if operation_scope == OperationScope.GLOBAL: 158 api = self.service.globalOperations().get( 159 project=self._project, operation=operation_name) 160 result = self.Execute(api) 161 elif operation_scope == OperationScope.ZONE: 162 api = self.service.zoneOperations().get( 163 project=self._project, 164 operation=operation_name, 165 zone=scope_name) 166 result = self.Execute(api) 167 elif operation_scope == OperationScope.REGION: 168 api = self.service.regionOperations().get( 169 project=self._project, 170 operation=operation_name, 171 region=scope_name) 172 result = self.Execute(api) 173 174 if result.get("error"): 175 errors_list = result["error"]["errors"] 176 raise errors.DriverError( 177 "Get operation state failed, errors: %s" % str(errors_list)) 178 return result["status"] 179 180 def WaitOnOperation(self, operation, operation_scope, scope_name=None): 181 """Wait for an operation to finish. 182 183 Args: 184 operation: An Operation resource in the format of json. 185 operation_scope: A value from OperationScope, "zone", "region", 186 or "global". 187 scope_name: If operation_scope is "zone" or "region", this should be 188 the name of the zone or region, e.g. "us-central1-f". 
189 """ 190 timeout_exception = errors.GceOperationTimeoutError( 191 "Operation hits timeout, did not complete within %d secs." % 192 self.OPERATION_TIMEOUT_SECS) 193 utils.PollAndWait( 194 func=self._GetOperationStatus, 195 expected_return="DONE", 196 timeout_exception=timeout_exception, 197 timeout_secs=self.OPERATION_TIMEOUT_SECS, 198 sleep_interval_secs=self.OPERATION_POLL_INTERVAL_SECS, 199 operation=operation, 200 operation_scope=operation_scope, 201 scope_name=scope_name) 202 203 def GetProject(self): 204 """Get project information. 205 206 Returns: 207 A project resource in json. 208 """ 209 api = self.service.projects().get(project=self._project) 210 return self.Execute(api) 211 212 def GetRegionInfo(self): 213 """Get region information that includes all quotas limit. 214 215 The region info example: 216 {"items": 217 [{"status": "UP", 218 "name": "asia-east1", 219 "quotas": 220 [{"usage": 92, "metric": "CPUS", "limit": 100}, 221 {"usage": 640, "metric": "DISKS_TOTAL_GB", "limit": 10240}, 222 ...]]} 223 } 224 225 Returns: 226 A region resource in json. 227 """ 228 api = self.service.regions().list(project=self._project) 229 return self.Execute(api) 230 231 @staticmethod 232 def GetMetricQuota(regions_info, zone, metric): 233 """Get CPU quota limit in specific zone and project. 234 235 Args: 236 regions_info: Dict, regions resource in json. 237 zone: String, name of zone. 238 metric: String, name of metric, e.g. "CPUS". 239 240 Returns: 241 A dict of quota information. Such as 242 {"usage": 100, "metric": "CPUS", "limit": 200} 243 """ 244 for region_info in regions_info["items"]: 245 if region_info["name"] in zone: 246 for quota in region_info["quotas"]: 247 if quota["metric"] == metric: 248 return quota 249 logger.info("Can't get %s quota info from zone(%s)", metric, zone) 250 return None 251 252 def EnoughMetricsInZone(self, zone): 253 """Check the zone have enough metrics to create instance. 254 255 The metrics include CPUS and DISKS. 
256 257 Args: 258 zone: String, name of zone. 259 260 Returns: 261 Boolean. True if zone have enough quota. 262 """ 263 regions_info = self.GetRegionInfo() 264 for metric in _METRICS: 265 quota = self.GetMetricQuota(regions_info, zone, metric) 266 if not quota: 267 logger.debug( 268 "Can't query the metric(%s) in zone(%s)", metric, zone) 269 return False 270 if quota[_LIMIT] - quota[_USAGE] < _REQUIRE_METRICS[metric]: 271 logger.debug( 272 "The metric(%s) is over limit in zone(%s)", metric, zone) 273 return False 274 return True 275 276 def GetDisk(self, disk_name, zone): 277 """Get disk information. 278 279 Args: 280 disk_name: A string. 281 zone: String, name of zone. 282 283 Returns: 284 An disk resource in json. 285 https://cloud.google.com/compute/docs/reference/latest/disks#resource 286 """ 287 api = self.service.disks().get( 288 project=self._project, zone=zone, disk=disk_name) 289 return self.Execute(api) 290 291 def CheckDiskExists(self, disk_name, zone): 292 """Check if disk exists. 293 294 Args: 295 disk_name: A string 296 zone: String, name of zone. 297 298 Returns: 299 True if disk exists, otherwise False. 300 """ 301 try: 302 self.GetDisk(disk_name, zone) 303 exists = True 304 except errors.ResourceNotFoundError: 305 exists = False 306 logger.debug("CheckDiskExists: disk_name: %s, result: %s", disk_name, 307 exists) 308 return exists 309 310 def CreateDisk(self, 311 disk_name, 312 source_image, 313 size_gb, 314 zone, 315 source_project=None, 316 disk_type=PersistentDiskType.STANDARD): 317 """Create a gce disk. 318 319 Args: 320 disk_name: String 321 source_image: String, name of the image. 322 size_gb: Integer, size in gb. 323 zone: String, name of the zone, e.g. us-central1-b. 324 source_project: String, required if the image is located in a different 325 project. 326 disk_type: String, a value from PersistentDiskType, STANDARD 327 for regular hard disk or SSD for solid state disk. 
328 """ 329 source_project = source_project or self._project 330 source_image = "projects/%s/global/images/%s" % ( 331 source_project, source_image) if source_image else None 332 logger.info("Creating disk %s, size_gb: %d, source_image: %s", 333 disk_name, size_gb, str(source_image)) 334 body = { 335 "name": disk_name, 336 "sizeGb": size_gb, 337 "type": "projects/%s/zones/%s/diskTypes/%s" % (self._project, zone, 338 disk_type), 339 } 340 api = self.service.disks().insert( 341 project=self._project, 342 sourceImage=source_image, 343 zone=zone, 344 body=body) 345 operation = self.Execute(api) 346 try: 347 self.WaitOnOperation( 348 operation=operation, 349 operation_scope=OperationScope.ZONE, 350 scope_name=zone) 351 except errors.DriverError: 352 logger.error("Creating disk failed, cleaning up: %s", disk_name) 353 if self.CheckDiskExists(disk_name, zone): 354 self.DeleteDisk(disk_name, zone) 355 raise 356 logger.info("Disk %s has been created.", disk_name) 357 358 def DeleteDisk(self, disk_name, zone): 359 """Delete a gce disk. 360 361 Args: 362 disk_name: A string, name of disk. 363 zone: A string, name of zone. 364 """ 365 logger.info("Deleting disk %s", disk_name) 366 api = self.service.disks().delete( 367 project=self._project, zone=zone, disk=disk_name) 368 operation = self.Execute(api) 369 self.WaitOnOperation( 370 operation=operation, 371 operation_scope=OperationScope.ZONE, 372 scope_name=zone) 373 logger.info("Deleted disk %s", disk_name) 374 375 def DeleteDisks(self, disk_names, zone): 376 """Delete multiple disks. 377 378 Args: 379 disk_names: A list of disk names. 380 zone: A string, name of zone. 381 382 Returns: 383 A tuple, (deleted, failed, error_msgs) 384 deleted: A list of names of disks that have been deleted. 385 failed: A list of names of disks that we fail to delete. 386 error_msgs: A list of failure messages. 387 """ 388 if not disk_names: 389 logger.warning("Nothing to delete. 
Arg disk_names is not provided.") 390 return [], [], [] 391 # Batch send deletion requests. 392 logger.info("Deleting disks: %s", disk_names) 393 delete_requests = {} 394 for disk_name in set(disk_names): 395 request = self.service.disks().delete( 396 project=self._project, disk=disk_name, zone=zone) 397 delete_requests[disk_name] = request 398 return self._BatchExecuteAndWait( 399 delete_requests, OperationScope.ZONE, scope_name=zone) 400 401 def ListDisks(self, zone, disk_filter=None): 402 """List disks. 403 404 Args: 405 zone: A string, representing zone name. e.g. "us-central1-f" 406 disk_filter: A string representing a filter in format of 407 FIELD_NAME COMPARISON_STRING LITERAL_STRING 408 e.g. "name ne example-instance" 409 e.g. "name eq "example-instance-[0-9]+"" 410 411 Returns: 412 A list of disks. 413 """ 414 return self.ListWithMultiPages( 415 api_resource=self.service.disks().list, 416 project=self._project, 417 zone=zone, 418 filter=disk_filter) 419 420 def CreateImage(self, 421 image_name, 422 source_uri=None, 423 source_disk=None, 424 labels=None): 425 """Create a Gce image. 426 427 Args: 428 image_name: String, name of image 429 source_uri: Full Google Cloud Storage URL where the disk image is 430 stored. e.g. "https://storage.googleapis.com/my-bucket/ 431 avd-system-2243663.tar.gz" 432 source_disk: String, this should be the disk's selfLink value 433 (including zone and project), rather than the disk_name 434 e.g. https://www.googleapis.com/compute/v1/projects/ 435 google.com:android-builds-project/zones/ 436 us-east1-d/disks/<disk_name> 437 labels: Dict, will be added to the image's labels. 438 439 Raises: 440 errors.DriverError: For malformed request or response. 441 errors.GceOperationTimeoutError: Operation takes too long to finish. 
442 """ 443 if self.CheckImageExists(image_name): 444 return 445 if (source_uri and source_disk) or (not source_uri 446 and not source_disk): 447 raise errors.DriverError( 448 "Creating image %s requires either source_uri %s or " 449 "source_disk %s but not both" % (image_name, source_uri, 450 source_disk)) 451 elif source_uri: 452 logger.info("Creating image %s, source_uri %s", image_name, 453 source_uri) 454 body = { 455 "name": image_name, 456 "rawDisk": { 457 "source": source_uri, 458 }, 459 } 460 else: 461 logger.info("Creating image %s, source_disk %s", image_name, 462 source_disk) 463 body = { 464 "name": image_name, 465 "sourceDisk": source_disk, 466 } 467 if labels is not None: 468 body["labels"] = labels 469 api = self.service.images().insert(project=self._project, body=body) 470 operation = self.Execute(api) 471 try: 472 self.WaitOnOperation( 473 operation=operation, operation_scope=OperationScope.GLOBAL) 474 except errors.DriverError: 475 logger.error("Creating image failed, cleaning up: %s", image_name) 476 if self.CheckImageExists(image_name): 477 self.DeleteImage(image_name) 478 raise 479 logger.info("Image %s has been created.", image_name) 480 481 @utils.RetryOnException(_IsFingerPrintError, 482 _MAX_RETRIES_ON_FINGERPRINT_CONFLICT) 483 def SetImageLabels(self, image_name, new_labels): 484 """Update image's labels. Retry for finger print conflict. 485 486 Note: Decorator RetryOnException will retry the call for FingerPrint 487 conflict (HTTP error code 412). The fingerprint is used to detect 488 conflicts of GCE resource updates. The fingerprint is initially generated 489 by Compute Engine and changes after every request to modify or update 490 resources (e.g. GCE "image" resource has "fingerPrint" for "labels" 491 updates). 492 493 Args: 494 image_name: A string, the image name. 495 new_labels: Dict, will be added to the image's labels. 496 497 Returns: 498 A GlobalOperation resouce. 
499 https://cloud.google.com/compute/docs/reference/latest/globalOperations 500 """ 501 image = self.GetImage(image_name) 502 labels = image.get("labels", {}) 503 labels.update(new_labels) 504 body = { 505 "labels": labels, 506 "labelFingerprint": image["labelFingerprint"] 507 } 508 api = self.service.images().setLabels( 509 project=self._project, resource=image_name, body=body) 510 return self.Execute(api) 511 512 def CheckImageExists(self, image_name): 513 """Check if image exists. 514 515 Args: 516 image_name: A string 517 518 Returns: 519 True if image exists, otherwise False. 520 """ 521 try: 522 self.GetImage(image_name) 523 exists = True 524 except errors.ResourceNotFoundError: 525 exists = False 526 logger.debug("CheckImageExists: image_name: %s, result: %s", 527 image_name, exists) 528 return exists 529 530 def GetImage(self, image_name, image_project=None): 531 """Get image information. 532 533 Args: 534 image_name: A string 535 image_project: A string 536 537 Returns: 538 An image resource in json. 539 https://cloud.google.com/compute/docs/reference/latest/images#resource 540 """ 541 api = self.service.images().get( 542 project=image_project or self._project, image=image_name) 543 return self.Execute(api) 544 545 def GetImageFromFamily(self, image_family, image_project=None): 546 """Get image information from image_family. 547 548 Args: 549 image_family: String of image family. 550 image_project: String of image project. 551 552 Returns: 553 An image resource in json. 554 https://cloud.google.com/compute/docs/reference/latest/images#resource 555 """ 556 api = self.service.images().getFromFamily( 557 project=image_project or self._project, family=image_family) 558 return self.Execute(api) 559 560 def DeleteImage(self, image_name): 561 """Delete an image. 
562 563 Args: 564 image_name: A string 565 """ 566 logger.info("Deleting image %s", image_name) 567 api = self.service.images().delete( 568 project=self._project, image=image_name) 569 operation = self.Execute(api) 570 self.WaitOnOperation( 571 operation=operation, operation_scope=OperationScope.GLOBAL) 572 logger.info("Deleted image %s", image_name) 573 574 def DeleteImages(self, image_names): 575 """Delete multiple images. 576 577 Args: 578 image_names: A list of image names. 579 580 Returns: 581 A tuple, (deleted, failed, error_msgs) 582 deleted: A list of names of images that have been deleted. 583 failed: A list of names of images that we fail to delete. 584 error_msgs: A list of failure messages. 585 """ 586 if not image_names: 587 return [], [], [] 588 # Batch send deletion requests. 589 logger.info("Deleting images: %s", image_names) 590 delete_requests = {} 591 for image_name in set(image_names): 592 request = self.service.images().delete( 593 project=self._project, image=image_name) 594 delete_requests[image_name] = request 595 return self._BatchExecuteAndWait(delete_requests, 596 OperationScope.GLOBAL) 597 598 def ListImages(self, image_filter=None, image_project=None): 599 """List images. 600 601 Args: 602 image_filter: A string representing a filter in format of 603 FIELD_NAME COMPARISON_STRING LITERAL_STRING 604 e.g. "name ne example-image" 605 e.g. "name eq "example-image-[0-9]+"" 606 image_project: String. If not provided, will list images from the default 607 project. Otherwise, will list images from the given 608 project, which can be any arbitrary project where the 609 account has read access 610 (i.e. has the role "roles/compute.imageUser") 611 612 Read more about image sharing across project: 613 https://cloud.google.com/compute/docs/images/sharing-images-across-projects 614 615 Returns: 616 A list of images. 
617 """ 618 return self.ListWithMultiPages( 619 api_resource=self.service.images().list, 620 project=image_project or self._project, 621 filter=image_filter) 622 623 def GetInstance(self, instance, zone): 624 """Get information about an instance. 625 626 Args: 627 instance: A string, representing instance name. 628 zone: A string, representing zone name. e.g. "us-central1-f" 629 630 Returns: 631 An instance resource in json. 632 https://cloud.google.com/compute/docs/reference/latest/instances#resource 633 """ 634 api = self.service.instances().get( 635 project=self._project, zone=zone, instance=instance) 636 return self.Execute(api) 637 638 def AttachAccelerator(self, instance, zone, accelerator_count, 639 accelerator_type): 640 """Attach a GPU accelerator to the instance. 641 642 Note: In order for this to succeed the following must hold: 643 - The machine schedule must be set to "terminate" i.e: 644 SetScheduling(self, instance, zone, on_host_maintenance="terminate") 645 must have been called. 646 - The machine is not starting or running. i.e. 647 StopInstance(self, instance) must have been called. 648 649 Args: 650 instance: A string, representing instance name. 651 zone: String, name of zone. 652 accelerator_count: The number accelerators to be attached to the instance. 653 a value of 0 will detach all accelerators. 654 accelerator_type: The type of accelerator to attach. e.g. 
655 "nvidia-tesla-k80" 656 """ 657 body = { 658 "guestAccelerators": [{ 659 "acceleratorType": 660 self.GetAcceleratorUrl(accelerator_type, zone), 661 "acceleratorCount": 662 accelerator_count 663 }] 664 } 665 api = self.service.instances().setMachineResources( 666 project=self._project, zone=zone, instance=instance, body=body) 667 operation = self.Execute(api) 668 try: 669 self.WaitOnOperation( 670 operation=operation, 671 operation_scope=OperationScope.ZONE, 672 scope_name=zone) 673 except errors.GceOperationTimeoutError: 674 logger.error("Attach instance failed: %s", instance) 675 raise 676 logger.info("%d x %s have been attached to instance %s.", 677 accelerator_count, accelerator_type, instance) 678 679 def AttachDisk(self, instance, zone, **kwargs): 680 """Attach the external disk to the instance. 681 682 Args: 683 instance: A string, representing instance name. 684 zone: String, name of zone. 685 **kwargs: The attachDisk request body. See "https://cloud.google.com/ 686 compute/docs/reference/latest/instances/attachDisk" for detail. 687 { 688 "kind": "compute#attachedDisk", 689 "type": string, 690 "mode": string, 691 "source": string, 692 "deviceName": string, 693 "index": integer, 694 "boot": boolean, 695 "initializeParams": { 696 "diskName": string, 697 "sourceImage": string, 698 "diskSizeGb": long, 699 "diskType": string, 700 "sourceImageEncryptionKey": { 701 "rawKey": string, 702 "sha256": string 703 } 704 }, 705 "autoDelete": boolean, 706 "licenses": [ 707 string 708 ], 709 "interface": string, 710 "diskEncryptionKey": { 711 "rawKey": string, 712 "sha256": string 713 } 714 } 715 716 Returns: 717 An disk resource in json. 718 https://cloud.google.com/compute/docs/reference/latest/disks#resource 719 720 721 Raises: 722 errors.GceOperationTimeoutError: Operation takes too long to finish. 
723 """ 724 api = self.service.instances().attachDisk( 725 project=self._project, zone=zone, instance=instance, body=kwargs) 726 operation = self.Execute(api) 727 try: 728 self.WaitOnOperation( 729 operation=operation, 730 operation_scope=OperationScope.ZONE, 731 scope_name=zone) 732 except errors.GceOperationTimeoutError: 733 logger.error("Attach instance failed: %s", instance) 734 raise 735 logger.info("Disk has been attached to instance %s.", instance) 736 737 def DetachDisk(self, instance, zone, disk_name): 738 """Attach the external disk to the instance. 739 740 Args: 741 instance: A string, representing instance name. 742 zone: String, name of zone. 743 disk_name: A string, the name of the detach disk. 744 745 Returns: 746 A ZoneOperation resource. 747 See https://cloud.google.com/compute/docs/reference/latest/zoneOperations 748 749 Raises: 750 errors.GceOperationTimeoutError: Operation takes too long to finish. 751 """ 752 api = self.service.instances().detachDisk( 753 project=self._project, 754 zone=zone, 755 instance=instance, 756 deviceName=disk_name) 757 operation = self.Execute(api) 758 try: 759 self.WaitOnOperation( 760 operation=operation, 761 operation_scope=OperationScope.ZONE, 762 scope_name=zone) 763 except errors.GceOperationTimeoutError: 764 logger.error("Detach instance failed: %s", instance) 765 raise 766 logger.info("Disk has been detached to instance %s.", instance) 767 768 def StartInstance(self, instance, zone): 769 """Start |instance| in |zone|. 770 771 Args: 772 instance: A string, representing instance name. 773 zone: A string, representing zone name. e.g. "us-central1-f" 774 775 Raises: 776 errors.GceOperationTimeoutError: Operation takes too long to finish. 
777 """ 778 api = self.service.instances().start( 779 project=self._project, zone=zone, instance=instance) 780 operation = self.Execute(api) 781 try: 782 self.WaitOnOperation( 783 operation=operation, 784 operation_scope=OperationScope.ZONE, 785 scope_name=zone) 786 except errors.GceOperationTimeoutError: 787 logger.error("Start instance failed: %s", instance) 788 raise 789 logger.info("Instance %s has been started.", instance) 790 791 def StartInstances(self, instances, zone): 792 """Start |instances| in |zone|. 793 794 Args: 795 instances: A list of strings, representing instance names's list. 796 zone: A string, representing zone name. e.g. "us-central1-f" 797 798 Returns: 799 A tuple, (done, failed, error_msgs) 800 done: A list of string, representing the names of instances that 801 have been executed. 802 failed: A list of string, representing the names of instances that 803 we failed to execute. 804 error_msgs: A list of string, representing the failure messages. 805 """ 806 action = functools.partial( 807 self.service.instances().start, project=self._project, zone=zone) 808 return self._BatchExecuteOnInstances(instances, zone, action) 809 810 def StopInstance(self, instance, zone): 811 """Stop |instance| in |zone|. 812 813 Args: 814 instance: A string, representing instance name. 815 zone: A string, representing zone name. e.g. "us-central1-f" 816 817 Raises: 818 errors.GceOperationTimeoutError: Operation takes too long to finish. 819 """ 820 api = self.service.instances().stop( 821 project=self._project, zone=zone, instance=instance) 822 operation = self.Execute(api) 823 try: 824 self.WaitOnOperation( 825 operation=operation, 826 operation_scope=OperationScope.ZONE, 827 scope_name=zone) 828 except errors.GceOperationTimeoutError: 829 logger.error("Stop instance failed: %s", instance) 830 raise 831 logger.info("Instance %s has been terminated.", instance) 832 833 def StopInstances(self, instances, zone): 834 """Stop |instances| in |zone|. 
835 836 Args: 837 instances: A list of strings, representing instance names's list. 838 zone: A string, representing zone name. e.g. "us-central1-f" 839 840 Returns: 841 A tuple, (done, failed, error_msgs) 842 done: A list of string, representing the names of instances that 843 have been executed. 844 failed: A list of string, representing the names of instances that 845 we failed to execute. 846 error_msgs: A list of string, representing the failure messages. 847 """ 848 action = functools.partial( 849 self.service.instances().stop, project=self._project, zone=zone) 850 return self._BatchExecuteOnInstances(instances, zone, action) 851 852 def SetScheduling(self, 853 instance, 854 zone, 855 automatic_restart=True, 856 on_host_maintenance="MIGRATE"): 857 """Update scheduling config |automatic_restart| and |on_host_maintenance|. 858 859 Args: 860 instance: A string, representing instance name. 861 zone: A string, representing zone name. e.g. "us-central1-f". 862 automatic_restart: Boolean, determine whether the instance will 863 automatically restart if it crashes or not, 864 default to True. 865 on_host_maintenance: enum["MIGRATE", "TERMINATE"] 866 The instance's maintenance behavior, which 867 determines whether the instance is live 868 "MIGRATE" or "TERMINATE" when there is 869 a maintenance event. 870 871 Raises: 872 errors.GceOperationTimeoutError: Operation takes too long to finish. 
873 """ 874 body = { 875 "automaticRestart": automatic_restart, 876 "onHostMaintenance": on_host_maintenance 877 } 878 api = self.service.instances().setScheduling( 879 project=self._project, zone=zone, instance=instance, body=body) 880 operation = self.Execute(api) 881 try: 882 self.WaitOnOperation( 883 operation=operation, 884 operation_scope=OperationScope.ZONE, 885 scope_name=zone) 886 except errors.GceOperationTimeoutError: 887 logger.error("Set instance scheduling failed: %s", instance) 888 raise 889 logger.info( 890 "Instance scheduling changed:\n" 891 " automaticRestart: %s\n" 892 " onHostMaintenance: %s\n", 893 str(automatic_restart).lower(), on_host_maintenance) 894 895 def ListInstances(self, instance_filter=None): 896 """List instances cross all zones. 897 898 Gcompute response instance. For example: 899 { 900 'items': 901 { 902 'zones/europe-west3-b': 903 { 904 'warning': 905 { 906 'message': "There are no results for scope 907 'zones/europe-west3-b' on this page.", 908 'code': 'NO_RESULTS_ON_PAGE', 909 'data': [{'value': u'zones/europe-west3-b', 910 'key': u'scope'}] 911 } 912 }, 913 'zones/asia-east1-b': 914 { 915 'instances': [ 916 { 917 'name': 'ins-bc212dc8-userbuild-aosp-cf-x86-64-phone' 918 'status': 'RUNNING', 919 'cpuPlatform': 'Intel Broadwell', 920 'startRestricted': False, 921 'labels': {u'created_by': u'herbertxue'}, 922 'name': 'ins-bc212dc8-userbuild-aosp-cf-x86-64-phone', 923 ... 924 }] 925 } 926 } 927 } 928 929 Args: 930 instance_filter: A string representing a filter in format of 931 FIELD_NAME COMPARISON_STRING LITERAL_STRING 932 e.g. "name ne example-instance" 933 e.g. "name eq "example-instance-[0-9]+"" 934 935 Returns: 936 A list of instances. 937 """ 938 # aggregatedList will only return 500 results max, so if there are more, 939 # we need to send in the next page token to get the next 500 (and so on 940 # and so forth. 
941 get_more_instances = True 942 page_token = None 943 instances_list = [] 944 while get_more_instances: 945 api = self.service.instances().aggregatedList( 946 project=self._project, 947 filter=instance_filter, 948 pageToken=page_token) 949 response = self.Execute(api) 950 page_token = response.get("nextPageToken") 951 get_more_instances = page_token is not None 952 for instances_data in response["items"].values(): 953 if "instances" in instances_data: 954 for instance in instances_data.get("instances"): 955 instances_list.append(instance) 956 957 return instances_list 958 959 def SetSchedulingInstances(self, 960 instances, 961 zone, 962 automatic_restart=True, 963 on_host_maintenance="MIGRATE"): 964 """Update scheduling config |automatic_restart| and |on_host_maintenance|. 965 966 See //cloud/cluster/api/mixer_instances.proto Scheduling for config option. 967 968 Args: 969 instances: A list of string, representing instance names. 970 zone: A string, representing zone name. e.g. "us-central1-f". 971 automatic_restart: Boolean, determine whether the instance will 972 automatically restart if it crashes or not, 973 default to True. 974 on_host_maintenance: enum["MIGRATE", "TERMINATE"] 975 The instance's maintenance behavior, which 976 determines whether the instance is live 977 migrated or terminated when there is 978 a maintenance event. 979 980 Returns: 981 A tuple, (done, failed, error_msgs) 982 done: A list of string, representing the names of instances that 983 have been executed. 984 failed: A list of string, representing the names of instances that 985 we failed to execute. 986 error_msgs: A list of string, representing the failure messages. 
987 """ 988 body = { 989 "automaticRestart": automatic_restart, 990 "OnHostMaintenance": on_host_maintenance 991 } 992 action = functools.partial( 993 self.service.instances().setScheduling, 994 project=self._project, 995 zone=zone, 996 body=body) 997 return self._BatchExecuteOnInstances(instances, zone, action) 998 999 def _BatchExecuteOnInstances(self, instances, zone, action): 1000 """Batch processing operations requiring computing time. 1001 1002 Args: 1003 instances: A list of instance names. 1004 zone: A string, e.g. "us-central1-f". 1005 action: partial func, all kwargs for this gcloud action has been 1006 defined in the caller function (e.g. See "StartInstances") 1007 except 'instance' which will be defined by iterating the 1008 |instances|. 1009 1010 Returns: 1011 A tuple, (done, failed, error_msgs) 1012 done: A list of string, representing the names of instances that 1013 have been executed. 1014 failed: A list of string, representing the names of instances that 1015 we failed to execute. 1016 error_msgs: A list of string, representing the failure messages. 1017 """ 1018 if not instances: 1019 return [], [], [] 1020 # Batch send requests. 1021 logger.info("Batch executing instances: %s", instances) 1022 requests = {} 1023 for instance_name in set(instances): 1024 requests[instance_name] = action(instance=instance_name) 1025 return self._BatchExecuteAndWait( 1026 requests, operation_scope=OperationScope.ZONE, scope_name=zone) 1027 1028 def _BatchExecuteAndWait(self, requests, operation_scope, scope_name=None): 1029 """Batch processing requests and wait on the operation. 1030 1031 Args: 1032 requests: A dictionary. The key is a string representing the resource 1033 name. For example, an instance name, or an image name. 1034 operation_scope: A value from OperationScope, "zone", "region", 1035 or "global". 1036 scope_name: If operation_scope is "zone" or "region", this should be 1037 the name of the zone or region, e.g. "us-central1-f". 
1038 Returns: 1039 A tuple, (done, failed, error_msgs) 1040 done: A list of string, representing the resource names that have 1041 been executed. 1042 failed: A list of string, representing resource names that 1043 we failed to execute. 1044 error_msgs: A list of string, representing the failure messages. 1045 """ 1046 results = self.BatchExecute(requests) 1047 # Initialize return values 1048 failed = [] 1049 error_msgs = [] 1050 for resource_name, (_, error) in six.iteritems(results): 1051 if error is not None: 1052 failed.append(resource_name) 1053 error_msgs.append(str(error)) 1054 done = [] 1055 # Wait for the executing operations to finish. 1056 logger.info("Waiting for executing operations") 1057 for resource_name in six.iterkeys(requests): 1058 operation, _ = results[resource_name] 1059 if operation: 1060 try: 1061 self.WaitOnOperation(operation, operation_scope, 1062 scope_name) 1063 done.append(resource_name) 1064 except errors.DriverError as exc: 1065 failed.append(resource_name) 1066 error_msgs.append(str(exc)) 1067 return done, failed, error_msgs 1068 1069 def ListZones(self): 1070 """List all zone instances in the project. 1071 1072 Returns: 1073 Gcompute response instance. 
For example: 1074 { 1075 "id": "projects/google.com%3Aandroid-build-staging/zones", 1076 "kind": "compute#zoneList", 1077 "selfLink": "https://www.googleapis.com/compute/v1/projects/" 1078 "google.com:android-build-staging/zones" 1079 "items": [ 1080 { 1081 'creationTimestamp': '2014-07-15T10:44:08.663-07:00', 1082 'description': 'asia-east1-c', 1083 'id': '2222', 1084 'kind': 'compute#zone', 1085 'name': 'asia-east1-c', 1086 'region': 'https://www.googleapis.com/compute/v1/projects/' 1087 'google.com:android-build-staging/regions/asia-east1', 1088 'selfLink': 'https://www.googleapis.com/compute/v1/projects/' 1089 'google.com:android-build-staging/zones/asia-east1-c', 1090 'status': 'UP' 1091 }, { 1092 'creationTimestamp': '2014-05-30T18:35:16.575-07:00', 1093 'description': 'asia-east1-b', 1094 'id': '2221', 1095 'kind': 'compute#zone', 1096 'name': 'asia-east1-b', 1097 'region': 'https://www.googleapis.com/compute/v1/projects/' 1098 'google.com:android-build-staging/regions/asia-east1', 1099 'selfLink': 'https://www.googleapis.com/compute/v1/projects' 1100 '/google.com:android-build-staging/zones/asia-east1-b', 1101 'status': 'UP' 1102 }] 1103 } 1104 See cloud cluster's api/mixer_zones.proto 1105 """ 1106 api = self.service.zones().list(project=self._project) 1107 return self.Execute(api) 1108 1109 def ListRegions(self): 1110 """List all the regions for a project. 1111 1112 Returns: 1113 A dictionary containing all the zones and additional data. See this link 1114 for the detailed response: 1115 https://cloud.google.com/compute/docs/reference/latest/regions/list. 
1116 Example: 1117 { 1118 'items': [{ 1119 'name': 1120 'us-central1', 1121 'quotas': [{ 1122 'usage': 2.0, 1123 'limit': 24.0, 1124 'metric': 'CPUS' 1125 }, { 1126 'usage': 1.0, 1127 'limit': 23.0, 1128 'metric': 'IN_USE_ADDRESSES' 1129 }, { 1130 'usage': 209.0, 1131 'limit': 10240.0, 1132 'metric': 'DISKS_TOTAL_GB' 1133 }, { 1134 'usage': 1000.0, 1135 'limit': 20000.0, 1136 'metric': 'INSTANCES' 1137 }] 1138 },..] 1139 } 1140 """ 1141 api = self.service.regions().list(project=self._project) 1142 return self.Execute(api) 1143 1144 def _GetNetworkArgs(self, network, zone): 1145 """Helper to generate network args that is used to create an instance. 1146 1147 Args: 1148 network: A string, e.g. "default". 1149 zone: String, representing zone name, e.g. "us-central1-f" 1150 1151 Returns: 1152 A dictionary representing network args. 1153 """ 1154 network_args = { 1155 "network": self.GetNetworkUrl(network), 1156 "accessConfigs": [{ 1157 "name": "External NAT", 1158 "type": "ONE_TO_ONE_NAT" 1159 }] 1160 } 1161 # default network can be blank or set to default, we don't need to 1162 # specify the subnetwork for that. 1163 if network and network != "default": 1164 network_args["subnetwork"] = self.GetSubnetworkUrl(network, zone) 1165 return network_args 1166 1167 def _GetDiskArgs(self, 1168 disk_name, 1169 image_name, 1170 image_project=None, 1171 disk_size_gb=None): 1172 """Helper to generate disk args that is used to create an instance. 1173 1174 Args: 1175 disk_name: A string 1176 image_name: A string 1177 image_project: A string 1178 disk_size_gb: An integer 1179 1180 Returns: 1181 List holding dict of disk args. 1182 """ 1183 args = copy.deepcopy(BASE_DISK_ARGS) 1184 args["initializeParams"] = { 1185 "diskName": disk_name, 1186 "sourceImage": self.GetImage(image_name, 1187 image_project)["selfLink"], 1188 } 1189 # TODO: Remove this check once it's validated that we can either pass in 1190 # a None diskSizeGb or we find an appropriate default val. 
1191 if disk_size_gb: 1192 args["diskSizeGb"] = disk_size_gb 1193 return [args] 1194 1195 def _GetExtraDiskArgs(self, extra_disk_name, zone): 1196 """Get extra disk arg for given disk. 1197 1198 Args: 1199 extra_disk_name: String, name of the disk. 1200 zone: String, representing zone name, e.g. "us-central1-f" 1201 1202 Returns: 1203 A dictionary of disk args. 1204 """ 1205 return [{ 1206 "type": "PERSISTENT", 1207 "mode": "READ_WRITE", 1208 "source": "projects/%s/zones/%s/disks/%s" % (self._project, zone, 1209 extra_disk_name), 1210 "autoDelete": True, 1211 "boot": False, 1212 "interface": "SCSI", 1213 "deviceName": extra_disk_name, 1214 }] 1215 1216 # pylint: disable=too-many-locals 1217 def CreateInstance(self, 1218 instance, 1219 image_name, 1220 machine_type, 1221 metadata, 1222 network, 1223 zone, 1224 disk_args=None, 1225 image_project=None, 1226 gpu=None, 1227 extra_disk_name=None, 1228 extra_scopes=None, 1229 tags=None): 1230 """Create a gce instance with a gce image. 1231 1232 Args: 1233 instance: String, instance name. 1234 image_name: String, source image used to create this disk. 1235 machine_type: String, representing machine_type, 1236 e.g. "n1-standard-1" 1237 metadata: Dict, maps a metadata name to its value. 1238 network: String, representing network name, e.g. "default" 1239 zone: String, representing zone name, e.g. "us-central1-f" 1240 disk_args: A list of extra disk args (strings), see _GetDiskArgs 1241 for example, if None, will create a disk using the given 1242 image. 1243 image_project: String, name of the project where the image 1244 belongs. Assume the default project if None. 1245 gpu: String, type of gpu to attach. e.g. "nvidia-tesla-k80", if 1246 None no gpus will be attached. For more details see: 1247 https://cloud.google.com/compute/docs/gpus/add-gpus 1248 extra_disk_name: String,the name of the extra disk to attach. 1249 extra_scopes: A list of extra scopes to be provided to the instance. 
1250 tags: A list of tags to associate with the instance. e.g. 1251 ["http-server", "https-server"] 1252 """ 1253 disk_args = (disk_args 1254 or self._GetDiskArgs(instance, image_name, image_project)) 1255 if extra_disk_name: 1256 disk_args.extend(self._GetExtraDiskArgs(extra_disk_name, zone)) 1257 1258 scopes = [] 1259 scopes.extend(self.DEFAULT_INSTANCE_SCOPE) 1260 if extra_scopes: 1261 scopes.extend(extra_scopes) 1262 1263 # Add labels for giving the instances ability to be filter for 1264 # acloud list/delete cmds. 1265 body = { 1266 "machineType": self.GetMachineType(machine_type, zone)["selfLink"], 1267 "name": instance, 1268 "networkInterfaces": [self._GetNetworkArgs(network, zone)], 1269 "disks": disk_args, 1270 "labels": {constants.LABEL_CREATE_BY: getpass.getuser()}, 1271 "serviceAccounts": [{ 1272 "email": "default", 1273 "scopes": scopes, 1274 }], 1275 } 1276 1277 if tags: 1278 body["tags"] = {"items": tags} 1279 if gpu: 1280 body["guestAccelerators"] = [{ 1281 "acceleratorType": self.GetAcceleratorUrl(gpu, zone), 1282 "acceleratorCount": 1 1283 }] 1284 # Instances with GPUs cannot live migrate because they are assigned 1285 # to specific hardware devices. 1286 body["scheduling"] = {"onHostMaintenance": "terminate"} 1287 if metadata: 1288 metadata_list = [{ 1289 _METADATA_KEY: key, 1290 _METADATA_KEY_VALUE: val 1291 } for key, val in six.iteritems(metadata)] 1292 body[_METADATA] = {_ITEMS: metadata_list} 1293 logger.info("Creating instance: project %s, zone %s, body:%s", 1294 self._project, zone, body) 1295 api = self.service.instances().insert( 1296 project=self._project, zone=zone, body=body) 1297 operation = self.Execute(api) 1298 self.WaitOnOperation( 1299 operation, operation_scope=OperationScope.ZONE, scope_name=zone) 1300 logger.info("Instance %s has been created.", instance) 1301 1302 def DeleteInstance(self, instance, zone): 1303 """Delete a gce instance. 1304 1305 Args: 1306 instance: A string, instance name. 1307 zone: A string, e.g. 
"us-central1-f" 1308 """ 1309 logger.info("Deleting instance: %s", instance) 1310 api = self.service.instances().delete( 1311 project=self._project, zone=zone, instance=instance) 1312 operation = self.Execute(api) 1313 self.WaitOnOperation( 1314 operation, operation_scope=OperationScope.ZONE, scope_name=zone) 1315 logger.info("Deleted instance: %s", instance) 1316 1317 def DeleteInstances(self, instances, zone): 1318 """Delete multiple instances. 1319 1320 Args: 1321 instances: A list of instance names. 1322 zone: A string, e.g. "us-central1-f". 1323 1324 Returns: 1325 A tuple, (deleted, failed, error_msgs) 1326 deleted: A list of names of instances that have been deleted. 1327 failed: A list of names of instances that we fail to delete. 1328 error_msgs: A list of failure messages. 1329 """ 1330 action = functools.partial( 1331 self.service.instances().delete, project=self._project, zone=zone) 1332 return self._BatchExecuteOnInstances(instances, zone, action) 1333 1334 def ResetInstance(self, instance, zone): 1335 """Reset the gce instance. 1336 1337 Args: 1338 instance: A string, instance name. 1339 zone: A string, e.g. "us-central1-f". 1340 """ 1341 logger.info("Resetting instance: %s", instance) 1342 api = self.service.instances().reset( 1343 project=self._project, zone=zone, instance=instance) 1344 operation = self.Execute(api) 1345 self.WaitOnOperation( 1346 operation, operation_scope=OperationScope.ZONE, scope_name=zone) 1347 logger.info("Instance has been reset: %s", instance) 1348 1349 def GetMachineType(self, machine_type, zone): 1350 """Get URL for a given machine typle. 1351 1352 Args: 1353 machine_type: A string, name of the machine type. 1354 zone: A string, e.g. "us-central1-f" 1355 1356 Returns: 1357 A machine type resource in json. 
1358 https://cloud.google.com/compute/docs/reference/latest/ 1359 machineTypes#resource 1360 """ 1361 api = self.service.machineTypes().get( 1362 project=self._project, zone=zone, machineType=machine_type) 1363 return self.Execute(api) 1364 1365 def GetAcceleratorUrl(self, accelerator_type, zone): 1366 """Get URL for a given type of accelator. 1367 1368 Args: 1369 accelerator_type: A string, representing the accelerator, e.g 1370 "nvidia-tesla-k80" 1371 zone: A string representing a zone, e.g. "us-west1-b" 1372 1373 Returns: 1374 A URL that points to the accelerator resource, e.g. 1375 https://www.googleapis.com/compute/v1/projects/<project id>/zones/ 1376 us-west1-b/acceleratorTypes/nvidia-tesla-k80 1377 """ 1378 api = self.service.acceleratorTypes().get( 1379 project=self._project, zone=zone, acceleratorType=accelerator_type) 1380 result = self.Execute(api) 1381 return result["selfLink"] 1382 1383 def GetNetworkUrl(self, network): 1384 """Get URL for a given network. 1385 1386 Args: 1387 network: A string, representing network name, e.g "default" 1388 1389 Returns: 1390 A URL that points to the network resource, e.g. 1391 https://www.googleapis.com/compute/v1/projects/<project id>/ 1392 global/networks/default 1393 """ 1394 api = self.service.networks().get( 1395 project=self._project, network=network) 1396 result = self.Execute(api) 1397 return result["selfLink"] 1398 1399 def GetSubnetworkUrl(self, network, zone): 1400 """Get URL for a given network and zone. 1401 1402 Return the subnetwork for the network in the specified region that the 1403 specified zone resides in. If there is no subnetwork for the specified 1404 zone, raise an exception. 1405 1406 Args: 1407 network: A string, representing network name, e.g "default" 1408 zone: String, representing zone name, e.g. "us-central1-f" 1409 1410 Returns: 1411 A URL that points to the network resource, e.g. 
1412 https://www.googleapis.com/compute/v1/projects/<project id>/ 1413 global/networks/default 1414 1415 Raises: 1416 errors.NoSubnetwork: When no subnetwork exists for the zone 1417 specified. 1418 """ 1419 api = self.service.networks().get( 1420 project=self._project, network=network) 1421 result = self.Execute(api) 1422 region = zone.rsplit("-", 1)[0] 1423 for subnetwork in result.get("subnetworks", []): 1424 if region in subnetwork: 1425 return subnetwork 1426 raise errors.NoSubnetwork("No subnetwork for network %s in region %s" % 1427 (network, region)) 1428 1429 def CompareMachineSize(self, machine_type_1, machine_type_2, zone): 1430 """Compare the size of two machine types. 1431 1432 Args: 1433 machine_type_1: A string representing a machine type, e.g. n1-standard-1 1434 machine_type_2: A string representing a machine type, e.g. n1-standard-1 1435 zone: A string representing a zone, e.g. "us-central1-f" 1436 1437 Returns: 1438 -1 if any metric of machine size of the first type is smaller than 1439 the second type. 1440 0 if all metrics of machine size are equal. 1441 1 if at least one metric of machine size of the first type is 1442 greater than the second type and all metrics of first type are 1443 greater or equal to the second type. 1444 1445 Raises: 1446 errors.DriverError: For malformed response. 
1447 """ 1448 machine_info_1 = self.GetMachineType(machine_type_1, zone) 1449 machine_info_2 = self.GetMachineType(machine_type_2, zone) 1450 result = 0 1451 for metric in self.MACHINE_SIZE_METRICS: 1452 if metric not in machine_info_1 or metric not in machine_info_2: 1453 raise errors.DriverError( 1454 "Malformed machine size record: Can't find '%s' in %s or %s" 1455 % (metric, machine_info_1, machine_info_2)) 1456 cmp_result = machine_info_1[metric] - machine_info_2[metric] 1457 if cmp_result < 0: 1458 return -1 1459 if cmp_result > 0: 1460 result = 1 1461 return result 1462 1463 def GetSerialPortOutput(self, instance, zone, port=1): 1464 """Get serial port output. 1465 1466 Args: 1467 instance: string, instance name. 1468 zone: string, zone name. 1469 port: int, which COM port to read from, 1-4, default to 1. 1470 1471 Returns: 1472 String, contents of the output. 1473 1474 Raises: 1475 errors.DriverError: For malformed response. 1476 """ 1477 api = self.service.instances().getSerialPortOutput( 1478 project=self._project, zone=zone, instance=instance, port=port) 1479 result = self.Execute(api) 1480 if "contents" not in result: 1481 raise errors.DriverError( 1482 "Malformed response for GetSerialPortOutput: %s" % result) 1483 return result["contents"] 1484 1485 def GetInstanceNamesByIPs(self, ips): 1486 """Get Instance names by IPs. 1487 1488 This function will go through all instances, which 1489 could be slow if there are too many instances. However, currently 1490 GCE doesn't support search for instance by IP. 1491 1492 Args: 1493 ips: A set of IPs. 1494 1495 Returns: 1496 A dictionary where key is IP and value is instance name or None 1497 if instance is not found for the given IP. 
1498 """ 1499 ip_name_map = dict.fromkeys(ips) 1500 for instance in self.ListInstances(): 1501 try: 1502 ip = instance["networkInterfaces"][0]["accessConfigs"][0][ 1503 "natIP"] 1504 if ip in ips: 1505 ip_name_map[ip] = instance["name"] 1506 except (IndexError, KeyError) as e: 1507 logger.error("Could not get instance names by ips: %s", str(e)) 1508 return ip_name_map 1509 1510 def GetInstanceIP(self, instance, zone): 1511 """Get Instance IP given instance name. 1512 1513 Args: 1514 instance: String, representing instance name. 1515 zone: String, name of the zone. 1516 1517 Returns: 1518 ssh.IP object, that stores internal and external ip of the instance. 1519 """ 1520 instance = self.GetInstance(instance, zone) 1521 internal_ip = instance["networkInterfaces"][0]["networkIP"] 1522 external_ip = instance["networkInterfaces"][0]["accessConfigs"][0]["natIP"] 1523 return IP(internal=internal_ip, external=external_ip) 1524 1525 @utils.TimeExecute(function_description="Updating instance metadata: ") 1526 def SetInstanceMetadata(self, zone, instance, body): 1527 """Set instance metadata. 1528 1529 Args: 1530 zone: String, name of zone. 1531 instance: String, representing instance name. 1532 body: Dict, Metadata body. 1533 metdata is in the following format. 1534 { 1535 "kind": "compute#metadata", 1536 "fingerprint": "a-23icsyx4E=", 1537 "items": [ 1538 { 1539 "key": "sshKeys", 1540 "value": "key" 1541 }, ... 1542 ] 1543 } 1544 """ 1545 api = self.service.instances().setMetadata( 1546 project=self._project, zone=zone, instance=instance, body=body) 1547 operation = self.Execute(api) 1548 self.WaitOnOperation( 1549 operation, operation_scope=OperationScope.ZONE, scope_name=zone) 1550 1551 def AddSshRsaInstanceMetadata(self, user, ssh_rsa_path, instance): 1552 """Add the public rsa key to the instance's metadata. 1553 1554 Confirm that the instance has this public key in the instance's 1555 metadata, if not we will add this public key. 
1556 1557 Args: 1558 user: String, name of the user which the key belongs to. 1559 ssh_rsa_path: String, The absolute path to public rsa key. 1560 instance: String, representing instance name. 1561 """ 1562 ssh_rsa_path = os.path.expanduser(ssh_rsa_path) 1563 rsa = GetRsaKey(ssh_rsa_path) 1564 entry = "%s:%s" % (user, rsa) 1565 logger.debug("New RSA entry: %s", entry) 1566 1567 zone = self.GetZoneByInstance(instance) 1568 gce_instance = self.GetInstance(instance, zone) 1569 metadata = gce_instance.get(_METADATA) 1570 if RsaNotInMetadata(metadata, entry): 1571 self.UpdateRsaInMetadata(zone, instance, metadata, entry) 1572 1573 def GetZoneByInstance(self, instance): 1574 """Get the zone from instance name. 1575 1576 Gcompute response instance. For example: 1577 { 1578 'items': 1579 { 1580 'zones/europe-west3-b': 1581 { 1582 'warning': 1583 { 1584 'message': "There are no results for scope 1585 'zones/europe-west3-b' on this page.", 1586 'code': 'NO_RESULTS_ON_PAGE', 1587 'data': [{'value': u'zones/europe-west3-b', 1588 'key': u'scope'}] 1589 } 1590 }, 1591 'zones/asia-east1-b': 1592 { 1593 'instances': [ 1594 { 1595 'name': 'ins-bc212dc8-userbuild-aosp-cf-x86-64-phone' 1596 'status': 'RUNNING', 1597 'cpuPlatform': 'Intel Broadwell', 1598 'startRestricted': False, 1599 'labels': {u'created_by': u'herbertxue'}, 1600 'name': 'ins-bc212dc8-userbuild-aosp-cf-x86-64-phone', 1601 ... 1602 }] 1603 } 1604 } 1605 } 1606 1607 Args: 1608 instance: String, representing instance name. 1609 1610 Raises: 1611 errors.GetGceZoneError: Can't get zone from instance name. 1612 1613 Returns: 1614 String of zone name. 
1615 """ 1616 api = self.service.instances().aggregatedList( 1617 project=self._project, 1618 filter="name=%s" % instance) 1619 response = self.Execute(api) 1620 for zone, instance_data in response["items"].items(): 1621 if "instances" in instance_data: 1622 zone_match = _ZONE_RE.match(zone) 1623 if zone_match: 1624 return zone_match.group("zone") 1625 raise errors.GetGceZoneError("Can't get zone from the instance name %s" 1626 % instance) 1627 1628 def GetZonesByInstances(self, instances): 1629 """Get the zone from instance name. 1630 1631 Args: 1632 instances: List of strings, representing instance names. 1633 1634 Returns: 1635 A dictionary that contains the name of all instances in the zone. 1636 The key is the name of the zone, and the value is a list contains 1637 the name of the instances. 1638 """ 1639 zone_instances = {} 1640 for instance in instances: 1641 zone = self.GetZoneByInstance(instance) 1642 if zone in zone_instances: 1643 zone_instances[zone].append(instance) 1644 else: 1645 zone_instances[zone] = [instance] 1646 return zone_instances 1647 1648 def CheckAccess(self): 1649 """Check if the user has read access to the cloud project. 1650 1651 Returns: 1652 True if the user has at least read access to the project. 1653 False otherwise. 1654 1655 Raises: 1656 errors.HttpError if other unexpected error happens when 1657 accessing the project. 1658 """ 1659 api = self.service.zones().list(project=self._project) 1660 retry_http_codes = copy.copy(self.RETRY_HTTP_CODES) 1661 retry_http_codes.remove(self.ACCESS_DENIED_CODE) 1662 try: 1663 self.Execute(api, retry_http_codes=retry_http_codes) 1664 except errors.HttpError as e: 1665 if e.code == self.ACCESS_DENIED_CODE: 1666 return False 1667 raise 1668 return True 1669 1670 def UpdateRsaInMetadata(self, zone, instance, metadata, entry): 1671 """Update ssh public key to sshKeys's value in this metadata. 1672 1673 Args: 1674 zone: String, name of zone. 1675 instance: String, representing instance name. 
1676 metadata: Dict, maps a metadata name to its value. 1677 entry: String, ssh public key. 1678 """ 1679 ssh_key_item = GetSshKeyFromMetadata(metadata) 1680 if ssh_key_item: 1681 # The ssh key exists in the metadata so update the reference to it 1682 # in the metadata. There may not be an actual ssh key value so 1683 # that's why we filter for None to avoid an empty line in front. 1684 ssh_key_item[_METADATA_KEY_VALUE] = "\n".join( 1685 list(filter(None, [ssh_key_item[_METADATA_KEY_VALUE], entry]))) 1686 else: 1687 # Since there is no ssh key item in the metadata, we need to add it in. 1688 ssh_key_item = {_METADATA_KEY: _SSH_KEYS_NAME, 1689 _METADATA_KEY_VALUE: entry} 1690 metadata[_ITEMS].append(ssh_key_item) 1691 utils.PrintColorString( 1692 "Ssh public key doesn't exist in the instance(%s), adding it." 1693 % instance, utils.TextColors.WARNING) 1694 self.SetInstanceMetadata(zone, instance, metadata) 1695 1696 1697def RsaNotInMetadata(metadata, entry): 1698 """Check ssh public key exist in sshKeys's value. 1699 1700 Args: 1701 metadata: Dict, maps a metadata name to its value. 1702 entry: String, ssh public key. 1703 1704 Returns: 1705 Boolean. True if ssh public key doesn't exist in metadata. 1706 """ 1707 for item in metadata.setdefault(_ITEMS, []): 1708 if item[_METADATA_KEY] == _SSH_KEYS_NAME: 1709 if entry in item[_METADATA_KEY_VALUE]: 1710 return False 1711 return True 1712 1713 1714def GetSshKeyFromMetadata(metadata): 1715 """Get ssh key item from metadata. 1716 1717 Args: 1718 metadata: Dict, maps a metadata name to its value. 1719 1720 Returns: 1721 Dict of ssk_key_item in metadata, None if can't find the ssh key item 1722 in metadata. 1723 """ 1724 for item in metadata.setdefault(_ITEMS, []): 1725 if item.get(_METADATA_KEY, '') == _SSH_KEYS_NAME: 1726 return item 1727 return None 1728 1729 1730def GetRsaKey(ssh_rsa_path): 1731 """Get rsa key from rsa path. 1732 1733 Args: 1734 ssh_rsa_path: String, The absolute path to public rsa key. 
1735 1736 Returns: 1737 String, rsa key. 1738 1739 Raises: 1740 errors.DriverError: RSA file does not exist. 1741 """ 1742 ssh_rsa_path = os.path.expanduser(ssh_rsa_path) 1743 if not os.path.exists(ssh_rsa_path): 1744 raise errors.DriverError( 1745 "RSA file %s does not exist." % ssh_rsa_path) 1746 1747 with open(ssh_rsa_path) as f: 1748 rsa = f.read() 1749 # The space must be removed here for string processing, 1750 # if it is not string, it doesn't have a strip function. 1751 rsa = rsa.strip() if rsa else rsa 1752 utils.VerifyRsaPubKey(rsa) 1753 return rsa 1754