1#!/usr/bin/env python3 2# 3# Copyright (C) 2020 The Android Open Source Project 4# 5# Licensed under the Apache License, Version 2.0 (the "License"); 6# you may not use this file except in compliance with the License. 7# You may obtain a copy of the License at 8# 9# http://www.apache.org/licenses/LICENSE-2.0 10# 11# Unless required by applicable law or agreed to in writing, software 12# distributed under the License is distributed on an "AS IS" BASIS, 13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14# See the License for the specific language governing permissions and 15# limitations under the License. 16"""Fetch a Rust package from crates.io. 17 18Usage: get_rust_pkg.py -v syn-1.0.7 19Get the package syn-1.0.7 from crates.io and untar it into ./syn-1.0.7. 20 21Usage: get_rust_pkg.py -v -o tmp syn 22Get the latest version of package syn, say 1.0.17, 23and untar it into tmp/syn-1.0.17. 24 25Usage: get_rust_pkg.py -show bindgen cxx 26Count dependent packages of bindgen and cxx. 27 28When downloading a package, its target directory should not exist, 29or the download will be skipped. 30""" 31 32import argparse 33import functools 34import json 35import os 36import re 37import shutil 38import sys 39import tarfile 40import tempfile 41import urllib.request 42 43PKG_VERSION_PATTERN = r"(.*)-([0-9]+\.[0-9]+\.[0-9]+.*)" 44 45PKG_VERSION_MATCHER = re.compile(PKG_VERSION_PATTERN) 46 47VERSION_PATTERN = r"([0-9]+)\.([0-9]+)\.([0-9]+)" 48 49VERSION_MATCHER = re.compile(VERSION_PATTERN) 50 51 52def parse_args(): 53 """Parse main arguments.""" 54 parser = argparse.ArgumentParser("get_rust_pkg") 55 parser.add_argument( 56 "-v", action="store_true", default=False, help="echo executed commands") 57 parser.add_argument( 58 "-o", metavar="out_dir", default=".", help="output directory") 59 parser.add_argument( 60 "-show", "--show", 61 action="store_true", 62 default=False, 63 help="show all default dependent packages, using crates.io api") 64 parser.add_argument( 65 dest="pkgs", 66 metavar="pkg_name", 67 nargs="+", 68 help="name of Rust package to be fetched from crates.io") 69 return parser.parse_args() 70 71 72def set2list(a_set): 73 return (" " + " ".join(sorted(a_set))) if a_set else "" 74 75 76def echo(args, msg): 77 if args.v: 78 print("INFO: {}".format(msg), flush=True) 79 80 81def echo_all_deps(args, kind, deps): 82 if args.v and deps: 83 print("INFO: now {} in {}:{}".format(len(deps), kind, set2list(deps))) 84 85 86def pkg_base_name(pkg): 87 match = PKG_VERSION_MATCHER.match(pkg) 88 if match is not None: 89 return (match.group(1), match.group(2)) 90 else: 91 return (pkg, "") 92 93 94def get_version_numbers(version): 95 match = VERSION_MATCHER.match(version) 96 if match is not None: 97 return tuple(int(match.group(i)) for i in range(1, 4)) 98 return (0, 0, 0) 99 100 101def is_newer_version(args, prev_version, prev_id, check_version, check_id): 102 """Return true if check_version+id is newer than prev_version+id.""" 103 echo(args, "checking version={} id={}".format(check_version, check_id)) 104 return ((get_version_numbers(check_version), check_id) > 105 (get_version_numbers(prev_version), prev_id)) 106 107 108def get_max_version(pkg): 109 """Ask crates.io for a pkg's latest version.""" 110 url = "https://crates.io/api/v1/crates/" + pkg 111 with urllib.request.urlopen(url) as request: 112 data = json.loads(request.read().decode()) 113 return data["crate"]["max_version"] 114 115 116def find_dl_path(args, name): 117 """Ask crates.io for the latest version download path.""" 118 base_name, version = pkg_base_name(name) 119 if not version: 120 version = get_max_version(name) 121 url = "https://crates.io/api/v1/crates/{}/{}".format(base_name, version) 122 echo(args, "try to get dl_path from {}".format(url)) 123 with urllib.request.urlopen(url) as request: 124 data = json.loads(request.read().decode()) 125 if "version" not in data or "dl_path" not in data["version"]: 126 print("ERROR: cannot find version {} of package {}".format( 127 version, base_name)) 128 return None 129 echo(args, "found download path for version {}".format(version)) 130 return data["version"]["dl_path"] 131 132 133def fetch_pkg(args, pkg, dl_path): 134 """Fetch package from crates.io and untar it into a subdirectory.""" 135 if not dl_path: 136 print("ERROR: cannot find download path for '{}'".format(pkg)) 137 return False 138 url = "https://crates.io" + dl_path 139 tmp_dir = tempfile.mkdtemp() 140 echo(args, "fetch tar file from {}".format(url)) 141 tar_file, _ = urllib.request.urlretrieve(url) 142 with tarfile.open(tar_file, mode="r") as tfile: 143 echo(args, "extract tar file {} into {}".format(tar_file, tmp_dir)) 144 tfile.extractall(tmp_dir) 145 files = os.listdir(tmp_dir) 146 # There should be only one directory in the tar file, 147 # but it might not be (name + "-" + version) 148 pkg_tmp_dir = os.path.join(tmp_dir, files[0]) 149 echo(args, "untared package in {}".format(pkg_tmp_dir)) 150 dest_dir = os.path.join(args.o, files[0]) 151 if os.path.exists(dest_dir): 152 print("ERROR: do not overwrite existing {}".format(dest_dir)) 153 return False # leave tar_file and tmp_dir 154 else: 155 echo(args, "move {} to {}".format(pkg_tmp_dir, dest_dir)) 156 shutil.move(pkg_tmp_dir, dest_dir) 157 echo(args, "delete downloaded tar file {}".format(tar_file)) 158 os.remove(tar_file) 159 echo(args, "delete temp directory {}".format(tmp_dir)) 160 shutil.rmtree(tmp_dir) 161 print("SUCCESS: downloaded package to '{}'".format(dest_dir)) 162 return True 163 164 165def get_crate_dependencies(args, pkg): 166 """Ask crates.io for pkg's dependencies.""" 167 echo(args, "Ask crates.io for {} ...".format(pkg)) 168 try: 169 url = "https://crates.io/api/v1/crates/{}/{}/dependencies".format( 170 pkg, get_max_version(pkg)) 171 with urllib.request.urlopen(url) as request: 172 data = json.loads(request.read().decode()) 173 except urllib.error.HTTPError: 174 print("ERROR: failed to find {}".format(pkg)) 175 return False, None, None 176 build_deps = set() 177 dev_deps = set() 178 for crate in data["dependencies"]: 179 if not crate["optional"]: # some package has a lot of optional features 180 # dev_deps is a super set of build_deps 181 dev_deps.add(crate["crate_id"]) 182 if crate["kind"] != "dev": 183 build_deps.add(crate["crate_id"]) 184 return True, build_deps, dev_deps 185 186 187def compare_pkg_deps(pkg1, pkg2): 188 """Compare dependency order of pkg1 and pkg2.""" 189 base1, build_deps1, dev_deps1 = pkg1 190 base2, build_deps2, dev_deps2 = pkg2 191 # Some pkg1 can be build-dependent (non-dev-dependent) on pkg2, 192 # when pkg2 is only test-dependent (dev-dependent) on pkg1. 193 # This is not really a build dependency cycle, because pkg2 194 # can be built before pkg1, but tested after pkg1. 195 # So the dependency order is based on build_deps first, and then dev_deps. 196 if base1 in build_deps2: 197 return -1 # pkg2 needs base1 198 if base2 in build_deps1: 199 return 1 # pkg1 needs base2 200 if base1 in dev_deps2: 201 return -1 # pkg2 needs base1 202 if base2 in dev_deps1: 203 return 1 # pkg1 needs base2 204 # If there is no dependency between pkg1 and pkg2, 205 # order them by the size of build_deps or dev_deps, or the name. 206 count1 = (len(build_deps1), len(dev_deps1), base1) 207 count2 = (len(build_deps2), len(dev_deps2), base2) 208 if count1 != count2: 209 return -1 if count1 < count2 else 1 210 return 0 211 212 213def sort_found_pkgs(tuples): 214 """A topological sort of tuples based on build_deps.""" 215 # tuples is a list of (base_name, build_deps, dev_deps) 216 217 # Use build_deps as the dependency relation in a topological sort. 218 # The new_tuples list is used in topological sort. It is the input tuples 219 # prefixed with a changing build_deps during the sorting process. 220 # Collect all package base names. 221 # Dependent packages not found in all_base_names will be treated as 222 # "external" and ignored in topological sort. 223 all_base_names = set(map(lambda t: t[0], tuples)) 224 new_tuples = [] 225 all_names = set() 226 for (base_name, build_deps, dev_deps) in tuples: 227 new_tuples.append((build_deps, (base_name, build_deps, dev_deps))) 228 all_names = all_names.union(build_deps) 229 external_names = all_names.difference(all_base_names) 230 new_tuples = list( 231 map(lambda t: (t[0].difference(external_names), t[1]), new_tuples)) 232 233 sorted_tuples = [] 234 # A brute force topological sort; 235 # tuples with empty build_deps are put before the others. 236 while new_tuples: 237 first_group = list(filter(lambda t: not t[0], new_tuples)) 238 other_group = list(filter(lambda t: t[0], new_tuples)) 239 new_tuples = [] 240 if first_group: 241 # Remove the extra build_deps in first_group, 242 # then sort it, and add its tuples to the sorted_tuples list. 243 first_group = list(map(lambda t: t[1], first_group)) 244 first_group.sort(key=functools.cmp_to_key(compare_pkg_deps)) 245 sorted_tuples.extend(first_group) 246 # Copy other_group to new_tuples but remove names in the first_group. 247 base_names = set(map(lambda t: t[0], first_group)) 248 new_tuples = list( 249 map(lambda t: (t[0].difference(base_names), t[1]), other_group)) 250 else: 251 # There is a bug, or a cycle in the build_deps. 252 # If we include all optional dependent packages into build_deps, 253 # we will see one cycle: futures-util has an optional dependent 254 # on futures, which has a normal dependent on futures-util. 255 print("ERROR: leftover in topological sort: {}".format( 256 list(map(lambda t: t[1][1], other_group)))) 257 # Anyway, sort the other_group to include them into final report. 258 other_group = list(map(lambda t: t[1], other_group)) 259 other_group.sort(key=functools.cmp_to_key(compare_pkg_deps)) 260 sorted_tuples.extend(other_group) 261 return sorted_tuples 262 263 264def show_all_dependencies(args, found_pkgs): 265 """Topological sort found_pkgs and report number of dependent packages.""" 266 found_pkgs = sort_found_pkgs(found_pkgs) 267 max_length = functools.reduce(lambda a, t: max(a, len(t[0])), found_pkgs, 1) 268 name_format = "{:" + str(max_length) + "s}" 269 print("\n##### Summary of all dependent package counts #####") 270 pattern = "{:>9s}_deps[k] = # of {}dependent packages of {}" 271 for (prefix, suffix) in [(" ", "pkg[k]"), ("all_", "pkg[1] to pkg[k]")]: 272 for (name, kind) in [("build", "non-dev-"), ("dev", "all ")]: 273 print(pattern.format(prefix + name, kind, suffix)) 274 print(("{:>4s} " + name_format + " {:>10s} {:>10s} {:>14s} {:>14s}").format( 275 "k", "pkg", "build_deps", "dev_deps", "all_build_deps", "all_dev_deps")) 276 all_pkgs = set() 277 all_build_deps = set() 278 all_dev_deps = set() 279 k = 0 280 prev_all_build_deps = set() 281 prev_all_dev_deps = set() 282 for (pkg, build_deps, dev_deps) in found_pkgs: 283 all_pkgs.add(pkg) 284 all_build_deps = all_build_deps.union(build_deps).difference(all_pkgs) 285 all_dev_deps = all_dev_deps.union(dev_deps).difference(all_pkgs) 286 k += 1 287 print(("{:4d} " + name_format + " {:10d} {:10d} {:14d} {:14d}").format( 288 k, pkg, len(build_deps), len(dev_deps), len(all_build_deps), 289 len(all_dev_deps))) 290 if prev_all_build_deps != all_build_deps: 291 echo_all_deps(args, "all_build_deps", all_build_deps) 292 prev_all_build_deps = all_build_deps 293 if prev_all_dev_deps != all_dev_deps: 294 echo_all_deps(args, "all_dev_deps", all_dev_deps) 295 prev_all_dev_deps = all_dev_deps 296 print("\nNOTE: from all {} package(s):{}".format( 297 len(all_pkgs), set2list(all_pkgs))) 298 for (kind, deps) in [("non-dev-", all_build_deps), ("", all_dev_deps)]: 299 if deps: 300 print("NOTE: found {:3d} other {}dependent package(s):{}".format( 301 len(deps), kind, set2list(deps))) 302 303 304def crates_io_find_pkgs(args, pkgs, found_pkgs): 305 """Call crates.io api to find direct dependent packages.""" 306 success = True 307 for pkg in sorted(pkgs): 308 ok, build_deps, dev_deps = get_crate_dependencies(args, pkg) 309 if not ok: 310 success = False 311 else: 312 found_pkgs.append((pkg, build_deps, dev_deps)) 313 return success 314 315 316def add_non_dev_dependencies(args, all_deps, core_pkgs, visited, pkg): 317 """Add reachable non-dev dependencies to all_deps[pkg]'s dependencies.""" 318 if pkg not in all_deps: 319 ok, build_deps, dev_deps = get_crate_dependencies(args, pkg) 320 if not ok: 321 return set() 322 all_deps[pkg] = (pkg, build_deps, dev_deps) 323 else: 324 (_, build_deps, dev_deps) = all_deps[pkg] 325 if pkg in visited: 326 return build_deps 327 visited.add(pkg) 328 329 for p in sorted(build_deps): 330 if p not in core_pkgs and pkg in core_pkgs: 331 core_pkgs.add(p) 332 deps = add_non_dev_dependencies(args, all_deps, core_pkgs, visited, p) 333 build_deps = build_deps.union(deps) 334 if pkg in core_pkgs: 335 for p in sorted(dev_deps): 336 deps = add_non_dev_dependencies(args, all_deps, core_pkgs, visited, p) 337 dev_deps = dev_deps.union(deps) 338 all_deps[pkg] = (pkg, build_deps, dev_deps) 339 return build_deps 340 341 342def add_indirect_build_deps(args, found_pkgs): 343 """Add all indirect dependencies and return a new found_pkgs.""" 344 all_deps = dict(map(lambda t: (t[0], t), found_pkgs)) 345 core_pkgs = set(map(lambda t: t[0], found_pkgs)) 346 dev_pkgs = set() 347 dump_pkgs(args, "BEFORE", all_deps, core_pkgs, dev_pkgs) 348 visited = set() 349 for pkg in sorted(core_pkgs): 350 add_non_dev_dependencies(args, all_deps, core_pkgs, visited, pkg) 351 # Revisit core packages to add build_deps into core_pkgs and other deps. 352 check_again = True 353 while check_again: 354 pattern = "Revisiting {} core packages:{}" 355 echo(args, pattern.format(len(core_pkgs), set2list(core_pkgs))) 356 check_again = False 357 visited = set() 358 num_core_pkgs = len(core_pkgs) 359 for pkg in sorted(core_pkgs): 360 (_, build_deps, dev_deps) = all_deps[pkg] 361 (num_build_deps, num_dev_deps) = (len(build_deps), len(dev_deps)) 362 add_non_dev_dependencies(args, all_deps, core_pkgs, visited, pkg) 363 (_, build_deps, dev_deps) = all_deps[pkg] 364 (new_num_build_deps, new_num_dev_deps) = (len(build_deps), len(dev_deps)) 365 # If build_deps, dev_deps, or core_pkgs was changed, check again. 366 if (num_build_deps != new_num_build_deps or 367 num_dev_deps != new_num_dev_deps or num_core_pkgs != len(core_pkgs)): 368 check_again = True 369 dev_pkgs = visited.difference(core_pkgs) 370 dump_pkgs(args, "AFTER", all_deps, core_pkgs, dev_pkgs) 371 found_pkgs = list(map(lambda p: all_deps[p], core_pkgs)) 372 found_pkgs.sort(key=lambda t: t[0]) 373 return found_pkgs 374 375 376def echo_dump_found_pkgs(args, msg, all_deps, pkgs): 377 if not args.v or not pkgs: 378 return 379 echo(args, msg) 380 for pkg in sorted(pkgs): 381 (_, build_deps, dev_deps) = all_deps[pkg] 382 for (name, deps) in [("normal", build_deps), ("dev", dev_deps)]: 383 pattern = " {} has {} " + name + " deps:{}" 384 echo(args, pattern.format(pkg, len(deps), set2list(deps))) 385 386 387def dump_pkgs(args, msg, all_deps, core_pkgs, dev_pkgs): 388 echo_dump_found_pkgs(args, msg + " core_pkgs:", all_deps, core_pkgs) 389 echo_dump_found_pkgs(args, msg + " other_dev_pkgs:", all_deps, dev_pkgs) 390 391 392def show_dependencies(args): 393 """Calls crates.io api to find dependent packages; returns True on success.""" 394 all_pkgs = set(map(lambda p: pkg_base_name(p)[0], args.pkgs)) 395 if "" in all_pkgs: 396 # TODO(chh): detect and report ill formed names in args.pkgs 397 print("WARNING: skip some ill formatted pkg arguments.") 398 all_pkgs = all_pkgs.remove("") 399 if not all_pkgs: 400 print("ERROR: no valid pkg names to show dependencies.") 401 return False 402 pkgs = sorted(all_pkgs) 403 found_pkgs = [] 404 success = crates_io_find_pkgs(args, pkgs, found_pkgs) 405 if not found_pkgs: 406 return False 407 # All normal (non-dev) dependent packages will be added into found_pkgs. 408 found_pkgs = add_indirect_build_deps(args, found_pkgs) 409 show_all_dependencies(args, found_pkgs) 410 return success 411 412 413def main(): 414 args = parse_args() 415 if args.show: 416 # only show dependencies, not to fetch any package 417 if not show_dependencies(args): 418 sys.exit(2) 419 return 420 echo(args, "to fetch packages = {}".format(args.pkgs)) 421 success = True 422 for pkg in args.pkgs: 423 echo(args, "trying to fetch package '{}'".format(pkg)) 424 try: 425 if not fetch_pkg(args, pkg, find_dl_path(args, pkg)): 426 success = False 427 except urllib.error.HTTPError: 428 print("ERROR: unknown package '{}'".format(pkg)) 429 success = False 430 if not success: 431 sys.exit(1) 432 433 434if __name__ == "__main__": 435 main() 436