1#!/usr/bin/env python3
2#
3# Copyright (C) 2020 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16"""Fetch a Rust package from crates.io.
17
18Usage: get_rust_pkg.py -v syn-1.0.7
19Get the package syn-1.0.7 from crates.io and untar it into ./syn-1.0.7.
20
21Usage: get_rust_pkg.py -v -o tmp syn
22Get the latest version of package syn, say 1.0.17,
23and untar it into tmp/syn-1.0.17.
24
25Usage: get_rust_pkg.py -show bindgen cxx
26Count dependent packages of bindgen and cxx.
27
28When downloading a package, its target directory should not exist,
29or the download will be skipped.
30"""
31
32import argparse
33import functools
34import json
35import os
36import re
37import shutil
38import sys
39import tarfile
40import tempfile
41import urllib.request
42
43PKG_VERSION_PATTERN = r"(.*)-([0-9]+\.[0-9]+\.[0-9]+.*)"
44
45PKG_VERSION_MATCHER = re.compile(PKG_VERSION_PATTERN)
46
47VERSION_PATTERN = r"([0-9]+)\.([0-9]+)\.([0-9]+)"
48
49VERSION_MATCHER = re.compile(VERSION_PATTERN)
50
51
52def parse_args():
53  """Parse main arguments."""
54  parser = argparse.ArgumentParser("get_rust_pkg")
55  parser.add_argument(
56      "-v", action="store_true", default=False, help="echo executed commands")
57  parser.add_argument(
58      "-o", metavar="out_dir", default=".", help="output directory")
59  parser.add_argument(
60      "-show", "--show",
61      action="store_true",
62      default=False,
63      help="show all default dependent packages, using crates.io api")
64  parser.add_argument(
65      dest="pkgs",
66      metavar="pkg_name",
67      nargs="+",
68      help="name of Rust package to be fetched from crates.io")
69  return parser.parse_args()
70
71
72def set2list(a_set):
73  return (" " + " ".join(sorted(a_set))) if a_set else ""
74
75
76def echo(args, msg):
77  if args.v:
78    print("INFO: {}".format(msg), flush=True)
79
80
81def echo_all_deps(args, kind, deps):
82  if args.v and deps:
83    print("INFO: now {} in {}:{}".format(len(deps), kind, set2list(deps)))
84
85
86def pkg_base_name(pkg):
87  match = PKG_VERSION_MATCHER.match(pkg)
88  if match is not None:
89    return (match.group(1), match.group(2))
90  else:
91    return (pkg, "")
92
93
94def get_version_numbers(version):
95  match = VERSION_MATCHER.match(version)
96  if match is not None:
97    return tuple(int(match.group(i)) for i in range(1, 4))
98  return (0, 0, 0)
99
100
101def is_newer_version(args, prev_version, prev_id, check_version, check_id):
102  """Return true if check_version+id is newer than prev_version+id."""
103  echo(args, "checking version={}  id={}".format(check_version, check_id))
104  return ((get_version_numbers(check_version), check_id) >
105          (get_version_numbers(prev_version), prev_id))
106
107
108def get_max_version(pkg):
109  """Ask crates.io for a pkg's latest version."""
110  url = "https://crates.io/api/v1/crates/" + pkg
111  with urllib.request.urlopen(url) as request:
112    data = json.loads(request.read().decode())
113    return data["crate"]["max_version"]
114
115
116def find_dl_path(args, name):
117  """Ask crates.io for the latest version download path."""
118  base_name, version = pkg_base_name(name)
119  if not version:
120    version = get_max_version(name)
121  url = "https://crates.io/api/v1/crates/{}/{}".format(base_name, version)
122  echo(args, "try to get dl_path from {}".format(url))
123  with urllib.request.urlopen(url) as request:
124    data = json.loads(request.read().decode())
125    if "version" not in data or "dl_path" not in data["version"]:
126      print("ERROR: cannot find version {} of package {}".format(
127          version, base_name))
128      return None
129    echo(args, "found download path for version {}".format(version))
130    return data["version"]["dl_path"]
131
132
133def fetch_pkg(args, pkg, dl_path):
134  """Fetch package from crates.io and untar it into a subdirectory."""
135  if not dl_path:
136    print("ERROR: cannot find download path for '{}'".format(pkg))
137    return False
138  url = "https://crates.io" + dl_path
139  tmp_dir = tempfile.mkdtemp()
140  echo(args, "fetch tar file from {}".format(url))
141  tar_file, _ = urllib.request.urlretrieve(url)
142  with tarfile.open(tar_file, mode="r") as tfile:
143    echo(args, "extract tar file {} into {}".format(tar_file, tmp_dir))
144    tfile.extractall(tmp_dir)
145    files = os.listdir(tmp_dir)
146    # There should be only one directory in the tar file,
147    # but it might not be (name + "-" + version)
148    pkg_tmp_dir = os.path.join(tmp_dir, files[0])
149    echo(args, "untared package in {}".format(pkg_tmp_dir))
150    dest_dir = os.path.join(args.o, files[0])
151    if os.path.exists(dest_dir):
152      print("ERROR: do not overwrite existing {}".format(dest_dir))
153      return False  # leave tar_file and tmp_dir
154    else:
155      echo(args, "move {} to {}".format(pkg_tmp_dir, dest_dir))
156      shutil.move(pkg_tmp_dir, dest_dir)
157  echo(args, "delete downloaded tar file {}".format(tar_file))
158  os.remove(tar_file)
159  echo(args, "delete temp directory {}".format(tmp_dir))
160  shutil.rmtree(tmp_dir)
161  print("SUCCESS: downloaded package to '{}'".format(dest_dir))
162  return True
163
164
165def get_crate_dependencies(args, pkg):
166  """Ask crates.io for pkg's dependencies."""
167  echo(args, "Ask crates.io for {} ...".format(pkg))
168  try:
169    url = "https://crates.io/api/v1/crates/{}/{}/dependencies".format(
170        pkg, get_max_version(pkg))
171    with urllib.request.urlopen(url) as request:
172      data = json.loads(request.read().decode())
173  except urllib.error.HTTPError:
174    print("ERROR: failed to find {}".format(pkg))
175    return False, None, None
176  build_deps = set()
177  dev_deps = set()
178  for crate in data["dependencies"]:
179    if not crate["optional"]:  # some package has a lot of optional features
180      # dev_deps is a super set of build_deps
181      dev_deps.add(crate["crate_id"])
182      if crate["kind"] != "dev":
183        build_deps.add(crate["crate_id"])
184  return True, build_deps, dev_deps
185
186
187def compare_pkg_deps(pkg1, pkg2):
188  """Compare dependency order of pkg1 and pkg2."""
189  base1, build_deps1, dev_deps1 = pkg1
190  base2, build_deps2, dev_deps2 = pkg2
191  # Some pkg1 can be build-dependent (non-dev-dependent) on pkg2,
192  # when pkg2 is only test-dependent (dev-dependent) on pkg1.
193  # This is not really a build dependency cycle, because pkg2
194  # can be built before pkg1, but tested after pkg1.
195  # So the dependency order is based on build_deps first, and then dev_deps.
196  if base1 in build_deps2:
197    return -1  # pkg2 needs base1
198  if base2 in build_deps1:
199    return 1  # pkg1 needs base2
200  if base1 in dev_deps2:
201    return -1  # pkg2 needs base1
202  if base2 in dev_deps1:
203    return 1  # pkg1 needs base2
204  # If there is no dependency between pkg1 and pkg2,
205  # order them by the size of build_deps or dev_deps, or the name.
206  count1 = (len(build_deps1), len(dev_deps1), base1)
207  count2 = (len(build_deps2), len(dev_deps2), base2)
208  if count1 != count2:
209    return -1 if count1 < count2 else 1
210  return 0
211
212
213def sort_found_pkgs(tuples):
214  """A topological sort of tuples based on build_deps."""
215  # tuples is a list of (base_name, build_deps, dev_deps)
216
217  # Use build_deps as the dependency relation in a topological sort.
218  # The new_tuples list is used in topological sort. It is the input tuples
219  # prefixed with a changing build_deps during the sorting process.
220  # Collect all package base names.
221  # Dependent packages not found in all_base_names will be treated as
222  # "external" and ignored in topological sort.
223  all_base_names = set(map(lambda t: t[0], tuples))
224  new_tuples = []
225  all_names = set()
226  for (base_name, build_deps, dev_deps) in tuples:
227    new_tuples.append((build_deps, (base_name, build_deps, dev_deps)))
228    all_names = all_names.union(build_deps)
229  external_names = all_names.difference(all_base_names)
230  new_tuples = list(
231      map(lambda t: (t[0].difference(external_names), t[1]), new_tuples))
232
233  sorted_tuples = []
234  # A brute force topological sort;
235  # tuples with empty build_deps are put before the others.
236  while new_tuples:
237    first_group = list(filter(lambda t: not t[0], new_tuples))
238    other_group = list(filter(lambda t: t[0], new_tuples))
239    new_tuples = []
240    if first_group:
241      # Remove the extra build_deps in first_group,
242      # then sort it, and add its tuples to the sorted_tuples list.
243      first_group = list(map(lambda t: t[1], first_group))
244      first_group.sort(key=functools.cmp_to_key(compare_pkg_deps))
245      sorted_tuples.extend(first_group)
246      # Copy other_group to new_tuples but remove names in the first_group.
247      base_names = set(map(lambda t: t[0], first_group))
248      new_tuples = list(
249          map(lambda t: (t[0].difference(base_names), t[1]), other_group))
250    else:
251      # There is a bug, or a cycle in the build_deps.
252      # If we include all optional dependent packages into build_deps,
253      # we will see one cycle: futures-util has an optional dependent
254      # on futures, which has a normal dependent on futures-util.
255      print("ERROR: leftover in topological sort: {}".format(
256          list(map(lambda t: t[1][1], other_group))))
257      # Anyway, sort the other_group to include them into final report.
258      other_group = list(map(lambda t: t[1], other_group))
259      other_group.sort(key=functools.cmp_to_key(compare_pkg_deps))
260      sorted_tuples.extend(other_group)
261  return sorted_tuples
262
263
264def show_all_dependencies(args, found_pkgs):
265  """Topological sort found_pkgs and report number of dependent packages."""
266  found_pkgs = sort_found_pkgs(found_pkgs)
267  max_length = functools.reduce(lambda a, t: max(a, len(t[0])), found_pkgs, 1)
268  name_format = "{:" + str(max_length) + "s}"
269  print("\n##### Summary of all dependent package counts #####")
270  pattern = "{:>9s}_deps[k] = # of {}dependent packages of {}"
271  for (prefix, suffix) in [("    ", "pkg[k]"), ("all_", "pkg[1] to pkg[k]")]:
272    for (name, kind) in [("build", "non-dev-"), ("dev", "all ")]:
273      print(pattern.format(prefix + name, kind, suffix))
274  print(("{:>4s} " + name_format + " {:>10s} {:>10s} {:>14s} {:>14s}").format(
275      "k", "pkg", "build_deps", "dev_deps", "all_build_deps", "all_dev_deps"))
276  all_pkgs = set()
277  all_build_deps = set()
278  all_dev_deps = set()
279  k = 0
280  prev_all_build_deps = set()
281  prev_all_dev_deps = set()
282  for (pkg, build_deps, dev_deps) in found_pkgs:
283    all_pkgs.add(pkg)
284    all_build_deps = all_build_deps.union(build_deps).difference(all_pkgs)
285    all_dev_deps = all_dev_deps.union(dev_deps).difference(all_pkgs)
286    k += 1
287    print(("{:4d} " + name_format + " {:10d} {:10d} {:14d} {:14d}").format(
288        k, pkg, len(build_deps), len(dev_deps), len(all_build_deps),
289        len(all_dev_deps)))
290    if prev_all_build_deps != all_build_deps:
291      echo_all_deps(args, "all_build_deps", all_build_deps)
292      prev_all_build_deps = all_build_deps
293    if prev_all_dev_deps != all_dev_deps:
294      echo_all_deps(args, "all_dev_deps", all_dev_deps)
295      prev_all_dev_deps = all_dev_deps
296  print("\nNOTE: from all {} package(s):{}".format(
297      len(all_pkgs), set2list(all_pkgs)))
298  for (kind, deps) in [("non-dev-", all_build_deps), ("", all_dev_deps)]:
299    if deps:
300      print("NOTE: found {:3d} other {}dependent package(s):{}".format(
301          len(deps), kind, set2list(deps)))
302
303
304def crates_io_find_pkgs(args, pkgs, found_pkgs):
305  """Call crates.io api to find direct dependent packages."""
306  success = True
307  for pkg in sorted(pkgs):
308    ok, build_deps, dev_deps = get_crate_dependencies(args, pkg)
309    if not ok:
310      success = False
311    else:
312      found_pkgs.append((pkg, build_deps, dev_deps))
313  return success
314
315
316def add_non_dev_dependencies(args, all_deps, core_pkgs, visited, pkg):
317  """Add reachable non-dev dependencies to all_deps[pkg]'s dependencies."""
318  if pkg not in all_deps:
319    ok, build_deps, dev_deps = get_crate_dependencies(args, pkg)
320    if not ok:
321      return set()
322    all_deps[pkg] = (pkg, build_deps, dev_deps)
323  else:
324    (_, build_deps, dev_deps) = all_deps[pkg]
325  if pkg in visited:
326    return build_deps
327  visited.add(pkg)
328
329  for p in sorted(build_deps):
330    if p not in core_pkgs and pkg in core_pkgs:
331      core_pkgs.add(p)
332    deps = add_non_dev_dependencies(args, all_deps, core_pkgs, visited, p)
333    build_deps = build_deps.union(deps)
334  if pkg in core_pkgs:
335    for p in sorted(dev_deps):
336      deps = add_non_dev_dependencies(args, all_deps, core_pkgs, visited, p)
337      dev_deps = dev_deps.union(deps)
338  all_deps[pkg] = (pkg, build_deps, dev_deps)
339  return build_deps
340
341
342def add_indirect_build_deps(args, found_pkgs):
343  """Add all indirect dependencies and return a new found_pkgs."""
344  all_deps = dict(map(lambda t: (t[0], t), found_pkgs))
345  core_pkgs = set(map(lambda t: t[0], found_pkgs))
346  dev_pkgs = set()
347  dump_pkgs(args, "BEFORE", all_deps, core_pkgs, dev_pkgs)
348  visited = set()
349  for pkg in sorted(core_pkgs):
350    add_non_dev_dependencies(args, all_deps, core_pkgs, visited, pkg)
351  # Revisit core packages to add build_deps into core_pkgs and other deps.
352  check_again = True
353  while check_again:
354    pattern = "Revisiting {} core packages:{}"
355    echo(args, pattern.format(len(core_pkgs), set2list(core_pkgs)))
356    check_again = False
357    visited = set()
358    num_core_pkgs = len(core_pkgs)
359    for pkg in sorted(core_pkgs):
360      (_, build_deps, dev_deps) = all_deps[pkg]
361      (num_build_deps, num_dev_deps) = (len(build_deps), len(dev_deps))
362      add_non_dev_dependencies(args, all_deps, core_pkgs, visited, pkg)
363      (_, build_deps, dev_deps) = all_deps[pkg]
364      (new_num_build_deps, new_num_dev_deps) = (len(build_deps), len(dev_deps))
365      # If build_deps, dev_deps, or core_pkgs was changed, check again.
366      if (num_build_deps != new_num_build_deps or
367          num_dev_deps != new_num_dev_deps or num_core_pkgs != len(core_pkgs)):
368        check_again = True
369  dev_pkgs = visited.difference(core_pkgs)
370  dump_pkgs(args, "AFTER", all_deps, core_pkgs, dev_pkgs)
371  found_pkgs = list(map(lambda p: all_deps[p], core_pkgs))
372  found_pkgs.sort(key=lambda t: t[0])
373  return found_pkgs
374
375
376def echo_dump_found_pkgs(args, msg, all_deps, pkgs):
377  if not args.v or not pkgs:
378    return
379  echo(args, msg)
380  for pkg in sorted(pkgs):
381    (_, build_deps, dev_deps) = all_deps[pkg]
382    for (name, deps) in [("normal", build_deps), ("dev", dev_deps)]:
383      pattern = "  {} has {} " + name + " deps:{}"
384      echo(args, pattern.format(pkg, len(deps), set2list(deps)))
385
386
387def dump_pkgs(args, msg, all_deps, core_pkgs, dev_pkgs):
388  echo_dump_found_pkgs(args, msg + " core_pkgs:", all_deps, core_pkgs)
389  echo_dump_found_pkgs(args, msg + " other_dev_pkgs:", all_deps, dev_pkgs)
390
391
392def show_dependencies(args):
393  """Calls crates.io api to find dependent packages; returns True on success."""
394  all_pkgs = set(map(lambda p: pkg_base_name(p)[0], args.pkgs))
395  if "" in all_pkgs:
396    # TODO(chh): detect and report ill formed names in args.pkgs
397    print("WARNING: skip some ill formatted pkg arguments.")
398    all_pkgs = all_pkgs.remove("")
399  if not all_pkgs:
400    print("ERROR: no valid pkg names to show dependencies.")
401    return False
402  pkgs = sorted(all_pkgs)
403  found_pkgs = []
404  success = crates_io_find_pkgs(args, pkgs, found_pkgs)
405  if not found_pkgs:
406    return False
407  # All normal (non-dev) dependent packages will be added into found_pkgs.
408  found_pkgs = add_indirect_build_deps(args, found_pkgs)
409  show_all_dependencies(args, found_pkgs)
410  return success
411
412
413def main():
414  args = parse_args()
415  if args.show:
416    # only show dependencies, not to fetch any package
417    if not show_dependencies(args):
418      sys.exit(2)
419    return
420  echo(args, "to fetch packages = {}".format(args.pkgs))
421  success = True
422  for pkg in args.pkgs:
423    echo(args, "trying to fetch package '{}'".format(pkg))
424    try:
425      if not fetch_pkg(args, pkg, find_dl_path(args, pkg)):
426        success = False
427    except urllib.error.HTTPError:
428      print("ERROR: unknown package '{}'".format(pkg))
429      success = False
430  if not success:
431    sys.exit(1)
432
433
434if __name__ == "__main__":
435  main()
436