1#!/usr/bin/env python
2#
3# Copyright (C) 2017 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17"""Updates a JSON data file of supported algorithms.
18
Reads from stdin a list of provided algorithms, as produced by
ListProviders.java, and takes as arguments a JSON file describing the
previous set of supported algorithms and the current API level.  Produces
an updated JSON record of algorithm support.
23"""
24
25import argparse
26import collections
27import datetime
28import json
29import re
30import sys
31
32import crypto_docs
33
# Algorithm categories tracked by this tool.  get_current_data() skips input
# lines whose category is not listed here, and update_data() only emits
# categories from this list.
SUPPORTED_CATEGORIES = [
    'AlgorithmParameterGenerator',
    'AlgorithmParameters',
    'CertificateFactory',
    'CertPathBuilder',
    'CertPathValidator',
    'CertStore',
    'Cipher',
    'KeyAgreement',
    'KeyFactory',
    'KeyGenerator',
    'KeyManagerFactory',
    'KeyPairGenerator',
    'KeyStore',
    'Mac',
    'MessageDigest',
    'SecretKeyFactory',
    'SecureRandom',
    'Signature',
    'SSLContext',
    'SSLEngine.Enabled',
    'SSLEngine.Supported',
    'SSLSocket.Enabled',
    'SSLSocket.Supported',
    'TrustManagerFactory',
]
60
# For these categories, we really want to maintain the casing that was in the
# original data, so avoid changing it.  update_data() does not apply
# fix_name_caps_for_output() to algorithm names in these categories.
CASE_SENSITIVE_CATEGORIES = [
    'SSLEngine.Enabled',
    'SSLEngine.Supported',
    'SSLSocket.Enabled',
    'SSLSocket.Supported',
]
69
70
# Local alias for the crypto_docs helper.  update_data() calls it with a list
# of category entries and a category name, and treats a None result as
# "no such category".
find_by_name = crypto_docs.find_by_name
72
73
def find_by_normalized_name(seq, name):
    """Looks up an entry of seq by its normalized name.

    Args:
      seq: An iterable of dicts, each of which has a 'name' key.
      name: The (already normalized) name to search for.

    Returns:
      The first element of seq whose 'name', once passed through
      normalize_name(), equals name; None if no element matches.
    """
    matches = (entry for entry in seq
               if normalize_name(entry['name']) == name)
    return next(matches, None)
80
81
def sort_by_name(seq):
    """Produces a new list with the items of seq ordered by their 'name'.

    The input sequence itself is left unmodified.
    """
    ordered = list(seq)
    ordered.sort(key=lambda entry: entry['name'])
    return ordered
85
86
def normalize_name(name):
    """Converts an algorithm name into its canonical comparison form.

    The name is upper-cased, and two provider-specific spellings are folded
    into a single form so that equivalent algorithms compare equal.
    """
    pkcs7_suffix = "/PKCS7PADDING"
    upper = name.upper()

    # BouncyCastle registers X.509 with X509 as an alias while Conscrypt does
    # it the other way around.  X.509 is the standard's official name.
    if upper == "X509":
        return "X.509"

    # PKCS#5 padding is a special case of PKCS#7 padding and providers are
    # not consistent about which name they use.  The docs have historically
    # said PKCS5PADDING, so fold the PKCS7 spelling into that.
    if upper.endswith(pkcs7_suffix):
        return upper[:len(upper) - len(pkcs7_suffix)] + "/PKCS5PADDING"

    return upper
101
102
def fix_name_caps_for_output(name):
    """Returns the given algorithm name with its capitalization tidied up."""
    # Only the letter case may be altered here, never the text itself:
    # later runs have to be able to match the emitted name against the name
    # reported by the device.

    # Each entry is (case-insensitive pattern, preferred spelling):
    #   FOOwithBAR rather than FOOWITHBAR or FOOWithBAR
    #   DESede rather than DESEDE
    #   Hmac rather than HMAC
    preferred_spellings = (
        ('WITH', 'with'),
        ('DESEDE', 'DESede'),
        ('HMAC', 'Hmac'),
    )
    for pattern, spelling in preferred_spellings:
        name = re.sub(pattern, spelling, name, flags=re.I)
    return name
117
118
def get_current_data(f):
    """Returns a map of the algorithms in the given input.

    The input file-like object must supply a "BEGIN ALGORITHM LIST" line
    followed by any number of lines of an algorithm category and algorithm name
    separated by whitespace followed by a "END ALGORITHM LIST" line.  The
    input can supply arbitrary values outside of the BEGIN and END lines, it
    will be ignored.

    Lines whose category is not in SUPPORTED_CATEGORIES are skipped.  The
    returned algorithms will have their names normalized.  If several input
    lines normalize to the same name, the original spelling of the last one
    is the one kept in the name mapping.

    Args:
      f: A file-like object providing readlines().

    Returns:
      A dict of categories to lists of normalized algorithm names and a
        dict of normalized algorithm names to original algorithm names.

    Raises:
      EOFError: If either the BEGIN or END sentinel lines are not present.
      ValueError: If a line between the BEGIN and END sentinel lines is not
        made up of two identifiers separated by whitespace.
    """
    current_data = collections.defaultdict(list)
    name_dict = {}

    saw_begin = False
    saw_end = False
    for line in f.readlines():
        line = line.strip()
        if not saw_begin:
            # Everything ahead of the begin sentinel is unrelated output.
            if line == 'BEGIN ALGORITHM LIST':
                saw_begin = True
            continue
        if line == 'END ALGORITHM LIST':
            # Anything after the end sentinel is ignored as well.
            saw_end = True
            break
        fields = line.split()
        if len(fields) != 2:
            # Fail with the offending line rather than an opaque
            # tuple-unpacking error.
            raise ValueError(
                'Expected "<category> <algorithm>" but got: %r' % line)
        category, algorithm = fields
        if category not in SUPPORTED_CATEGORIES:
            continue
        normalized_name = normalize_name(algorithm)
        current_data[category].append(normalized_name)
        name_dict[normalized_name] = algorithm

    if not saw_begin:
        raise EOFError(
            'Reached the end of input without encountering the begin sentinel')
    if not saw_end:
        raise EOFError(
            'Reached the end of input without encountering the end sentinel')
    # Hand back a plain dict so lookups of missing categories do not
    # silently create entries.
    return dict(current_data), name_dict
167
168
def update_data(prev_data, current_data, name_dict, api_level, date):
    """Returns a copy of prev_data, modified to take into account current_data.

    Updates the algorithm support metadata structure by starting with the
    information in prev_data and updating it to take into account the algorithms
    listed in current_data.  Algorithms not present in current_data will still
    be present in the return value, but their supported_api_levels may be
    modified to indicate that they are no longer supported.

    Args:
      prev_data: The data on algorithm support from the previous API level.
      current_data: The algorithms supported in the current API level, as a map
        from algorithm category to list of algorithm names.
      name_dict: A map from normalized algorithm name to the name to emit for
        it; consulted only for algorithms that have no entry in prev_data.
      api_level: An integer representing the current API level.
      date: A datetime object containing the time of update.

    Returns:
      A new dict with a 'categories' list (sorted by name, each entry holding
      a 'name' and a name-sorted 'algorithms' list), an 'api_level' string and
      a 'last_updated' timestamp string.  Categories that end up with no
      algorithms are omitted.
    """
    new_data = {'categories': []}

    for category in SUPPORTED_CATEGORIES:
        prev_category = find_by_name(prev_data['categories'], category)
        if prev_category is None:
            # No previous record of this category: use an empty placeholder.
            prev_category = {'name': category, 'algorithms': []}
        current_category = (
            current_data[category] if category in current_data else [])
        new_category = {'name': category, 'algorithms': []}
        # Previous names are normalized here so they can be compared with the
        # names in current_category (expected to be normalized already, as
        # get_current_data() produces them).
        prev_algorithms = [normalize_name(x['name']) for x in prev_category['algorithms']]
        alg_union = set(prev_algorithms) | set(current_category)
        for alg in alg_union:
            prev_alg = find_by_normalized_name(prev_category['algorithms'], alg)
            # Output name preference: the spelling already recorded in
            # prev_data, then the spelling from name_dict, then the
            # normalized name itself.
            if prev_alg is not None:
                new_algorithm = {'name': prev_alg['name']}
            elif alg in name_dict:
                new_algorithm = {'name': name_dict[alg]}
            else:
                new_algorithm = {'name': alg}
            if category not in CASE_SENSITIVE_CATEGORIES:
                new_algorithm['name'] = fix_name_caps_for_output(new_algorithm['name'])
            new_level = None
            if alg in current_category and alg in prev_algorithms:
                # Both old and new have it, just ensure the API level is right
                if prev_alg['supported_api_levels'].endswith('+'):
                    new_level = prev_alg['supported_api_levels']
                else:
                    # Support had ended earlier; start a new open-ended range
                    # at the current API level.
                    new_level = (prev_alg['supported_api_levels']
                                 + ',%d+' % api_level)
            elif alg in prev_algorithms:
                # Only in the old set, so ensure the API level is marked
                # as ending
                if prev_alg['supported_api_levels'].endswith('+'):
                    # The algorithm is newly missing, so modify the support
                    # to end at the previous level
                    new_level = prev_alg['supported_api_levels'][:-1]
                    # If the open range started at the previous level, the
                    # bare number already describes it; otherwise close the
                    # range explicitly.
                    if not new_level.endswith(str(api_level - 1)):
                        new_level += '-%d' % (api_level - 1)
                else:
                    # Already recorded as ended; keep the levels unchanged.
                    new_level = prev_alg['supported_api_levels']
                new_algorithm['deprecated'] = 'true'
            else:
                # Only in the new set, so add it
                new_level = '%d+' % api_level
            # Carry over any note attached to the previous entry.
            if alg in prev_algorithms and 'note' in prev_alg:
                new_algorithm['note'] = prev_alg['note']
            new_algorithm['supported_api_levels'] = new_level
            new_category['algorithms'].append(new_algorithm)
        # Categories without any algorithms are left out of the output.
        if new_category['algorithms']:
            new_category['algorithms'] = sort_by_name(
                new_category['algorithms'])
            new_data['categories'].append(new_category)
    new_data['categories'] = sort_by_name(new_data['categories'])
    new_data['api_level'] = str(api_level)
    new_data['last_updated'] = date.strftime('%Y-%m-%d %H:%M:%S UTC')

    return new_data
242
243
def main():
    """Command-line entry point.

    Parses the command line, loads the previous support data from the given
    JSON file via crypto_docs.load_json(), reads the current algorithm list
    from stdin, and computes the updated data.  With --rewrite_file the
    result is written back to the input file below an "autogenerated" header
    line; otherwise it is printed to stdout.
    """
    parser = argparse.ArgumentParser(description='Update JSON support file')
    parser.add_argument('--api_level',
                        required=True,
                        type=int,
                        help='The current API level')
    parser.add_argument('--rewrite_file',
                        action='store_true',
                        help='If specified, rewrite the'
                             ' input file with the result')
    parser.add_argument('file',
                        help='The JSON file to update')
    args = parser.parse_args()

    prev_data = crypto_docs.load_json(args.file)

    current_data, name_dict = get_current_data(sys.stdin)

    new_data = update_data(prev_data,
                           current_data,
                           name_dict,
                           args.api_level,
                           datetime.datetime.utcnow())

    # Serialize before opening the output file: opening with 'w' truncates
    # it, so a serialization failure must not be able to destroy the input.
    output = json.dumps(
        new_data, indent=2, sort_keys=True, separators=(',', ': '))

    if args.rewrite_file:
        # The context manager closes the file even if a write fails.
        with open(args.file, 'w') as f:
            f.write('# This file is autogenerated.'
                    '  See libcore/tools/docs/crypto/README for details.\n')
            f.write(output)
    else:
        # A single parenthesized argument prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print(output)
278
279
280if __name__ == '__main__':
281    main()
282