1#
2# Copyright (C) 2016 The Android Open Source Project
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8#      http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15#
16"""Parser for Android's version script information."""
17import logging
18import re
19
20
# Architecture names that are recognized as architecture tags. A tag that
# matches one of these restricts a version or symbol to the tagged
# architectures (see symbol_in_arch).
ALL_ARCHITECTURES = (
    'arm',
    'arm64',
    'x86',
    'x86_64',
)


# Arbitrary magic number. We use the same one in api-level.h for this purpose.
# decode_api_level() returns this value for the "current" API level.
FUTURE_API_LEVEL = 10000
31
32
def logger():
    """Look up and return the logger named after this module."""
    module_logger = logging.getLogger(__name__)
    return module_logger
36
37
def get_tags(line):
    """Collects the whitespace separated tags that follow the first '#'.

    Everything after the first '#' on the (stripped) line is treated as the
    tag section. A line with no '#' yields an empty list.
    """
    tag_section = line.strip().partition('#')[2]
    # Every run of non-whitespace characters is one tag.
    return re.findall(r'\S+', tag_section)
42
43
def is_api_level_tag(tag):
    """Reports whether the tag carries an API level that may need decoding.

    That is the case for `introduced=`, `introduced-<suffix>` and
    `versioned=` tags.
    """
    api_level_prefixes = ('introduced=', 'introduced-', 'versioned=')
    return tag.startswith(api_level_prefixes)
53
54
def decode_api_level(api, api_map):
    """Translates an API level argument into its numeric API level.

    Numeric strings are simply converted to an integer. The special name
    "current" maps to FUTURE_API_LEVEL. Anything else is treated as an API
    codename (like "O") and looked up in api_map, so an unknown codename
    raises KeyError.
    """
    try:
        return int(api)
    except ValueError:
        # Not a plain number, so it has to be a name; resolve it below.
        pass

    return FUTURE_API_LEVEL if api == "current" else api_map[api]
71
72
def decode_api_level_tags(tags, api_map):
    """Replaces API codenames in API level tags with numeric API levels.

    The list is updated in place and the same list object is returned. Tags
    that are not API level tags are left untouched.

    Raises:
        ParseError: An unknown version name was found in a tag.
    """
    for index, tag in enumerate(tags):
        if is_api_level_tag(tag):
            key, raw_level = split_tag(tag)
            try:
                level = decode_api_level(raw_level, api_map)
            except KeyError:
                raise ParseError('Unknown version name in tag: {}'.format(tag))
            tags[index] = '{}={}'.format(key, level)
    return tags
90
91
def split_tag(tag):
    """Splits a key/value tag at its first '='.

    Raises:
        ValueError: Tag is not a key/value type tag.

    Returns: Tuple of (key, value) of the tag. Both components are strings.
    """
    key, separator, value = tag.partition('=')
    if not separator:
        # No '=' anywhere in the tag.
        raise ValueError('Not a key/value tag: ' + tag)
    return key, value
104
105
def get_tag_value(tag):
    """Extracts the value part of a key/value tag.

    Raises:
        ValueError: Tag is not a key/value type tag.

    Returns: Value part of tag as a string.
    """
    _, value = split_tag(tag)
    return value
115
116
def version_is_private(version):
    """Reports whether a version name marks a private version.

    Names ending in `_PRIVATE` or `_PLATFORM` are treated as private.
    """
    private_suffixes = ('_PRIVATE', '_PLATFORM')
    return version.endswith(private_suffixes)
120
121
def should_omit_version(version, arch, api, llndk, apex):
    """Decides whether a whole version section should be left out.

    We want to omit any sections that do not have any symbols we'll have in
    the stub library: private or platform-only sections, llndk/apex sections
    that were not requested, and sections that are not available for the
    given architecture or API level.
    """
    if version_is_private(version.name) or 'platform-only' in version.tags:
        return True

    tags = version.tags
    tagged_llndk = 'llndk' in tags
    tagged_apex = 'apex' in tags
    if tagged_llndk or tagged_apex:
        # A section tagged llndk and/or apex is only kept when at least one
        # of the modes it is tagged with was requested.
        requested = (tagged_llndk and llndk) or (tagged_apex and apex)
        if not requested:
            return True

    return not (symbol_in_arch(tags, arch) and symbol_in_api(tags, arch, api))
146
147
def should_omit_symbol(symbol, arch, api, llndk, apex):
    """Decides whether a single symbol should be left out.

    A symbol is omitted when it is tagged llndk and/or apex but none of the
    modes it is tagged with was requested, or when it is not available for
    the given architecture or API level.
    """
    tags = symbol.tags
    tagged_llndk = 'llndk' in tags
    tagged_apex = 'apex' in tags
    if tagged_llndk or tagged_apex:
        requested = (tagged_llndk and llndk) or (tagged_apex and apex)
        if not requested:
            return True

    return not (symbol_in_arch(tags, arch) and symbol_in_api(tags, arch, api))
161
162
def symbol_in_arch(tags, arch):
    """Reports whether the symbol is present for the given architecture."""
    # An explicit tag for the requested architecture always wins.
    if arch in tags:
        return True

    # Without any arch tags the symbol is available everywhere. If there are
    # arch tags (and none matched above), the symbol is limited to those
    # other architectures.
    return not any(tag in ALL_ARCHITECTURES for tag in tags)
176
177
def symbol_in_api(tags, arch, api):
    """Reports whether the symbol is present for the given API level.

    A `future` tag makes the symbol available only at FUTURE_API_LEVEL.
    Otherwise the last `introduced-<arch>=` tag for the given architecture
    is used if there is one, falling back to the last plain `introduced=`
    tag. A symbol without any applicable tag has always been available.
    """
    arch_prefix = 'introduced-' + arch + '='
    common_tag = None
    arch_tag = None
    for tag in tags:
        if tag == 'future':
            return api == FUTURE_API_LEVEL
        if tag.startswith(arch_prefix):
            arch_tag = tag
        elif tag.startswith('introduced='):
            common_tag = tag

    # The arch-specific tag overrides the common one.
    introduced_tag = common_tag if arch_tag is None else arch_tag
    if introduced_tag is None:
        return True

    return api >= int(get_tag_value(introduced_tag))
198
199
def symbol_versioned_in_api(tags, api):
    """Reports whether the symbol should be versioned for the given API.

    This models the `versioned=API` tag. This should be a very uncommonly
    needed tag, and is really only needed to fix versioning mistakes that are
    already out in the wild.

    For example, some of libc's __aeabi_* functions were originally placed in
    the private version, but that was incorrect. They are now in LIBC_N, but
    when building against any version prior to N we need the symbol to be
    unversioned (otherwise it won't resolve on M where it is private).
    """
    # Only the first "versioned" tag is taken into account.
    versioned_tag = next(
        (tag for tag in tags if tag.startswith('versioned=')), None)
    if versioned_tag is None:
        # Without a "versioned" tag the symbol has been versioned for as
        # long as it has existed.
        return True
    return api >= int(get_tag_value(versioned_tag))
218
219
class ParseError(RuntimeError):
    """An error that occurred while parsing a symbol file.

    Raised by the parsing code in this module when the input is malformed,
    for example on unexpected top-level contents, an unterminated block, an
    unknown visibility label or an unknown version name in a tag.
    """
222
223
class MultiplyDefinedSymbolError(RuntimeError):
    """Raised when a symbol name is defined more than once.

    The offending names are kept in `multiply_defined_symbols` and are also
    listed, comma separated, in the error message.
    """
    def __init__(self, multiply_defined_symbols):
        joined_names = ', '.join(multiply_defined_symbols)
        message = (
            'Version script contains multiple definitions for: {}'.format(
                joined_names))
        super(MultiplyDefinedSymbolError, self).__init__(message)
        self.multiply_defined_symbols = multiply_defined_symbols
231
232
class Version:
    """A version block of a symbol file.

    Attributes (stored exactly as passed to the constructor):
        name: Name of the version block.
        base: Name that follows the closing brace of the block, or None.
        tags: List of tags found on the opening line of the block.
        symbols: List of the symbols defined in the block.
    """
    def __init__(self, name, base, tags, symbols):
        self.name = name
        self.base = base
        self.tags = tags
        self.symbols = symbols

    def __repr__(self):
        return 'Version({!r}, {!r}, {!r}, {!r})'.format(
            self.name, self.base, self.tags, self.symbols)

    def __eq__(self, other):
        """Compares all four attributes; tag and symbol order matters."""
        if not isinstance(other, Version):
            # Let Python fall back to its default handling instead of
            # failing with AttributeError on unrelated objects such as None.
            return NotImplemented
        return (self.name == other.name
                and self.base == other.base
                and self.tags == other.tags
                and self.symbols == other.symbols)
251
252
class Symbol:
    """A symbol definition from a symbol file.

    Attributes (stored exactly as passed to the constructor):
        name: Name of the symbol.
        tags: List of tags attached to the symbol.
    """
    def __init__(self, name, tags):
        self.name = name
        self.tags = tags

    def __repr__(self):
        return 'Symbol({!r}, {!r})'.format(self.name, self.tags)

    def __eq__(self, other):
        """Compares the name and the tags; tag order is ignored."""
        if not isinstance(other, Symbol):
            # Let Python fall back to its default handling instead of
            # failing with AttributeError on unrelated objects such as None.
            return NotImplemented
        return self.name == other.name and set(self.tags) == set(other.tags)
261
262
class SymbolFileParser:
    """Reads an NDK symbol file and turns it into Version objects."""
    def __init__(self, input_file, api_map, arch, api, llndk, apex):
        self.input_file = input_file
        self.current_line = None
        # Settings used to decode tags and to decide which versions and
        # symbols are taken into account by the duplicate check.
        self.api_map = api_map
        self.arch = arch
        self.api = api
        self.llndk = llndk
        self.apex = apex

    def parse(self):
        """Parses the whole symbol file and returns its Version objects.

        Raises ParseError for top-level contents that do not open a version
        block, and MultiplyDefinedSymbolError for duplicated symbols.
        """
        versions = []
        while True:
            if self.next_line() == '':
                break
            if '{' not in self.current_line:
                raise ParseError(
                    'Unexpected contents at top level: ' + self.current_line)
            versions.append(self.parse_version())

        self.check_no_duplicate_symbols(versions)
        return versions

    def check_no_duplicate_symbols(self, versions):
        """Raises MultiplyDefinedSymbolError for multiply defined symbols.

        Multiple definitions are the normal case when symbol versioning is
        actually used, but this script doesn't currently handle that. Without
        this check the failure would be a not necessarily obvious
        "error: redefinition of 'foo'" from stub.c, so it's better to catch
        the situation here and raise a better error.

        Versions and symbols that would be omitted for the configured
        architecture, API level, llndk and apex settings are ignored.
        """
        seen = set()
        duplicates = set()
        for version in versions:
            if should_omit_version(version, self.arch, self.api, self.llndk,
                                   self.apex):
                continue

            for symbol in version.symbols:
                if should_omit_symbol(symbol, self.arch, self.api, self.llndk,
                                      self.apex):
                    continue

                symbol_name = symbol.name
                if symbol_name in seen:
                    duplicates.add(symbol_name)
                else:
                    seen.add(symbol_name)

        if duplicates:
            raise MultiplyDefinedSymbolError(sorted(duplicates))

    def parse_version(self):
        """Parses one version section, starting at its opening line.

        Returns the Version object for the section. Raises ParseError if the
        section is malformed or the file ends before the section is closed.
        """
        header = self.current_line
        name = header.split('{')[0].strip()
        tags = decode_api_level_tags(get_tags(header), self.api_map)
        symbols = []
        in_global_scope = True
        in_cpp_block = False
        while self.next_line() != '':
            line = self.current_line
            if '}' in line:
                # Line is something like '} BASE; # tags'. Both base and tags
                # are optional here.
                trailer = line.partition('}')[2].partition('#')[0].strip()
                if not trailer.endswith(';'):
                    raise ParseError(
                        'Unterminated version/export "C++" block (expected ;).')
                if in_cpp_block:
                    # This brace only closes the 'extern "C++"' block.
                    in_cpp_block = False
                    continue
                base = trailer.rstrip(';').rstrip()
                return Version(name, base or None, tags, symbols)

            if 'extern "C++" {' in line:
                in_cpp_block = True
            elif not in_cpp_block and ':' in line:
                label = line.split(':')[0].strip()
                if label == 'local':
                    in_global_scope = False
                elif label == 'global':
                    in_global_scope = True
                else:
                    raise ParseError('Unknown visiblity label: ' + label)
            elif in_global_scope and not in_cpp_block:
                symbols.append(self.parse_symbol())
            # Anything else is in a hidden scope or in an 'extern "C++"'
            # block and is ignored.
        raise ParseError('Unexpected EOF in version block.')

    def parse_symbol(self):
        """Parses the current line as a symbol and returns a Symbol object.

        Raises ParseError if the line has no ';' or contains a '*'.
        """
        line = self.current_line
        if ';' not in line:
            raise ParseError('Expected ; to terminate symbol: ' + line)
        if '*' in line:
            raise ParseError('Wildcard global symbols are not permitted.')
        # Line is now in the format "<symbol-name>; # tags"
        name = line.strip().partition(';')[0]
        tags = decode_api_level_tags(get_tags(line), self.api_map)
        return Symbol(name, tags)

    def next_line(self):
        """Advances to the next non-empty non-comment line and returns it.

        The line is also stored in `current_line`. A return value of ''
        indicates EOF.
        """
        read_line = self.input_file.readline
        line = read_line()
        while True:
            stripped = line.strip()
            if stripped and not stripped.startswith('#'):
                break
            line = read_line()

            # We want to skip empty lines, but '' indicates EOF.
            if line == '':
                break
        self.current_line = line
        return line
385