1#
2# Copyright (C) 2016 The Android Open Source Project
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8#      http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15#
16"""Parses the contents of a Unix archive file generated using the 'ar' command.
17
The constructor returns an Archive object, which contains a dictionary from
file name to file content; the dictionary is populated by calling Parse().
20
21
22    Typical usage example:
23
24    archive = Archive(content)
25    archive.Parse()
26"""
27
28import io
29
class Archive(object):
    """Parses and stores the contents of a Unix archive ('ar' file).

    The archive is consumed sequentially from an in-memory buffer. Nothing is
    parsed until Parse() is called; afterwards every regular member is
    available through the `files` dictionary.

    The buffer may be a text string or a binary string (bytes / bytearray).
    Header fields and string-table entries are always handled as text, so
    member names are strings regardless of the buffer type.

    Attributes:
        files: a dictionary from file name (string) to file content (a slice
            of the buffer passed to the constructor, so it has the same type
            as that buffer).
    """

    GLOBAL_SIG = '!<arch>\n'  # Unix global signature
    STRING_TABLE_ID = '//'
    STRING_TABLE_TERMINATOR = '/\n'
    SYM_TABLE_ID = '__.SYMDEF'
    FILE_ID_LENGTH = 16  # Number of bytes to store file identifier
    FILE_ID_TERMINATOR = '/'
    FILE_TIMESTAMP_LENGTH = 12  # Number of bytes to store file mod timestamp
    OWNER_ID_LENGTH = 6  # Number of bytes to store file owner ID
    GROUP_ID_LENGTH = 6  # Number of bytes to store file group ID
    FILE_MODE_LENGTH = 8  # Number of bytes to store file mode
    CONTENT_SIZE_LENGTH = 10  # Number of bytes to store content size
    END_TAG = '`\n'  # Header end tag
    PADDING = '\n'  # Filler byte that keeps every member on an even offset

    def __init__(self, file_content):
        """Stores the archive contents; call Parse() to actually parse them.

        Args:
          file_content: Binary contents of the archive file.
        """
        self.files = {}
        self._content = file_content
        self._cursor = 0
        # Maps a byte offset inside the string table to the long file name
        # that starts at that offset.
        self._string_table = {}

    @staticmethod
    def _ToText(raw):
        """Returns raw as a text string so it can be compared to constants.

        Args:
            raw: A slice of the archive content (text or binary).

        Returns:
            raw itself when it is already a native string; otherwise the
            latin-1 decoding of raw. latin-1 maps every byte to exactly one
            character, so decoding cannot fail and string-table offsets are
            preserved.
        """
        if isinstance(raw, (bytes, bytearray)) and not isinstance(raw, str):
            return raw.decode('latin-1')
        return raw

    def ReadBytes(self, n):
        """Reads n bytes from the content stream and advances the cursor.

        Args:
            n: The integer number of bytes to read.

        Returns:
            The n-byte string (binary) of data from the content stream.

        Raises:
            ValueError: invalid file format (n is negative or reaches past
                the end of the content).
        """
        if n < 0:
            # A negative length would silently move the cursor backwards.
            raise ValueError('Invalid file. Negative read length.')
        if self._cursor + n > len(self._content):
            raise ValueError('Invalid file. EOF reached unexpectedly.')

        content = self._content[self._cursor : self._cursor + n]
        self._cursor += n
        return content

    def Parse(self):
        """Verifies the archive header and parses the contents of the archive.

        Raises:
            ValueError: invalid file format.
        """
        # Check global header
        sig = self._ToText(self.ReadBytes(len(self.GLOBAL_SIG)))
        if sig != self.GLOBAL_SIG:
            raise ValueError('File is not a valid Unix archive.')

        # Read files in archive
        while self._cursor < len(self._content):
            self.ReadFile()

    def ReadFile(self):
        """Reads a file from the archive content stream.

        The string table ('//') is loaded into the internal offset-to-name
        map, the '__.SYMDEF' symbol table is skipped, and every other member
        is stored in `files` under its resolved name.

        Raises:
            ValueError: invalid file format.
        """
        name = self._ToText(self.ReadBytes(self.FILE_ID_LENGTH)).strip()
        # The next four header fields are not needed; just skip over them.
        self.ReadBytes(self.FILE_TIMESTAMP_LENGTH)
        self.ReadBytes(self.OWNER_ID_LENGTH)
        self.ReadBytes(self.GROUP_ID_LENGTH)
        self.ReadBytes(self.FILE_MODE_LENGTH)
        size = self._ToText(self.ReadBytes(self.CONTENT_SIZE_LENGTH))
        content_size = int(size)  # int() raises ValueError on a bad field
        if content_size < 0:
            # Would rewind the cursor and make Parse() loop forever.
            raise ValueError('Invalid file. Negative content size.')

        end_tag = self._ToText(self.ReadBytes(len(self.END_TAG)))
        if end_tag != self.END_TAG:
            raise ValueError('File is not a valid Unix archive. Missing end tag.')

        content = self.ReadBytes(content_size)

        # Members start on even offsets: an odd-sized member is followed by a
        # single padding byte that is not counted in its size. Only skip it
        # when it really is the padding byte, so unpadded input still parses.
        if content_size % 2:
            next_byte = self._content[self._cursor : self._cursor + 1]
            if self._ToText(next_byte) == self.PADDING:
                self._cursor += 1

        if name == self.STRING_TABLE_ID:
            # Record each long name under the offset at which it starts.
            offset = 0
            entries = self._ToText(content).split(self.STRING_TABLE_TERMINATOR)
            for entry in entries:
                self._string_table[offset] = entry
                offset += len(entry) + len(self.STRING_TABLE_TERMINATOR)
        elif name != self.SYM_TABLE_ID:
            if name.endswith(self.FILE_ID_TERMINATOR):
                # Short name stored inline, e.g. 'foo.o/'.
                # NOTE: a bare '/' identifier also takes this branch and is
                # stored under the empty name.
                name = name[:-len(self.FILE_ID_TERMINATOR)]
            elif name.startswith(self.FILE_ID_TERMINATOR):
                # Long name, e.g. '/42': the number is a string table offset.
                offset = int(name[len(self.FILE_ID_TERMINATOR):])
                if offset not in self._string_table:
                    raise ValueError(
                        'Offset %s not in string table.' % offset)
                name = self._string_table[offset]
            self.files[name] = content
131