1#!/usr/bin/python3
2""" Generate NeuralNetworks.h or types.hal from a specification file and a template file.
3    See README.md for more details.
4"""
5
6import argparse
7import re
8
class Reader:
  """ Simple base class facilitates reading a file.
      Derived class must implement handle_line() and may implement finish().

      read() loads the whole file, then for each line updates self.lineno and
      self.line and calls handle_line(); after the last line it calls finish().
  """
  def __init__(self, filename):
    """ Remember the path of the file to read.  Nothing is opened until read().
    """
    self.filename = filename
    self.line = None # most recently read line
    self.lineno = -1 # zero-based
  def finish(self):
    """ Called after entire file has been read
    """
    pass
  def handle_line(self):
    """ Called after each line has been read.  Must be overridden.

        Raises NotImplementedError here (rather than using a bare assert,
        which is stripped under "python -O") so that a derived class that
        forgets to implement it never silently does nothing.
    """
    raise NotImplementedError(type(self).__name__ + " must implement handle_line()")
  def read(self):
    """ Read the file named by self.filename and dispatch every line to
        handle_line(), then call finish().
    """
    # The file is read completely (and closed) before any line is handled.
    with open(self.filename) as f:
      lines = f.readlines()
    for lineno, line in enumerate(lines):
      self.lineno = lineno
      self.line = line
      self.handle_line()
    self.finish()
  def context(self):
    """ Error-reporting aid: Return a string describing the location
        of the most recently read line (line numbers are reported one-based)
    """
    return f"line {self.lineno + 1} of {self.filename}"
37
class Specification(Reader):
  """ Reader for specification file

      After read(), the results for the requested kind are available as:
        sections -- %section name -> list of lines (macros already expanded)
        defmacro -- %define name -> macro body (string)
        deflines -- %define-lines name -> list of lines (macros already expanded)
  """

  # Describes %kind state
  UNCONDITIONAL = 0   # No %kind in effect
  CONDITIONAL_OFF = 1 # %kind in effect, lines are to be ignored
  CONDITIONAL_ON = 2  # %kind in effect, lines are to be processed

  # Every directive name recognized in a specification file
  DIRECTIVES = ("%define", "%define-kinds", "%define-lines", "%/define-lines",
                "%else", "%insert-lines", "%kind", "%/kind", "%section",
                "%/section")

  # Macro invocation within a line: %{name arg1 arg2 ...}
  _LINESEARCH = re.compile(r"(%\{)(\S+?)(?=[\s}])\s*(.*?)\s*(\})")
  # Argument placeholder within a macro body: %{N}
  _BODYSEARCH = re.compile(r"(%\{)(\d+)(\})")

  def __init__(self, filename, kind):
    """ filename is the path of the specification file; kind is the token
        that %kind and %define-kinds directives are compared against.
    """
    super(Specification, self).__init__(filename)
    self.sections = dict() # key is section name, value is array of strings (lines) in the section
    self.section = None # name of current %section
    self.defmacro = dict() # key is macro name, value is string (body of macro)
    self.deflines = dict() # key is definition name, value is array of strings (lines) in the definition
    self.deflines_key = None # name of current %define-lines
    self.kind = kind
    self.kinds = None # remember %define-kinds
    self.conditional = self.UNCONDITIONAL

  def finish(self):
    """ Called at end of file: verify that every %section, %define-lines and
        %kind region has been terminated.
    """
    assert self.section is None, "\"%section " + self.section + \
      "\" not terminated by end of specification file"
    assert self.deflines_key is None, "\"%define-lines " + self.deflines_key + \
      "\" not terminated by end of specification file"
    assert self.conditional == self.UNCONDITIONAL, "%kind not terminated by end of specification file"

  def macro_substitution(self):
    """ Performs macro substitution on self.line, and returns the result

        Each %{name arg1 arg2 ...} in the line is replaced by the body of
        macro "name" (from %define), in which every %{N} has been replaced
        by the N-th whitespace-separated argument (N counts from 1).
        Substituted text is not rescanned for further macros.

        Asserts if the macro is undefined or if N is zero or exceeds the
        number of supplied arguments.
    """
    def expand_invocation(match):
      # lookup macro
      key = match[2]
      assert key in self.defmacro, "Missing definition of macro %{" + key + "} at " + self.context()

      # read macro arguments
      args = re.split(r"\s+", match[3]) if match[3] != "" else []

      def expand_argument(bodymatch):
        argnum = int(bodymatch[2])
        # Argument numbers are one-based; 0 would otherwise silently index
        # args[-1] (the last argument).
        assert argnum > 0, "Macro argument number must be positive (at " + self.context() + ")"
        assert argnum <= len(args), "Macro argument number " + str(argnum) + " exceeds " + \
          str(len(args)) + " supplied arguments at " + self.context()
        return args[argnum - 1]

      # substitute arguments in the macro body
      return self._BODYSEARCH.sub(expand_argument, self.defmacro[key])

    # perform macro substitution
    return self._LINESEARCH.sub(expand_invocation, self.line)

  def match_kind(self, patterns_string):
    """ Utility routine for %kind directive: Is self.kind found within patterns_string?

        patterns_string is a whitespace-separated list of patterns.  Returns
        True as soon as any pattern matches self.kind, False if none does.
    """
    patterns = re.split(r"\s+", patterns_string.strip())
    for pattern in patterns:
      if pattern.endswith("*"):
        # A wildcard pattern: Ends in *, so see if it's a prefix of self.kind.
        if self.kind.startswith(pattern[:-1]):
          return True
      elif pattern.endswith("+"):
        # A lowest version pattern: Ends in + and we check if self.kind is equal
        # to the kind in the pattern or to any kind which is to the right of the
        # kind in the pattern in self.kinds.
        lowest = pattern[:-1]
        assert self.kinds is not None and lowest in self.kinds, (
            "Kind \"" + pattern + "\" at " + self.context() +
            " wasn't defined in %define-kinds"
        )
        lowest_pos = self.kinds.index(lowest)
        # Only a positive result ends the search; otherwise keep checking the
        # remaining patterns on the line, as the other pattern forms do.
        if self.kind in self.kinds[lowest_pos:]:
          return True
      else:
        # An ordinary pattern: See if it matches self.kind.
        if self.kinds is not None and pattern not in self.kinds:
          # TODO: Something similar for the wildcard case above
          print("WARNING: kind \"" + pattern + "\" at " + self.context() +
                " would have been rejected by %define-kinds")
        if pattern == self.kind:
          return True
    return False

  def handle_line(self):
    """ Most of the work occurs here.  Having read a line, we act on it immediately:
        skip a comment, process a directive, add a line to a section or to a multiline
        definition, etc.
    """

    # Common typos: /%directive, \%directive
    matchbad = re.search(r"^[/\\]%(\S*)", self.line)
    if matchbad and "%/" + matchbad[1] in self.DIRECTIVES:
      print("WARNING: Probable misspelled directive at " + self.context())

    # Directive?  (A line starting with "%{" is a macro invocation, not a directive.)
    if self.line.startswith("%") and not self.line.startswith("%{"):
      # Check for comment
      if self.line.startswith("%%"):
        return

      # Validate directive name
      directive = re.search(r"^(%\S*)", self.line)[1]
      assert directive in self.DIRECTIVES, \
        "Unknown directive \"" + directive + "\" on " + self.context()

      # Check for end of multiline macro
      match = re.search(r"^%/define-lines\s*(\S*)", self.line)
      if match:
        assert match[1] == "", "Malformed directive \"%/define-lines\" on " + self.context()
        assert self.deflines_key is not None, "%/define-lines with no matching %define-lines on " + \
          self.context()
        self.deflines_key = None
        return

      # Directives are forbidden within multiline macros
      assert self.deflines_key is None, "Directive is not permitted in definition of \"" + \
        self.deflines_key + "\" at " + self.context()

      # Check for define (multi line)
      match = re.search(r"^%define-lines\s+(\S+)\s*$", self.line)
      if match:
        key = match[1]
        if self.conditional == self.CONDITIONAL_OFF:
          # Definition is being skipped: use "" as a placeholder key so that
          # we still track (and require) the matching %/define-lines.
          self.deflines_key = ""
          return
        assert key not in self.deflines, "Duplicate definition of \"" + key + "\" on " + self.context()
        self.deflines[key] = []
        self.deflines_key = key
        # Non-directive lines will be added to self.deflines[key] as they are read
        # until we see %/define-lines
        return

      # Check for insert
      match = re.search(r"^%insert-lines\s+(\S+)\s*$", self.line)
      if match:
        assert self.section is not None, "%insert-lines outside %section at " + self.context()
        key = match[1]
        assert key in self.deflines, "Missing definition of lines \"" + key + "\" at " + self.context()
        if self.conditional == self.CONDITIONAL_OFF:
          return
        self.sections[self.section].extend(self.deflines[key])
        return

      # Check for start of section
      match = re.search(r"^%section\s+(\S+)\s*$", self.line)
      if match:
        assert self.section is None, "Nested %section is forbidden at " + self.context()
        assert self.conditional == self.UNCONDITIONAL, "%section within %kind is forbidden at " + self.context()
        key = match[1]
        assert key not in self.sections, "Duplicate definition of \"" + key + "\" on " + self.context()
        self.sections[key] = []
        self.section = key
        # Non-directive lines will be added to self.sections[key] as they are read
        # until we see %/section
        return

      # Check for end of section
      if re.search(r"^%/section\s*$", self.line):
        assert self.section is not None, "%/section with no matching %section on " + self.context()
        assert self.conditional == self.UNCONDITIONAL # can't actually happen
        self.section = None
        return

      # Check for start of kind
      match = re.search(r"^%kind\s+((\S+)(\s+\S+)*)\s*$", self.line)
      if match:
        assert self.conditional == self.UNCONDITIONAL, "%kind is nested at " + self.context()
        patterns = match[1]
        if self.match_kind(patterns):
          self.conditional = self.CONDITIONAL_ON
        else:
          self.conditional = self.CONDITIONAL_OFF
        return

      # Check for complement of kind (else)
      if re.search(r"^%else\s*$", self.line):
        assert self.conditional != self.UNCONDITIONAL, "%else without matching %kind on " + self.context()
        if self.conditional == self.CONDITIONAL_ON:
          self.conditional = self.CONDITIONAL_OFF
        else:
          assert self.conditional == self.CONDITIONAL_OFF
          self.conditional = self.CONDITIONAL_ON
        # Note that we permit
        #   %kind foo
        #   abc
        #   %else
        #   def
        #   %else
        #   ghi
        #   %/kind
        # which is equivalent to
        #   %kind foo
        #   abc
        #   ghi
        #   %else
        #   def
        #   %/kind
        # Probably not very useful, but easier to allow than to forbid.
        return

      # Check for end of kind
      if re.search(r"^%/kind\s*$", self.line):
        assert self.conditional != self.UNCONDITIONAL, "%/kind without matching %kind on " + self.context()
        self.conditional = self.UNCONDITIONAL
        return

      # Check for kinds definition
      match = re.search(r"^%define-kinds\s+(\S.*?)\s*$", self.line)
      if match:
        assert self.conditional == self.UNCONDITIONAL, "%define-kinds within %kind is forbidden at " + \
          self.context()
        kinds = re.split(r"\s+", match[1])
        assert self.kind in kinds, "kind \"" + self.kind + "\" is not listed on " + self.context()
        assert self.kinds is None, "Second %define-kinds directive at " + self.context()
        self.kinds = kinds
        return

      # Check for define
      match = re.search(r"^%define\s+(\S+)(.*)$", self.line)
      if match:
        if self.conditional == self.CONDITIONAL_OFF:
          return
        key = match[1]
        assert key not in self.defmacro, "Duplicate definition of \"" + key + "\" on " + self.context()
        tail = match[2]
        # The body is whatever follows the single whitespace character after
        # the macro name; a name with nothing after it defines an empty body.
        match = re.search(r"\s(.*)$", tail)
        if match:
          self.defmacro[key] = match[1]
        else:
          self.defmacro[key] = ""
        return

      # Malformed directive -- the name matched, but the syntax didn't.
      # Raised explicitly so that the line can never fall through and be
      # treated as ordinary text, even if assertions are disabled.
      raise AssertionError("Malformed directive \"" + directive + "\" on " + self.context())

    if self.conditional == self.CONDITIONAL_OFF:
      pass
    elif self.deflines_key is not None:
      self.deflines[self.deflines_key].append(self.macro_substitution())
    elif self.section is None:
      # Treat as comment
      pass
    else:
      self.sections[self.section].append(self.macro_substitution())
299
class Template(Reader):
  """ Reader for template file

      After read(), self.lines holds the output text: the template's literal
      lines, with each "%insert name" line replaced by the lines of section
      "name" from the specification.
  """

  def __init__(self, filename, specification):
    """ filename is the path of the template file; specification is an
        already-read object whose "sections" attribute maps a section name
        to its list of lines.
    """
    super(Template, self).__init__(filename)
    self.lines = []
    self.specification = specification

  def handle_line(self):
    """ Most of the work occurs here.  Having read a line, we act on it immediately:
        skip a comment, process a directive, accumulate a line.
    """

    # Directive?
    if self.line.startswith("%"):
      # Check for comment
      if self.line.startswith("%%"):
        return

      # Check for insertion
      match = re.search(r"^%insert\s+(\S+)\s*$", self.line)
      if match:
        key = match[1]
        # Use the specification this template was constructed with, not a
        # module-level global that only exists when run as a script.
        sections = self.specification.sections
        assert key in sections, "Unknown section \"" + key + "\" on " + self.context()
        for line in sections[key]:
          if re.search("TODO", line, re.IGNORECASE):
            print("WARNING: \"TODO\" at " + self.context())
          self.lines.append(line)
        return

      # Bad directive.  Raised explicitly so that the line can never fall
      # through and be emitted as literal text, even if assertions are disabled.
      match = re.search(r"^(%\S*)", self.line)
      raise AssertionError("Unknown directive \"" + match[1] + "\" on " + self.context())

    # Literal text
    if re.search("TODO", self.line, re.IGNORECASE):
      print("WARNING: \"TODO\" at " + self.context())
    self.lines.append(self.line)
339
if __name__ == "__main__":
  # Command line: all four paths/tokens are mandatory, verbosity is optional.
  parser = argparse.ArgumentParser(
      description="Create an output file by inserting sections from a specification file into a template file")
  parser.add_argument("-k", "--kind", required=True,
                      help='token identifying kind of file to generate (per "kind" directive)')
  parser.add_argument("-o", "--output", required=True, help="path to generated output file")
  parser.add_argument("-s", "--specification", required=True, help="path to input specification file")
  parser.add_argument("-t", "--template", required=True, help="path to input template file")
  parser.add_argument("-v", "--verbose", action="store_true")
  args = parser.parse_args()
  verbose = args.verbose
  if verbose:
    print(args)

  # Step 1: parse the specification for the requested kind
  specification = Specification(args.specification, args.kind)
  specification.read()
  if verbose:
    print(specification.defmacro)
    print(specification.deflines)

  # Step 2: expand the template using the sections from the specification
  template = Template(args.template, specification)
  template.read()

  # Step 3: emit the accumulated lines (each already carries its own line ending)
  output_text = "".join(template.lines)
  with open(args.output, "w") as out:
    out.write(output_text)
370
371# TODO: Write test cases for malformed specification and template files
372# TODO: Find a cleaner way to handle conditionals (%kind) or nesting in general;
373#       maybe add support for more nesting
374# TODO: Unify section/define-lines, rather than having two kinds of text regions?
375#       Could we take this further and do away with the distinction between a
376#       specification file and a template file, and add a %include directive?
377