1// Copyright 2017 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package parser
16
17import (
18	"errors"
19	"fmt"
20	"io"
21	"sort"
22	"text/scanner"
23)
24
// maxErrors is the number of recorded errors at which errorf gives up and
// aborts parsing.
const maxErrors = 100

// errTooManyErrors is the sentinel value errorf panics with once maxErrors
// errors have been recorded. Parse recovers it.
var errTooManyErrors = errors.New("too many errors")
28
// ParseError pairs an error with the scanner position at which it was
// reported.
type ParseError struct {
	// Err is the underlying problem.
	Err error
	// Pos is the scanner position recorded when the error was raised.
	Pos scanner.Position
}

// Error renders the error as "<position>: <message>".
func (e *ParseError) Error() string {
	where, what := e.Pos, e.Err
	return fmt.Sprintf("%s: %s", where, what)
}
37
38const builtinDollar = "__builtin_dollar"
39
40var builtinDollarName = SimpleMakeString(builtinDollar, NoPos)
41
42func (p *parser) Parse() ([]Node, []error) {
43	defer func() {
44		if r := recover(); r != nil {
45			if r == errTooManyErrors {
46				return
47			}
48			panic(r)
49		}
50	}()
51
52	p.parseLines()
53	p.accept(scanner.EOF)
54	p.nodes = append(p.nodes, p.comments...)
55	sort.Sort(byPosition(p.nodes))
56
57	return p.nodes, p.errors
58}
59
60type parser struct {
61	scanner  scanner.Scanner
62	tok      rune
63	errors   []error
64	comments []Node
65	nodes    []Node
66	lines    []int
67}
68
69func NewParser(filename string, r io.Reader) *parser {
70	p := &parser{}
71	p.lines = []int{0}
72	p.scanner.Init(r)
73	p.scanner.Error = func(sc *scanner.Scanner, msg string) {
74		p.errorf(msg)
75	}
76	p.scanner.Whitespace = 0
77	p.scanner.IsIdentRune = func(ch rune, i int) bool {
78		return ch > 0 && ch != ':' && ch != '#' && ch != '=' && ch != '+' && ch != '$' &&
79			ch != '\\' && ch != '(' && ch != ')' && ch != '{' && ch != '}' && ch != ';' &&
80			ch != '|' && ch != '?' && ch != '\r' && !isWhitespace(ch)
81	}
82	p.scanner.Mode = scanner.ScanIdents
83	p.scanner.Filename = filename
84	p.next()
85	return p
86}
87
88func (p *parser) Unpack(pos Pos) scanner.Position {
89	offset := int(pos)
90	line := sort.Search(len(p.lines), func(i int) bool { return p.lines[i] > offset }) - 1
91	return scanner.Position{
92		Filename: p.scanner.Filename,
93		Line:     line + 1,
94		Column:   offset - p.lines[line] + 1,
95		Offset:   offset,
96	}
97}
98
99func (p *parser) pos() Pos {
100	pos := p.scanner.Position
101	if !pos.IsValid() {
102		pos = p.scanner.Pos()
103	}
104	return Pos(pos.Offset)
105}
106
107func (p *parser) errorf(format string, args ...interface{}) {
108	err := &ParseError{
109		Err: fmt.Errorf(format, args...),
110		Pos: p.scanner.Position,
111	}
112	p.errors = append(p.errors, err)
113	if len(p.errors) >= maxErrors {
114		panic(errTooManyErrors)
115	}
116}
117
118func (p *parser) accept(toks ...rune) bool {
119	for _, tok := range toks {
120		if p.tok != tok {
121			p.errorf("expected %s, found %s", scanner.TokenString(tok),
122				scanner.TokenString(p.tok))
123			return false
124		}
125		p.next()
126	}
127	return true
128}
129
130func (p *parser) next() {
131	if p.tok != scanner.EOF {
132		p.tok = p.scanner.Scan()
133		for p.tok == '\r' {
134			p.tok = p.scanner.Scan()
135		}
136	}
137	if p.tok == '\n' {
138		p.lines = append(p.lines, p.scanner.Position.Offset+1)
139	}
140}
141
142func (p *parser) parseLines() {
143	for {
144		p.ignoreWhitespace()
145
146		if p.parseDirective() {
147			continue
148		}
149
150		ident := p.parseExpression('=', '?', ':', '#', '\n')
151
152		p.ignoreSpaces()
153
154		switch p.tok {
155		case '?':
156			p.accept('?')
157			if p.tok == '=' {
158				p.parseAssignment("?=", nil, ident)
159			} else {
160				p.errorf("expected = after ?")
161			}
162		case '+':
163			p.accept('+')
164			if p.tok == '=' {
165				p.parseAssignment("+=", nil, ident)
166			} else {
167				p.errorf("expected = after +")
168			}
169		case ':':
170			p.accept(':')
171			switch p.tok {
172			case '=':
173				p.parseAssignment(":=", nil, ident)
174			default:
175				p.parseRule(ident)
176			}
177		case '=':
178			p.parseAssignment("=", nil, ident)
179		case '#', '\n', scanner.EOF:
180			ident.TrimRightSpaces()
181			if v, ok := toVariable(ident); ok {
182				p.nodes = append(p.nodes, &v)
183			} else if !ident.Empty() {
184				p.errorf("expected directive, rule, or assignment after ident " + ident.Dump())
185			}
186			switch p.tok {
187			case scanner.EOF:
188				return
189			case '\n':
190				p.accept('\n')
191			case '#':
192				p.parseComment()
193			}
194		default:
195			p.errorf("expected assignment or rule definition, found %s\n",
196				p.scanner.TokenText())
197			return
198		}
199	}
200}
201
202func (p *parser) parseDirective() bool {
203	if p.tok != scanner.Ident || !isDirective(p.scanner.TokenText()) {
204		return false
205	}
206
207	d := p.scanner.TokenText()
208	pos := p.pos()
209	p.accept(scanner.Ident)
210	endPos := NoPos
211
212	expression := SimpleMakeString("", pos)
213
214	switch d {
215	case "endif", "endef", "else":
216		// Nothing
217	case "define":
218		expression, endPos = p.parseDefine()
219	default:
220		p.ignoreSpaces()
221		expression = p.parseExpression()
222	}
223
224	p.nodes = append(p.nodes, &Directive{
225		NamePos: pos,
226		Name:    d,
227		Args:    expression,
228		EndPos:  endPos,
229	})
230	return true
231}
232
233func (p *parser) parseDefine() (*MakeString, Pos) {
234	value := SimpleMakeString("", p.pos())
235
236loop:
237	for {
238		switch p.tok {
239		case scanner.Ident:
240			value.appendString(p.scanner.TokenText())
241			if p.scanner.TokenText() == "endef" {
242				p.accept(scanner.Ident)
243				break loop
244			}
245			p.accept(scanner.Ident)
246		case '\\':
247			p.parseEscape()
248			switch p.tok {
249			case '\n':
250				value.appendString(" ")
251			case scanner.EOF:
252				p.errorf("expected escaped character, found %s",
253					scanner.TokenString(p.tok))
254				break loop
255			default:
256				value.appendString(`\` + string(p.tok))
257			}
258			p.accept(p.tok)
259		//TODO: handle variables inside defines?  result depends if
260		//define is used in make or rule context
261		//case '$':
262		//	variable := p.parseVariable()
263		//	value.appendVariable(variable)
264		case scanner.EOF:
265			p.errorf("unexpected EOF while looking for endef")
266			break loop
267		default:
268			value.appendString(p.scanner.TokenText())
269			p.accept(p.tok)
270		}
271	}
272
273	return value, p.pos()
274}
275
276func (p *parser) parseEscape() {
277	p.scanner.Mode = 0
278	p.accept('\\')
279	p.scanner.Mode = scanner.ScanIdents
280}
281
282func (p *parser) parseExpression(end ...rune) *MakeString {
283	value := SimpleMakeString("", p.pos())
284
285	endParen := false
286	for _, r := range end {
287		if r == ')' {
288			endParen = true
289		}
290	}
291	parens := 0
292
293loop:
294	for {
295		if endParen && parens > 0 && p.tok == ')' {
296			parens--
297			value.appendString(")")
298			p.accept(')')
299			continue
300		}
301
302		for _, r := range end {
303			if p.tok == r {
304				break loop
305			}
306		}
307
308		switch p.tok {
309		case '\n':
310			break loop
311		case scanner.Ident:
312			value.appendString(p.scanner.TokenText())
313			p.accept(scanner.Ident)
314		case '\\':
315			p.parseEscape()
316			switch p.tok {
317			case '\n':
318				value.appendString(" ")
319			case scanner.EOF:
320				p.errorf("expected escaped character, found %s",
321					scanner.TokenString(p.tok))
322				return value
323			default:
324				value.appendString(`\` + string(p.tok))
325			}
326			p.accept(p.tok)
327		case '#':
328			p.parseComment()
329			break loop
330		case '$':
331			var variable Variable
332			variable = p.parseVariable()
333			if variable.Name == builtinDollarName {
334				value.appendString("$")
335			} else {
336				value.appendVariable(variable)
337			}
338		case scanner.EOF:
339			break loop
340		case '(':
341			if endParen {
342				parens++
343			}
344			value.appendString("(")
345			p.accept('(')
346		default:
347			value.appendString(p.scanner.TokenText())
348			p.accept(p.tok)
349		}
350	}
351
352	if parens > 0 {
353		p.errorf("expected closing paren %s", value.Dump())
354	}
355	return value
356}
357
358func (p *parser) parseVariable() Variable {
359	pos := p.pos()
360	p.accept('$')
361	var name *MakeString
362	switch p.tok {
363	case '(':
364		return p.parseBracketedVariable('(', ')', pos)
365	case '{':
366		return p.parseBracketedVariable('{', '}', pos)
367	case '$':
368		name = builtinDollarName
369		p.accept(p.tok)
370	case scanner.EOF:
371		p.errorf("expected variable name, found %s",
372			scanner.TokenString(p.tok))
373	default:
374		name = p.parseExpression(variableNameEndRunes...)
375	}
376
377	return p.nameToVariable(name)
378}
379
380func (p *parser) parseBracketedVariable(start, end rune, pos Pos) Variable {
381	p.accept(start)
382	name := p.parseExpression(end)
383	p.accept(end)
384	return p.nameToVariable(name)
385}
386
387func (p *parser) nameToVariable(name *MakeString) Variable {
388	return Variable{
389		Name: name,
390	}
391}
392
393func (p *parser) parseRule(target *MakeString) {
394	prerequisites, newLine := p.parseRulePrerequisites(target)
395
396	recipe := ""
397	recipePos := p.pos()
398loop:
399	for {
400		if newLine {
401			if p.tok == '\t' {
402				p.accept('\t')
403				newLine = false
404				continue loop
405			} else if p.parseDirective() {
406				newLine = false
407				continue
408			} else {
409				break loop
410			}
411		}
412
413		newLine = false
414		switch p.tok {
415		case '\\':
416			p.parseEscape()
417			recipe += string(p.tok)
418			p.accept(p.tok)
419		case '\n':
420			newLine = true
421			recipe += "\n"
422			p.accept('\n')
423		case scanner.EOF:
424			break loop
425		default:
426			recipe += p.scanner.TokenText()
427			p.accept(p.tok)
428		}
429	}
430
431	if prerequisites != nil {
432		p.nodes = append(p.nodes, &Rule{
433			Target:        target,
434			Prerequisites: prerequisites,
435			Recipe:        recipe,
436			RecipePos:     recipePos,
437		})
438	}
439}
440
441func (p *parser) parseRulePrerequisites(target *MakeString) (*MakeString, bool) {
442	newLine := false
443
444	p.ignoreSpaces()
445
446	prerequisites := p.parseExpression('#', '\n', ';', ':', '=')
447
448	switch p.tok {
449	case '\n':
450		p.accept('\n')
451		newLine = true
452	case '#':
453		p.parseComment()
454		newLine = true
455	case ';':
456		p.accept(';')
457	case ':':
458		p.accept(':')
459		if p.tok == '=' {
460			p.parseAssignment(":=", target, prerequisites)
461			return nil, true
462		} else {
463			more := p.parseExpression('#', '\n', ';')
464			prerequisites.appendMakeString(more)
465		}
466	case '=':
467		p.parseAssignment("=", target, prerequisites)
468		return nil, true
469	case scanner.EOF:
470		// do nothing
471	default:
472		p.errorf("unexpected token %s after rule prerequisites", scanner.TokenString(p.tok))
473	}
474
475	return prerequisites, newLine
476}
477
478func (p *parser) parseComment() {
479	pos := p.pos()
480	p.accept('#')
481	comment := ""
482loop:
483	for {
484		switch p.tok {
485		case '\\':
486			p.parseEscape()
487			if p.tok == '\n' {
488				// Special case: '\' does not "escape" newline in comment (b/127521510)
489				comment += "\\"
490				p.accept(p.tok)
491				break loop
492			}
493			comment += "\\" + p.scanner.TokenText()
494			p.accept(p.tok)
495		case '\n':
496			p.accept('\n')
497			break loop
498		case scanner.EOF:
499			break loop
500		default:
501			comment += p.scanner.TokenText()
502			p.accept(p.tok)
503		}
504	}
505
506	p.comments = append(p.comments, &Comment{
507		CommentPos: pos,
508		Comment:    comment,
509	})
510}
511
512func (p *parser) parseAssignment(t string, target *MakeString, ident *MakeString) {
513	// The value of an assignment is everything including and after the first
514	// non-whitespace character after the = until the end of the logical line,
515	// which may included escaped newlines
516	p.accept('=')
517	value := p.parseExpression()
518	value.TrimLeftSpaces()
519	if ident.EndsWith('+') && t == "=" {
520		ident.TrimRightOne()
521		t = "+="
522	}
523
524	ident.TrimRightSpaces()
525
526	p.nodes = append(p.nodes, &Assignment{
527		Name:   ident,
528		Value:  value,
529		Target: target,
530		Type:   t,
531	})
532}
533
type (
	// androidMkModule holds the name-to-value assignments of one module.
	androidMkModule struct {
		assignments map[string]string
	}

	// androidMkFile holds file-level assignments, the modules found in the
	// file, and the list of includes.
	androidMkFile struct {
		assignments map[string]string
		modules     []androidMkModule
		includes    []string
	}
)
543
// directives lists the keywords parsed as directives. init sorts it so that
// isDirective can stop scanning early.
var directives = [...]string{
	"define", "else", "endef", "endif",
	"ifdef", "ifeq", "ifndef", "ifneq",
	"include", "-include",
}
556
// functions lists the names recognized as built-in functions. init sorts it
// so that isFunctionName can stop scanning early.
var functions = [...]string{
	"abspath", "addprefix", "addsuffix", "basename", "dir", "notdir",
	"subst", "suffix", "filter", "filter-out", "findstring", "firstword",
	"flavor", "join", "lastword", "patsubst", "realpath", "shell",
	"sort", "strip", "wildcard", "word", "wordlist", "words",
	"origin", "foreach", "call", "info", "error", "warning",
	"if", "or", "and", "value", "eval", "file",
}
595
596func init() {
597	sort.Strings(directives[:])
598	sort.Strings(functions[:])
599}
600
601func isDirective(s string) bool {
602	for _, d := range directives {
603		if s == d {
604			return true
605		} else if s < d {
606			return false
607		}
608	}
609	return false
610}
611
612func isFunctionName(s string) bool {
613	for _, f := range functions {
614		if s == f {
615			return true
616		} else if s < f {
617			return false
618		}
619	}
620	return false
621}
622
// isWhitespace reports whether ch is a space, a tab or a newline.
func isWhitespace(ch rune) bool {
	switch ch {
	case ' ', '\t', '\n':
		return true
	}
	return false
}
626
// isValidVariableRune reports whether ch is anything other than the
// identifier token value, ':', '=' or '#'.
func isValidVariableRune(ch rune) bool {
	switch ch {
	case scanner.Ident, ':', '=', '#':
		return false
	default:
		return true
	}
}
630
var (
	// whitespaceRunes are the runes isWhitespace accepts.
	whitespaceRunes = []rune{' ', '\t', '\n'}

	// variableNameEndRunes are the runes that end an unbracketed variable
	// name: ':', '=', '#', ')', '}' and any whitespace rune.
	variableNameEndRunes = append([]rune{':', '=', '#', ')', '}'}, whitespaceRunes...)
)
633
634func (p *parser) ignoreSpaces() int {
635	skipped := 0
636	for p.tok == ' ' || p.tok == '\t' {
637		p.accept(p.tok)
638		skipped++
639	}
640	return skipped
641}
642
643func (p *parser) ignoreWhitespace() {
644	for isWhitespace(p.tok) {
645		p.accept(p.tok)
646	}
647}
648