A fast implementation of the Nix expression language
Revisão | 10999947ad7b4aefc700339a144555ab8035bdf3 (tree) |
---|---|
Hora | 2024-04-27 16:58:10 |
Autor | Corbin <cds@corb...> |
Committer | Corbin |
regiux: Hack out about half of Nix's grammar.
Okay, maybe more like a third. There's a bunch of gnarly regexes that
probably can be ported directly for the lexer, a bunch of implicit
characters that have to be explicitly catalogued and named, and a lot of
boxes to be written for the parser.
I've used as much metaprogramming as I can here, but historically
RPython doesn't like autogeneration of box classes, so they are
handwritten. Yuck.
@@ -9,35 +9,182 @@ from rply.token import BaseBox | ||
9 | 9 | |
10 | 10 | lg = rply.LexerGenerator() |
11 | 11 | |
12 | +lg.add("ID", "[a-zA-Z\_][a-zA-Z0-9\_\'\-]*") | |
12 | 13 | lg.add("INT", "[0-9]+") |
14 | + | |
15 | +KEYWORDS = "IF THEN ELSE ASSERT WITH LET IN REC INHERIT OR".split() | |
16 | +for kw in KEYWORDS: lg.add(kw, kw.lower()) | |
17 | + | |
18 | +lg.add("ELLIPSIS", "\.\.\.") | |
19 | +lg.add("EQ", "\=\=") | |
20 | +lg.add("NEQ", "\!\=") | |
21 | +lg.add("LEQ", "\<\=") | |
22 | +lg.add("LE", "\<") | |
23 | +lg.add("GEQ", "\>\=") | |
24 | +lg.add("GE", "\>") | |
25 | +lg.add("AND", "\&\&") | |
26 | +lg.add("OR", "\|\|") | |
27 | +lg.add("IMPL", "\-\>") | |
28 | +lg.add("UPDATE", "\/\/") | |
29 | +lg.add("CONCAT", "\+\+") | |
13 | 30 | lg.add("PLUS", "\+") |
31 | +lg.add("MINUS", "-") | |
32 | +lg.add("MUL", "\*") | |
33 | +lg.add("DIV", "\/") | |
34 | +lg.add("NOT", "!") | |
35 | +lg.add("HAS", "\?") | |
14 | 36 | |
15 | -lg.ignore(" +") | |
37 | +lg.add("COLON", ":") | |
38 | +lg.add("OPEN_PAREN", "\(") | |
39 | +lg.add("CLOSE_PAREN", "\)") | |
40 | +lg.add("DOT", "\.") | |
41 | + | |
42 | +lg.ignore("[ \t\r\n]+") | |
43 | +lg.ignore("#[^\r\n]*") | |
44 | +lg.ignore("\/\*([^*]|\*+[^*/])*\*+\/") | |
16 | 45 | |
17 | 46 | lexer = lg.build() |
18 | 47 | |
48 | +class IdBox(BaseBox): | |
49 | + def __init__(self, value): self.value = value | |
50 | + def pretty(self): return self.value | |
51 | + | |
52 | +class AttrPathBox(BaseBox): | |
53 | + def __init__(self, attrs): self.attrs = attrs | |
54 | + def pretty(self): return ".".join([attr.pretty() for attr in self.attrs]) | |
55 | + def getattrs(self): return self.attrs | |
56 | + | |
19 | 57 | class IntBox(BaseBox): |
20 | 58 | def __init__(self, value): self.value = value |
21 | 59 | def pretty(self): return str(self.value) |
22 | 60 | |
23 | -class ExprOpBox(BaseBox): | |
61 | +class ExprUnaryBox(BaseBox): | |
62 | + def __init__(self, expr, op): | |
63 | + self.expr = expr | |
64 | + self.op = op.getstr() | |
65 | + def pretty(self): | |
66 | + return "(%s%s)" % (self.op, self.expr.pretty()) | |
67 | + | |
68 | +class ExprBinaryBox(BaseBox): | |
24 | 69 | def __init__(self, left, right, op): |
25 | 70 | self.left = left |
26 | 71 | self.right = right |
27 | 72 | self.op = op.getstr() |
28 | 73 | def pretty(self): |
29 | - return "%s %s %s" % (self.left.pretty(), self.op, self.right.pretty()) | |
74 | + return "(%s %s %s)" % (self.left.pretty(), self.op, self.right.pretty()) | |
75 | + | |
76 | +class HasBox(BaseBox): | |
77 | + def __init__(self, value, path): | |
78 | + self.value = value | |
79 | + self.path = path | |
80 | + def pretty(self): | |
81 | + return "(%s ? %s)" % (self.value.pretty(), self.path.pretty()) | |
82 | + | |
83 | +class LambdaBox(BaseBox): | |
84 | + def __init__(self, binding, body): | |
85 | + self.binding = binding | |
86 | + self.body = body | |
87 | + def pretty(self): return "%s: %s" % (self.binding.pretty(), self.body.pretty()) | |
88 | + | |
89 | +class AppBox(BaseBox): | |
90 | + def __init__(self, func, arg): | |
91 | + self.func = func | |
92 | + self.arg = arg | |
93 | + def pretty(self): return "(%s) (%s)" % (self.func.pretty(), self.arg.pretty()) | |
94 | + | |
95 | +class IfBox(BaseBox): | |
96 | + def __init__(self, cond, seq, alt): | |
97 | + self.cond = cond | |
98 | + self.seq = seq | |
99 | + self.alt = alt | |
100 | + def pretty(self): | |
101 | + return "if %s then %s else %s" % ( | |
102 | + self.cond.pretty(), self.seq.pretty(), self.alt.pretty()) | |
30 | 103 | |
31 | -pg = rply.ParserGenerator([ | |
32 | - "INT", | |
33 | - "PLUS", | |
104 | +pg = rply.ParserGenerator(KEYWORDS + [ | |
105 | + "ID", "INT", | |
106 | + "AND", "IMPL", "OR", | |
107 | + "EQ", "NEQ", "LE", "GE", "LEQ", "GEQ", "HAS", | |
108 | + "CONCAT", "UPDATE", | |
109 | + "DIV", "MINUS", "MUL", "PLUS", | |
110 | + "NEGATE", "NOT", | |
111 | + "COLON", "OPEN_PAREN", "CLOSE_PAREN", "DOT", | |
112 | +], precedence=[ | |
113 | + ("right", ["IMPL"]), | |
114 | + ("left", ["OR"]), | |
115 | + ("left", ["AND"]), | |
116 | + ("nonassoc", ["EQ", "NEQ"]), | |
117 | + ("nonassoc", ["LE", "GE", "LEQ", "GEQ"]), | |
118 | + ("right", ["UPDATE"]), | |
119 | + ("left", ["NOT"]), | |
120 | + ("left", ["PLUS", "MINUS"]), | |
121 | + ("left", ["MUL", "DIV"]), | |
122 | + ("right", ["CONCAT"]), | |
123 | + ("nonassoc", ["HAS"]), | |
124 | + ("nonassoc", ["NEGATE"]), | |
34 | 125 | ]) |
35 | 126 | |
36 | -@pg.production("expr : expr PLUS expr") | |
37 | -def exprOp(p): return ExprOpBox(p[0], p[2], p[1]) | |
127 | +class ParseError(Exception): | |
128 | + def __init__(self, token): self.token = token | |
38 | 129 | |
39 | -@pg.production("expr : INT") | |
40 | -def exprSimple(p): return IntBox(int(p[0].getstr())) | |
130 | +@pg.error | |
131 | +def parseError(token): raise ParseError(token) | |
132 | + | |
133 | +def precRule(sup, sub): | |
134 | + pg.production("expr%s : expr%s" % (sup, sub))(lambda p: p[0]) | |
135 | +SPINE = "", "_function", "_if", "_op", "_app", "_select", "_simple" | |
136 | +for sup, sub in zip(SPINE, SPINE[1:]): precRule(sup, sub) | |
137 | + | |
138 | +@pg.production("expr_function : ID COLON expr_function") | |
139 | +def exprLambda(p): return LambdaBox(p[0], p[2]) | |
140 | + | |
141 | +@pg.production("expr_if : IF expr THEN expr ELSE expr") | |
142 | +def exprIf(p): return IfBox(p[1], p[3], p[5]) | |
143 | + | |
144 | +@pg.production("expr_op : NEGATE expr_op") | |
145 | +@pg.production("expr_op : NOT expr_op") | |
146 | +def exprUnary(p): return ExprUnaryBox(p[1], p[0]) | |
147 | + | |
148 | +@pg.production("expr_op : expr_op AND expr_op") | |
149 | +@pg.production("expr_op : expr_op CONCAT expr_op") | |
150 | +@pg.production("expr_op : expr_op DIV expr_op") | |
151 | +@pg.production("expr_op : expr_op EQ expr_op") | |
152 | +@pg.production("expr_op : expr_op GE expr_op") | |
153 | +@pg.production("expr_op : expr_op GEQ expr_op") | |
154 | +@pg.production("expr_op : expr_op IMPL expr_op") | |
155 | +@pg.production("expr_op : expr_op LE expr_op") | |
156 | +@pg.production("expr_op : expr_op LEQ expr_op") | |
157 | +@pg.production("expr_op : expr_op MINUS expr_op") | |
158 | +@pg.production("expr_op : expr_op MUL expr_op") | |
159 | +@pg.production("expr_op : expr_op NEQ expr_op") | |
160 | +@pg.production("expr_op : expr_op OR expr_op") | |
161 | +@pg.production("expr_op : expr_op PLUS expr_op") | |
162 | +@pg.production("expr_op : expr_op UPDATE expr_op") | |
163 | +def exprBinary(p): return ExprBinaryBox(p[0], p[2], p[1]) | |
164 | + | |
165 | +@pg.production("expr_op : expr_op HAS attrpath") | |
166 | +def exprHas(p): return HasBox(p[0], p[2]) | |
167 | + | |
168 | +@pg.production("expr_app : expr_app expr_select") | |
169 | +def exprApp(p): return AppBox(p[0], p[1]) | |
170 | + | |
171 | +@pg.production("expr_simple : OPEN_PAREN expr CLOSE_PAREN") | |
172 | +def exprParens(p): return p[1] | |
173 | + | |
174 | +@pg.production("expr_simple : ID") | |
175 | +def exprSimpleId(p): return IdBox(p[0].getstr()) | |
176 | + | |
177 | +@pg.production("expr_simple : INT") | |
178 | +def exprSimpleInt(p): return IntBox(int(p[0].getstr())) | |
179 | + | |
180 | +@pg.production("attrpath : attrpath DOT attr") | |
181 | +def attrpathNil(p): return AttrPathBox(p[0].getattrs() + [p[1]]) | |
182 | + | |
183 | +@pg.production("attrpath : attr") | |
184 | +def attrpathCons(p): return AttrPathBox(p) | |
185 | + | |
186 | +@pg.production("attr : ID") | |
187 | +def attrId(p): return p[0] | |
41 | 188 | |
42 | 189 | parser = pg.build() |
43 | 190 |
@@ -47,10 +194,23 @@ def entryPoint(argv): | ||
47 | 194 | return 1 |
48 | 195 | path = argv[1] |
49 | 196 | with open(path, "rb") as handle: expr = handle.read() |
50 | - ast = parser.parse(lexer.lex(expr)) | |
51 | - print ast.pretty() | |
52 | - return 0 | |
197 | + try: | |
198 | + ast = parser.parse(lexer.lex(expr)) | |
199 | + print "Success" | |
200 | + print "AST:", ast | |
201 | + print ast.pretty() | |
202 | + return 0 | |
203 | + except ParseError as e: | |
204 | + print "Error" | |
205 | + token = e.token | |
206 | + print "Unexpected token:", token.gettokentype(), "(%s)" % token.getstr() | |
207 | + pos = token.getsourcepos() | |
208 | + if pos is None: print "No position information" | |
209 | + else: print "Error at line %d, column %d" % (pos.lineno, pos.colno) | |
210 | + return 1 | |
53 | 211 | |
54 | 212 | def target(*args): return entryPoint, None |
55 | 213 | |
56 | 214 | if __name__ == "__main__": entryPoint(sys.argv) |
215 | + | |
216 | +# 456 + 293 = 749 |