A fast implementation of the Nix expression language
Revisão | b90c3acf89fb713f952afc784a3e6a2019e8fa8d (tree) |
---|---|
Hora | 2024-06-08 10:38:44 |
Autor | Corbin <cds@corb...> |
Committer | Corbin |
regiux: Parse and compile quasiliteral strings.
Requires a fork of RPLY, so this is currently broken.
@@ -28,6 +28,12 @@ class HeapObject(object): | ||
28 | 28 | def unwrapInt(self): |
29 | 29 | raise WrongType("Heap object type %s isn't an integer" % |
30 | 30 | self.__class__.__name__) |
31 | + def unwrapStr(self): | |
32 | + raise WrongType("Heap object type %s isn't a string" % | |
33 | + self.__class__.__name__) | |
34 | + def unwrapList(self): | |
35 | + raise WrongType("Heap object type %s isn't a list" % | |
36 | + self.__class__.__name__) | |
31 | 37 | |
32 | 38 | class HeapTrue(HeapObject): |
33 | 39 | _immutable_ = True |
@@ -51,6 +57,7 @@ class HeapStr(HeapObject): | ||
51 | 57 | _immutable_ = True |
52 | 58 | def __init__(self, s): self.s = s |
53 | 59 | def asStr(self): return self.s |
60 | + def unwrapStr(self): return self.s | |
54 | 61 | |
55 | 62 | # The possibly-cyclic nodes. |
56 | 63 |
@@ -107,6 +114,7 @@ class HeapList(MutableObject): | ||
107 | 114 | for obj in self.objs: obj.evaluate() |
108 | 115 | return self |
109 | 116 | def length(self): return len(self.objs) |
117 | + def unwrapList(self): return self.objs | |
110 | 118 | |
111 | 119 | class HeapSelect(MutableObject): |
112 | 120 | def __init__(self, obj, path): |
@@ -148,6 +156,8 @@ builtins = HeapAttrSet({ | ||
148 | 156 | "mul": HeapBinary(lambda x, y: HeapInt(x.unwrapInt() * y.unwrapInt())), |
149 | 157 | "sub": HeapBinary(lambda x, y: HeapInt(x.unwrapInt() - y.unwrapInt())), |
150 | 158 | "length": HeapAction(lambda obj: HeapInt(obj.length())), |
159 | + "concatStringsSep": HeapBinary(lambda sep, l: | |
160 | + HeapStr(sep.unwrapStr().join([x.unwrapStr() for x in l.unwrapList()]))), | |
151 | 161 | }) |
152 | 162 | |
153 | 163 | defaultScope = { |
@@ -9,6 +9,20 @@ import heap | ||
9 | 9 | |
10 | 10 | lg = rply.LexerGenerator() |
11 | 11 | |
12 | +# Lexer states for quasiliterals. | |
13 | +class LexerState(object): pass | |
14 | +class _StateExpr(LexerState): pass | |
15 | +STATE_EXPR = _StateExpr() | |
16 | +class _StateString(LexerState): pass | |
17 | +STATE_STRING = _StateString() | |
18 | + | |
19 | +STRING_CHAR = "([^\$\"\\\\]|\$[^\{\"\\\\])" | |
20 | + | |
21 | +lg.add("STRING", "\"%s*\"" % STRING_CHAR) | |
22 | +lg.add("STRING_INIT", "\"%s*\$\{" % STRING_CHAR, push=[STATE_STRING]) | |
23 | +lg.add("STRING_PIECE", "\}%s*\$\{" % STRING_CHAR, state=STATE_STRING) | |
24 | +lg.add("STRING_END", "\}%s*\"" % STRING_CHAR, state=STATE_STRING, pop=True) | |
25 | + | |
12 | 26 | KEYWORDS = "IF THEN ELSE ASSERT WITH LET REC INHERIT OR IN".split() |
13 | 27 | for kw in KEYWORDS: lg.add(kw, kw.lower()) |
14 | 28 |
@@ -33,8 +47,8 @@ lg.add("HAS", "\?") | ||
33 | 47 | |
34 | 48 | lg.add("COLON", ":") |
35 | 49 | lg.add("SEMI", ";") |
36 | -lg.add("OPEN_BRACE", "\{") | |
37 | -lg.add("CLOSE_BRACE", "\}") | |
50 | +lg.add("OPEN_BRACE", "\{", push=[STATE_EXPR]) | |
51 | +lg.add("CLOSE_BRACE", "\}", pop=True) | |
38 | 52 | lg.add("OPEN_BRACK", "\[") |
39 | 53 | lg.add("CLOSE_BRACK", "\]") |
40 | 54 | lg.add("OPEN_PAREN", "\(") |
@@ -56,9 +70,6 @@ lg.add("HPATH", "\~(\/{0}+)+\/?".format(PATH_CHAR)) | ||
56 | 70 | lg.add("HPATH_START", "\~\/") |
57 | 71 | lg.add("SPATH", "\<{0}+(\/{0}+)*\>".format(PATH_CHAR)) |
58 | 72 | |
59 | -lg.add("OPEN_QUASI", "\$\{") | |
60 | -lg.add("STRING", "\"([^\$\"\\\\]|\$[^\{\"\\\\])*\"") | |
61 | - | |
62 | 73 | lg.ignore("[ \t\r\n]+") |
63 | 74 | lg.ignore("#[^\r\n]*") |
64 | 75 | lg.ignore("\/\*([^*]|\*+[^*/])*\*+\/") |
@@ -162,6 +173,28 @@ class AppBox(EBox): | ||
162 | 173 | def compile(self, scope): |
163 | 174 | return heap.HeapApp(self.func.compile(scope), self.arg.compile(scope)) |
164 | 175 | |
176 | +class StrQLBox(EBox): | |
177 | + def __init__(self, init, pairs): | |
178 | + self.init = init | |
179 | + self.pairs = pairs | |
180 | + def pretty(self): | |
181 | + rv = [self.init] | |
182 | + for expr, piece in self.pairs: | |
183 | + rv.append("${ ") | |
184 | + rv.append(expr.pretty()) | |
185 | + rv.append(" }") | |
186 | + rv.append(piece) | |
187 | + return '"%s"' % "".join(rv) | |
188 | + def compile(self, scope): | |
189 | + l = [heap.HeapStr(self.init)] | |
190 | + for expr, piece in self.pairs: | |
191 | + l.append(expr.compile(scope)) | |
192 | + l.append(heap.HeapStr(piece)) | |
193 | + return heap.HeapApp( | |
194 | + heap.HeapApp(heap.HeapSelect(scope["builtins"], ["concatStringsSep"]), | |
195 | + heap.HeapStr("")), | |
196 | + heap.HeapList(l)) | |
197 | + | |
165 | 198 | class BindBox(RegiuxBox): |
166 | 199 | "A box for attrset attributes." |
167 | 200 | cls = "attrs" |
@@ -269,8 +302,12 @@ class LambdaBox(BaseBox): | ||
269 | 302 | elif self.params: return "%s: %s" % (self.params.pretty(), body) |
270 | 303 | else: return "_: " + body |
271 | 304 | |
305 | +class IncompleteQL(BaseBox): | |
306 | + def __init__(self, pairs): self.pairs = pairs | |
307 | + | |
272 | 308 | pg = rply.ParserGenerator(KEYWORDS + [ |
273 | 309 | "ID", "INT", "SPATH", "STRING", "URI", |
310 | + "STRING_INIT", "STRING_PIECE", "STRING_END", | |
274 | 311 | "AND", "IMPL", "OR_OP", |
275 | 312 | "EQ", "NEQ", "LE", "GE", "LEQ", "GEQ", "HAS", |
276 | 313 | "CONCAT", "UPDATE", |
@@ -293,6 +330,12 @@ pg = rply.ParserGenerator(KEYWORDS + [ | ||
293 | 330 | ("nonassoc", ["NEGATE"]), |
294 | 331 | ]) |
295 | 332 | |
333 | +def trimBox(b, start, stop): | |
334 | + s = b.getstr() | |
335 | + if stop < 0: stop += len(s) | |
336 | + assert stop >= 0, "Invariant from lexer regex" | |
337 | + return s[start:stop] | |
338 | + | |
296 | 339 | class ParseError(Exception): |
297 | 340 | def __init__(self, token): self.token = token |
298 | 341 |
@@ -369,11 +412,24 @@ def exprSimpleId(p): return VarBox(p[0].getstr()) | ||
369 | 412 | def exprSimpleInt(p): return IntBox(int(p[0].getstr())) |
370 | 413 | |
371 | 414 | @pg.production("expr_simple : STRING") |
372 | -def exprQuoted(p): | |
373 | - s = p[0].getstr() | |
374 | - stop = len(s) - 1 | |
375 | - assert stop >= 1, "Invariant from lexer regex" | |
376 | - return StrBox(s[1:stop]) | |
415 | +def exprQuoted(p): return StrBox(trimBox(p[0], 1, -1)) | |
416 | + | |
417 | +@pg.production("expr_simple : STRING_INIT string_ql") | |
418 | +def stringQL(p): | |
419 | + s = trimBox(p[0], 1, -2) | |
420 | + incomplete = p[1] | |
421 | + assert isinstance(incomplete, IncompleteQL) | |
422 | + return StrQLBox(s, incomplete.pairs) | |
423 | + | |
424 | +@pg.production("string_ql : expr STRING_PIECE string_ql") | |
425 | +def stringQLPiece(p): | |
426 | + incomplete = p[2] | |
427 | + assert isinstance(incomplete, IncompleteQL) | |
428 | + pairs = [(p[0], trimBox(p[1], 1, -2))] + incomplete.pairs | |
429 | + return IncompleteQL(pairs) | |
430 | + | |
431 | +@pg.production("string_ql : expr STRING_END") | |
432 | +def stringQLEnd(p): return IncompleteQL([(p[0], trimBox(p[1], 1, -1))]) | |
377 | 433 | |
378 | 434 | @pg.production("expr_simple : URI") |
379 | 435 | def exprURI(p): return StrBox(p[0].getstr()) |