https://github.com/akkartik/mu/blob/main/shell/parse.mu
  1 fn parse-input tokens: (addr stream cell), out: (addr handle cell), trace: (addr trace) {
      # Parse exactly one s-expression from `tokens` into `out`.
      # Nothing is returned; every failure is reported with `error trace, ...`.
      #
      # start reading from the beginning of the token stream
  2   rewind-stream tokens
      # an empty token stream is an error
  3   var empty?/eax: boolean <- stream-empty? tokens
  4   compare empty?, 0/false
  5   {
      # the comparison just above decides this break: skip the error when tokens exist
  6     break-if-=
  7     error trace, "nothing to parse"
  8     return
  9   }
      # parse-sexpression returns two flags: unmatched ')' seen? and '.' seen?
      # only the first one is inspected here
 10   var close-paren?/eax: boolean <- copy 0/false
 11   var dummy?/ecx: boolean <- copy 0/false
 12   close-paren?, dummy? <- parse-sexpression tokens, out, trace
      # NOTE(review): a lone '.' at top level comes back as dummy? = true and is
      # not reported here -- confirm that is intended
 13   {
 14     compare close-paren?, 0/false
 15     break-if-=
 16     error trace, "')' is not a valid expression"
 17     return
 18   }
      # anything still left in the stream after the first expression is an error
 19   {
 20     var empty?/eax: boolean <- stream-empty? tokens
 21     compare empty?, 0/false
 22     break-if-!=
 23     error trace, "unexpected tokens at end; only type in a single expression at a time"
 24   }
 25 }
 26 
 27 # return values:
 28 #   unmatched close-paren encountered?
 29 #   dot encountered? (only used internally by recursive calls)
    #
    # Reads one s-expression from `tokens` into `_out`:
    #   - quote-like tokens (' ` , ,@) become a pair: left = that symbol,
    #     right = the expression that follows
    #   - '.' and ')' store nothing; they are only reported through the return values
    #   - '(' reads elements until the matching ')', handing a '.' tail to parse-dot-tail
    #   - any other non-bracket token goes to parse-atom
    # Problems are reported with `error trace, ...` or `error-stream trace, ...`.
    # The trace-lower at the top is undone by a trace-higher before every return.
 30 fn parse-sexpression tokens: (addr stream cell), _out: (addr handle cell), trace: (addr trace) -> _/eax: boolean, _/ecx: boolean {
 31   trace-text trace, "parse", "parse"
 32   trace-lower trace
 33   var curr-token-storage: cell
 34   var curr-token/ecx: (addr cell) <- address curr-token-storage
      # running out of tokens sets the first return value, which also makes an
      # enclosing list loop stop reading
 35   var empty?/eax: boolean <- stream-empty? tokens
 36   compare empty?, 0/false
 37   {
 38     break-if-=
 39     error trace, "end of stream; never found a balancing ')'"
 40     trace-higher trace
 41     return 1/true, 0/false
 42   }
 43   read-from-stream tokens, curr-token
 44   $parse-sexpression:type-check: {
 45     # single quote -> parse as list with a special car
 46     var quote-token?/eax: boolean <- quote-token? curr-token
 47     compare quote-token?, 0/false
 48     {
 49       break-if-=
 50       var out/edi: (addr handle cell) <- copy _out
 51       allocate-pair out
 52       var out-addr/eax: (addr cell) <- lookup *out
 53       var left-ah/edx: (addr handle cell) <- get out-addr, left
 54       new-symbol left-ah, "'"
          # the quoted expression is parsed straight into the right slot;
          # its flags are passed through to our caller unchanged
 55       var right-ah/edx: (addr handle cell) <- get out-addr, right
 56       var close-paren?/eax: boolean <- copy 0/false
 57       var dot?/ecx: boolean <- copy 0/false
 58       close-paren?, dot? <- parse-sexpression tokens, right-ah, trace
 59       trace-higher trace
 60       return close-paren?, dot?
 61     }
 62     # backquote quote -> parse as list with a special car
 63     var backquote-token?/eax: boolean <- backquote-token? curr-token
 64     compare backquote-token?, 0/false
 65     {
 66       break-if-=
 67       var out/edi: (addr handle cell) <- copy _out
 68       allocate-pair out
 69       var out-addr/eax: (addr cell) <- lookup *out
 70       var left-ah/edx: (addr handle cell) <- get out-addr, left
 71       new-symbol left-ah, "`"
 72       var right-ah/edx: (addr handle cell) <- get out-addr, right
 73       var close-paren?/eax: boolean <- copy 0/false
 74       var dot?/ecx: boolean <- copy 0/false
 75       close-paren?, dot? <- parse-sexpression tokens, right-ah, trace
 76       trace-higher trace
 77       return close-paren?, dot?
 78     }
 79     # unquote -> parse as list with a special car
 80     var unquote-token?/eax: boolean <- unquote-token? curr-token
 81     compare unquote-token?, 0/false
 82     {
 83       break-if-=
 84       var out/edi: (addr handle cell) <- copy _out
 85       allocate-pair out
 86       var out-addr/eax: (addr cell) <- lookup *out
 87       var left-ah/edx: (addr handle cell) <- get out-addr, left
 88       new-symbol left-ah, ","
 89       var right-ah/edx: (addr handle cell) <- get out-addr, right
 90       var close-paren?/eax: boolean <- copy 0/false
 91       var dot?/ecx: boolean <- copy 0/false
 92       close-paren?, dot? <- parse-sexpression tokens, right-ah, trace
 93       trace-higher trace
 94       return close-paren?, dot?
 95     }
 96     # unquote-splice -> parse as list with a special car
 97     var unquote-splice-token?/eax: boolean <- unquote-splice-token? curr-token
 98     compare unquote-splice-token?, 0/false
 99     {
100       break-if-=
101       var out/edi: (addr handle cell) <- copy _out
102       allocate-pair out
103       var out-addr/eax: (addr cell) <- lookup *out
104       var left-ah/edx: (addr handle cell) <- get out-addr, left
105       new-symbol left-ah, ",@"
106       var right-ah/edx: (addr handle cell) <- get out-addr, right
107       var close-paren?/eax: boolean <- copy 0/false
108       var dot?/ecx: boolean <- copy 0/false
109       close-paren?, dot? <- parse-sexpression tokens, right-ah, trace
110       trace-higher trace
111       return close-paren?, dot?
112     }
113     # dot -> return
114     var dot?/eax: boolean <- dot-token? curr-token
115     compare dot?, 0/false
116     {
117       break-if-=
118       trace-higher trace
119       return 0/false, 1/true
120     }
121     # not bracket -> parse atom
122     var bracket-token?/eax: boolean <- bracket-token? curr-token
123     compare bracket-token?, 0/false
124     {
125       break-if-!=
126       parse-atom curr-token, _out, trace
127       break $parse-sexpression:type-check
128     }
129     # open paren -> parse list
130     var open-paren?/eax: boolean <- open-paren-token? curr-token
131     compare open-paren?, 0/false
132     {
133       break-if-=
          # `curr` always points at the handle the next pair of the list goes into
134       var curr/esi: (addr handle cell) <- copy _out
135       allocate-pair curr
136       var curr-addr/eax: (addr cell) <- lookup *curr
137       var left/edx: (addr handle cell) <- get curr-addr, left
138       {
            # the first element is parsed directly into the left slot of the first pair
139         var close-paren?/eax: boolean <- copy 0/false
140         var dot?/ecx: boolean <- copy 0/false
141         close-paren?, dot? <- parse-sexpression tokens, left, trace
142         {
143           compare dot?, 0/false
144           break-if-=
145           error trace, "'.' cannot be at the start of a list"
              # undo the trace-lower at the top of this function, like every other return
              trace-higher trace
146           return 1/true, dot?
147         }
            # '()' -> nothing more to read; the pair allocated above is left as is
148         compare close-paren?, 0/false
149         break-if-!=
150         var curr-addr/eax: (addr cell) <- lookup *curr
151         curr <- get curr-addr, right
            # later elements are parsed into a temporary first, because we only
            # learn afterwards whether we read an element, a ')' or a '.'
152         var tmp-storage: (handle cell)
153         var tmp/edx: (addr handle cell) <- address tmp-storage
154         $parse-sexpression:list-loop: {
155           var close-paren?/eax: boolean <- copy 0/false
156           var dot?/ecx: boolean <- copy 0/false
157           close-paren?, dot? <- parse-sexpression tokens, tmp, trace
158           # '.' -> clean up right here and return
159           compare dot?, 0/false
160           {
161             break-if-=
162             parse-dot-tail tokens, curr, trace
                # undo the trace-lower at the top of this function, like every other return
                trace-higher trace
163             return 0/false, 0/false
164           }
              # the next pair is allocated before the ')' check, so a finished list
              # ends in a freshly allocated pair
              # NOTE(review): presumably such an empty pair stands for nil -- confirm
165           allocate-pair curr
166           # ')' -> return
167           compare close-paren?, 0/false
168           break-if-!=
169           var curr-addr/eax: (addr cell) <- lookup *curr
170           var left/ecx: (addr handle cell) <- get curr-addr, left
171           copy-object tmp, left
172           #
173           curr <- get curr-addr, right
174           loop
175         }
176       }
177       break $parse-sexpression:type-check
178     }
179     # close paren -> return
180     var close-paren?/eax: boolean <- close-paren-token? curr-token
181     compare close-paren?, 0/false
182     {
183       break-if-=
184       trace-higher trace
185       return 1/true, 0/false
186     }
187     # otherwise abort
        # (a bracket token that is neither '(' nor ')'): report it along with its text
188     var stream-storage: (stream byte 0x400)
189     var stream/edx: (addr stream byte) <- address stream-storage
190     write stream, "unexpected token "
191     var curr-token-data-ah/eax: (addr handle stream byte) <- get curr-token, text-data
192     var curr-token-data/eax: (addr stream byte) <- lookup *curr-token-data-ah
193     rewind-stream curr-token-data
194     write-stream stream, curr-token-data
195     error-stream trace, stream
196   }
      # common exit for atoms, completed lists and the unexpected-token case
197   trace-higher trace
198   return 0/false, 0/false
199 }
200 
201 fn parse-atom _curr-token: (addr cell), _out: (addr handle cell), trace: (addr trace) {
      # Turn a single non-bracket token into a cell stored in `_out`:
      #   - number token -> number cell holding the token's decimal value as a float
      #   - stream token -> stream cell that receives the token's text-data handle
      #   - anything else -> symbol cell that receives the token's text-data handle
      # The token text and the result are logged to `trace` under the "parse" label.
202   trace-text trace, "parse", "parse atom"
203   var curr-token/ecx: (addr cell) <- copy _curr-token
204   var curr-token-data-ah/eax: (addr handle stream byte) <- get curr-token, text-data
205   var _curr-token-data/eax: (addr stream byte) <- lookup *curr-token-data-ah
206   var curr-token-data/esi: (addr stream byte) <- copy _curr-token-data
207   trace trace, "parse", curr-token-data
208   # number
209   var number-token?/eax: boolean <- number-token? curr-token
210   compare number-token?, 0/false
211   {
212     break-if-=
        # read the token text from its start as a decimal integer, then store it as a float
213     rewind-stream curr-token-data
214     var _val/eax: int <- parse-decimal-int-from-stream curr-token-data
215     var val/ecx: int <- copy _val
216     var val-float/xmm0: float <- convert val
217     allocate-number _out
218     var out/eax: (addr handle cell) <- copy _out
219     var out-addr/eax: (addr cell) <- lookup *out
220     var dest/edi: (addr float) <- get out-addr, number-data
221     copy-to *dest, val-float
        # log "=> number ..." only when should-trace? says so
222     {
          # NOTE(review): should-trace? gets its own block, presumably so that
          # out-addr (also in eax) is intact again for print-number below -- confirm
223       {
224         var should-trace?/eax: boolean <- should-trace? trace
225         compare should-trace?, 0/false
226       }
227       break-if-=
228       var stream-storage: (stream byte 0x400)
229       var stream/ecx: (addr stream byte) <- address stream-storage
230       write stream, "=> number "
          # print-number takes a trace of its own; give it a separate local one
231       var nested-trace-storage: trace
232       var nested-trace/edi: (addr trace) <- address nested-trace-storage
233       initialize-trace nested-trace, 1/only-errors, 0x10/capacity, 0/visible
234       print-number out-addr, stream, nested-trace
235       trace trace, "parse", stream
236     }
237     return
238   }
239   # default: copy either to a symbol or a stream
240   # stream token -> literal
241   var stream-token?/eax: boolean <- stream-token? curr-token
242   compare stream-token?, 0/false
243   {
244     break-if-=
245     allocate-stream _out
246   }
      # same flag tested again with the opposite branch: non-stream tokens become symbols
247   compare stream-token?, 0/false
248   {
249     break-if-!=
250     allocate-symbol _out
251   }
252   # copy token data
      # the text-data handle of the token is copied into the text-data slot of the new cell
253   var out/eax: (addr handle cell) <- copy _out
254   var out-addr/eax: (addr cell) <- lookup *out
255   var curr-token-data-ah/ecx: (addr handle stream byte) <- get curr-token, text-data
256   var dest-ah/edx: (addr handle stream byte) <- get out-addr, text-data
257   copy-object curr-token-data-ah, dest-ah
      # NOTE(review): the message below says "=> symbol " and uses print-symbol
      # for stream tokens as well -- confirm that is intended
258   {
259     {
260       var should-trace?/eax: boolean <- should-trace? trace
261       compare should-trace?, 0/false
262     }
263     break-if-=
264     var stream-storage: (stream byte 0x400)
265     var stream/ecx: (addr stream byte) <- address stream-storage
266     write stream, "=> symbol "
267     var nested-trace-storage: trace
268     var nested-trace/edi: (addr trace) <- address nested-trace-storage
269     initialize-trace nested-trace, 1/only-errors, 0x10/capacity, 0/visible
270     print-symbol out-addr, stream, nested-trace
271     trace trace, "parse", stream
272   }
273 }
274 
275 fn parse-dot-tail tokens: (addr stream cell), _out: (addr handle cell), trace: (addr trace) {
      # Finish a dotted list after parse-sexpression has consumed a '.' inside a list.
      # parse-sexpression passes the right slot of the list's last pair as `_out`.
      # Exactly one expression must follow the '.', and then a ')'.
      # Nothing is returned; violations are reported with `error trace, ...`.
276   var out/edi: (addr handle cell) <- copy _out
277   var close-paren?/eax: boolean <- copy 0/false
278   var dot?/ecx: boolean <- copy 0/false
      # first: the expression after the '.', parsed straight into `_out`
279   close-paren?, dot? <- parse-sexpression tokens, out, trace
280   compare close-paren?, 0/false
281   {
282     break-if-=
283     error trace, "'. )' makes no sense"
284     return
285   }
286   compare dot?, 0/false
287   {
288     break-if-=
289     error trace, "'. .' makes no sense"
290     return
291   }
292   #
      # second: the next thing has to be ')'; whatever gets parsed goes into a
      # throwaway handle and only the returned flags are looked at
293   var dummy: (handle cell)
294   var dummy-ah/edi: (addr handle cell) <- address dummy
295   close-paren?, dot? <- parse-sexpression tokens, dummy-ah, trace
296   compare close-paren?, 0/false
297   {
298     break-if-!=
299     error trace, "cannot have multiple expressions between '.' and ')'"
300     return
301   }
      # NOTE(review): for input like `(a . b . c)` the second parse comes back with
      # dot? set and close-paren? clear, so the check above reports it first; the
      # check below is only reached when both flags are set -- confirm this order
      # is intended
302   compare dot?, 0/false
303   {
304     break-if-=
305     error trace, "cannot have two dots in a single list"
306     return
307   }
308 }