https://github.com/akkartik/mu/blob/main/shell/infix.mu
  1 fn transform-infix x-ah: (addr handle cell), trace: (addr trace) {
  2   trace-text trace, "infix", "transform infix"
  3   trace-lower trace
  4 #?   trace-text trace, "infix", "todo"
  5 #?   draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, "a:", 2/fg 0/bg
  6 #?   dump-cell-from-cursor-over-full-screen x-ah, 7/fg 0/bg
  7   transform-infix-2 x-ah, trace, 1/at-head-of-list
  8   trace-higher trace
  9 }
 10 
 11 # Break any symbols containing operators down in place into s-expressions
 12 # Transform (... sym op sym ...) greedily in place into (... (op sym sym) ...)
 13 # Lisp code typed in at the keyboard will never have cycles
 14 fn transform-infix-2 _x-ah: (addr handle cell), trace: (addr trace), at-head-of-list?: boolean {
 15   var x-ah/edi: (addr handle cell) <- copy _x-ah
 16   var x/eax: (addr cell) <- lookup *x-ah
 17 +-- 14 lines: # trace x-ah --------------------------------------------------------------------------------------------------------------------------------------------------------------
 31   trace-lower trace
 32 #?   {
 33 #?     var foo/eax: int <- copy x
 34 #?     draw-int32-hex-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, foo, 7/fg 0/bg
 35 #?   }
 36 #?   dump-cell-from-cursor-over-full-screen x-ah, 5/fg 0/bg
 37   # null? return
 38   compare x, 0
 39   {
 40     break-if-!=
 41     trace-higher trace
 42     trace-text trace, "infix", "=> NULL"
 43     return
 44   }
 45   # nil? return
 46   {
 47     var nil?/eax: boolean <- nil? x
 48     compare nil?, 0/false
 49     break-if-=
 50     trace-higher trace
 51     trace-text trace, "infix", "=> nil"
 52     return
 53   }
 54   var x-type/ecx: (addr int) <- get x, type
 55   # symbol? maybe break it down into a pair
 56   {
 57     compare *x-type, 2/symbol
 58     break-if-!=
 59     tokenize-infix x-ah, trace
 60   }
 61   # not a pair? return
 62 #?   draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, "a", 4/fg 0/bg
 63 #?   draw-int32-decimal-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, *x-type, 5/fg 0/bg
 64   {
 65     compare *x-type, 0/pair
 66     break-if-=
 67     trace-higher trace
 68 +-- 15 lines: # trace "=> " x-ah --------------------------------------------------------------------------------------------------------------------------------------------------------
 83 #?     draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, "^", 4/fg 0/bg
 84     return
 85   }
 86 #?   draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, "b", 4/fg 0/bg
 87   # singleton operator? unwrap
 88   {
 89     var first-ah/ecx: (addr handle cell) <- get x, left
 90     {
 91       var first/eax: (addr cell) <- lookup *first-ah
 92       var operator?/eax: boolean <- operator-symbol? first
 93       compare operator?, 0/false
 94     }
 95     break-if-=
 96     var rest-ah/eax: (addr handle cell) <- get x, right
 97     var rest/eax: (addr cell) <- lookup *rest-ah
 98     var rest-nil?/eax: boolean <- nil? rest
 99     compare rest-nil?, 0/false
100     break-if-=
101     copy-object first-ah, x-ah
102     trace-higher trace
103 +-- 15 lines: # trace "=> " x-ah --------------------------------------------------------------------------------------------------------------------------------------------------------
118     return
119   }
120 #?   draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, "c", 4/fg 0/bg
121   ## non-singleton pair
122   # try to "pinch out" (op expr op ...) into ((op expr) op ...)
123   # (op expr expr ...) => operator in prefix position; do nothing
124   {
125     compare at-head-of-list?, 0/false
126     break-if-=
127     var first-ah/ecx: (addr handle cell) <- get x, left
128     var rest-ah/esi: (addr handle cell) <- get x, right
129     var first/eax: (addr cell) <- lookup *first-ah
130     var first-operator?/eax: boolean <- operator-symbol? first
131     compare first-operator?, 0/false
132     break-if-=
133     var rest/eax: (addr cell) <- lookup *rest-ah
134     {
135       var continue?/eax: boolean <- not-null-not-nil-pair? rest
136       compare continue?, 0/false
137     }
138     break-if-=
139     var second-ah/edx: (addr handle cell) <- get rest, left
140     rest-ah <- get rest, right
141     var rest/eax: (addr cell) <- lookup *rest-ah
142     {
143       var continue?/eax: boolean <- not-null-not-nil-pair? rest
144       compare continue?, 0/false
145     }
146     break-if-=
147     var third-ah/ebx: (addr handle cell) <- get rest, left
148     {
149       var third/eax: (addr cell) <- lookup *third-ah
150       var third-is-operator?/eax: boolean <- operator-symbol? third
151       compare third-is-operator?, 0/false
152     }
153     break-if-=
154     # if first and third are operators, bud out first two
155     var saved-rest-h: (handle cell)
156     var saved-rest-ah/eax: (addr handle cell) <- address saved-rest-h
157     copy-object rest-ah, saved-rest-ah
158     nil rest-ah
159     var result-h: (handle cell)
160     var result-ah/eax: (addr handle cell) <- address result-h
161     new-pair result-ah, *x-ah, saved-rest-h
162     # save
163     copy-object result-ah, x-ah
164     # there was a mutation; rerun
165     transform-infix-2 x-ah, trace, 1/at-head-of-list
166   }
167   # try to "pinch out" (... expr op expr ...) pattern
168   $transform-infix-2:pinch: {
169     # scan past first three elements
170     var first-ah/ecx: (addr handle cell) <- get x, left
171     var rest-ah/esi: (addr handle cell) <- get x, right
172     {
173       var quote-or-unquote?/eax: boolean <- quote-or-unquote? first-ah
174       compare quote-or-unquote?, 0/false
175     }
176     break-if-!=
177     var rest/eax: (addr cell) <- lookup *rest-ah
178     {
179       var continue?/eax: boolean <- not-null-not-nil-pair? rest
180       compare continue?, 0/false
181     }
182     break-if-=
183 #?     draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, "d", 4/fg 0/bg
184 #?     dump-cell-from-cursor-over-full-screen rest-ah, 7/fg 0/bg
185     var second-ah/edx: (addr handle cell) <- get rest, left
186     rest-ah <- get rest, right
187     var rest/eax: (addr cell) <- lookup *rest-ah
188     {
189       var continue?/eax: boolean <- not-null-not-nil-pair? rest
190       compare continue?, 0/false
191     }
192     break-if-=
193 #?     draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, "e", 4/fg 0/bg
194     var third-ah/ebx: (addr handle cell) <- get rest, left
195     rest-ah <- get rest, right
196     # if second is not an operator, break
197     {
198       var second/eax: (addr cell) <- lookup *second-ah
199       var infix?/eax: boolean <- operator-symbol? second
200       compare infix?, 0/false
201     }
202     break-if-=
203 #?     draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, "f", 4/fg 0/bg
204     # swap the top 2
205     swap-cells first-ah, second-ah
206     ## if we're at the head of the list and there's just three elements, stop there
207     {
208       compare at-head-of-list?, 0/false
209       break-if-=
210       rest <- lookup *rest-ah
211       var rest-nil?/eax: boolean <- nil? rest
212       compare rest-nil?, 0/false
213       break-if-!= $transform-infix-2:pinch
214     }
215     ## otherwise perform a more complex 'rotation'
216 #?     draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, "g", 4/fg 0/bg
217     # save and clear third->right
218     var saved-rest-h: (handle cell)
219     var saved-rest-ah/eax: (addr handle cell) <- address saved-rest-h
220     copy-object rest-ah, saved-rest-ah
221     nil rest-ah
222     # create new-node out of first..third and rest
223     var result-h: (handle cell)
224     var result-ah/eax: (addr handle cell) <- address result-h
225     new-pair result-ah, *x-ah, saved-rest-h
226     # save
227     copy-object result-ah, x-ah
228     # there was a mutation; rerun
229     transform-infix-2 x-ah, trace, 1/at-head-of-list
230     return
231   }
232   # recurse
233 #?   dump-cell-from-cursor-over-full-screen x-ah, 1/fg 0/bg
234   var left-ah/ecx: (addr handle cell) <- get x, left
235 #?   draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, "x", 1/fg 0/bg
236 #?   dump-cell-from-cursor-over-full-screen left-ah, 2/fg 0/bg
237   transform-infix-2 left-ah, trace, 1/at-head-of-list
238   var right-ah/edx: (addr handle cell) <- get x, right
239 #?   draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, "y", 1/fg 0/bg
240 #?   dump-cell-from-cursor-over-full-screen right-ah, 3/fg 0/bg
241   var right-at-head-of-list?/eax: boolean <- copy at-head-of-list?
242   {
243     compare right-at-head-of-list?, 0/false
244     break-if-=
245     # if left is a quote or unquote, cdr is still head of list
246     {
247       var left-is-quote-or-unquote?/eax: boolean <- quote-or-unquote? left-ah
248       compare left-is-quote-or-unquote?, 0/false
249     }
250     break-if-!=
251     right-at-head-of-list? <- copy 0/false
252   }
253   transform-infix-2 right-ah, trace, right-at-head-of-list?
254 #?   draw-text-wrapping-right-then-down-from-cursor-over-full-screen 0/screen, "z", 1/fg 0/bg
255   trace-higher trace
256 +-- 15 lines: # trace "=> " x-ah --------------------------------------------------------------------------------------------------------------------------------------------------------
271 }
272 
273 fn not-null-not-nil-pair? _x: (addr cell) -> _/eax: boolean {
274   var x/esi: (addr cell) <- copy _x
275   compare x, 0
276   {
277     break-if-!=
278     return 0/false
279   }
280   var x-type/eax: (addr int) <- get x, type
281   compare *x-type, 0/pair
282   {
283     break-if-=
284     return 0/false
285   }
286   var nil?/eax: boolean <- nil? x
287   compare nil?, 0/false
288   {
289     break-if-=
290     return 0/false
291   }
292   return 1/true
293 }
294 
295 fn swap-cells a-ah: (addr handle cell), b-ah: (addr handle cell) {
296   var tmp-h: (handle cell)
297   var tmp-ah/eax: (addr handle cell) <- address tmp-h
298   copy-object a-ah, tmp-ah
299   copy-object b-ah, a-ah
300   copy-object tmp-ah, b-ah
301 }
302 
303 fn tokenize-infix _sym-ah: (addr handle cell), trace: (addr trace) {
304   var sym-ah/eax: (addr handle cell) <- copy _sym-ah
305   var sym/eax: (addr cell) <- lookup *sym-ah
306   var sym-data-ah/eax: (addr handle stream byte) <- get sym, text-data
307   var _sym-data/eax: (addr stream byte) <- lookup *sym-data-ah
308   var sym-data/esi: (addr stream byte) <- copy _sym-data
309   rewind-stream sym-data
310   # read sym into a gap buffer and insert spaces in a few places
311   var buffer-storage: gap-buffer
312   var buffer/edi: (addr gap-buffer) <- address buffer-storage
313   initialize-gap-buffer buffer, 0x40/max-symbol-size
314   # scan for first non-$
315   var g/eax: code-point-utf8 <- read-code-point-utf8 sym-data
316   add-code-point-utf8-at-gap buffer, g
317   {
318     compare g, 0x24/dollar
319     break-if-!=
320     {
321       var done?/eax: boolean <- stream-empty? sym-data
322       compare done?, 0/false
323       break-if-=
324       return  # symbol is all '$'s; do nothing
325     }
326     g <- read-code-point-utf8 sym-data
327     add-code-point-utf8-at-gap buffer, g
328     loop
329   }
330   var tokenization-needed?: boolean
331   var _operator-so-far?/eax: boolean <- operator-code-point-utf8? g
332   var operator-so-far?/ecx: boolean <- copy _operator-so-far?
333   {
334     var done?/eax: boolean <- stream-empty? sym-data
335     compare done?, 0/false
336     break-if-!=
337     var g/eax: code-point-utf8 <- read-code-point-utf8 sym-data
338     {
339       var curr-operator?/eax: boolean <- operator-code-point-utf8? g
340       compare curr-operator?, operator-so-far?
341       break-if-=
342       # state change; insert a space
343       add-code-point-utf8-at-gap buffer, 0x20/space
344       operator-so-far? <- copy curr-operator?
345       copy-to tokenization-needed?, 1/true
346     }
347     add-code-point-utf8-at-gap buffer, g
348     loop
349   }
350   compare tokenization-needed?, 0/false
351   break-if-=
352 #?   {
353 #?     var dummy1/eax: int <- copy 0
354 #?     var dummy2/ecx: int <- copy 0
355 #?     dummy1, dummy2 <- render-gap-buffer-wrapping-right-then-down 0/screen, buffer, 0x20/xmin 5/ymin, 0x80/xmax 0x30/ymax, 0/no-cursor, 3/fg 0/bg
356 #?     {
357 #?       loop
358 #?     }
359 #?   }
360   # recursively process buffer
361   # this time we're guaranteed we won't enter tokenize-infix
362   read-cell buffer, _sym-ah, trace
363 }
364 
365 fn test-infix {
366   check-infix "abc", "abc", "F - test-infix/regular-symbol"
367   check-infix "-3", "-3", "F - test-infix/negative-integer-literal"
368   check-infix "[a b+c]", "[a b+c]", "F - test-infix/string-literal"
369   check-infix "$", "$", "F - test-infix/dollar-sym"
370   check-infix "$$", "$$", "F - test-infix/dollar-sym-2"
371   check-infix "$a", "$a", "F - test-infix/dollar-var"
372   check-infix "$+", "$+", "F - test-infix/dollar-operator"
373   check-infix "(+)", "+", "F - test-infix/operator-without-args"
374   check-infix "(= (+) 3)", "(= + 3)", "F - test-infix/operator-without-args-2"
375   check-infix "($+)", "$+", "F - test-infix/dollar-operator-without-args"
376   check-infix "',(a + b)", "',(+ a b)", "F - test-infix/nested-quotes"
377   check-infix "',(+)", "',+", "F - test-infix/nested-quotes-2"
378   check-infix "(a + b)", "(+ a b)", "F - test-infix/simple-list"
379   check-infix "(a (+) b)", "(a + b)", "F - test-infix/wrapped-operator"
380   check-infix "(+ a b)", "(+ a b)", "F - test-infix/prefix-operator"
381   check-infix "(a . b)", "(a . b)", "F - test-infix/dot-operator"
382   check-infix "(a b . c)", "(a b . c)", "F - test-infix/dotted-list"
383   check-infix "(+ . b)", "(+ . b)", "F - test-infix/dotted-list-with-operator"
384   check-infix "(+ a)", "(+ a)", "F - test-infix/unary-operator"
385   check-infix "((a + b))", "((+ a b))", "F - test-infix/nested-list"
386   check-infix "(do (a + b))", "(do (+ a b))", "F - test-infix/nested-list-2"
387   check-infix "(a = (a + 1))", "(= a (+ a 1))", "F - test-infix/nested-list-3"
388   check-infix "(a + b + c)", "(+ (+ a b) c)", "F - test-infix/left-associative"
389   check-infix "(f a + b)", "(f (+ a b))", "F - test-infix/higher-precedence-than-call"
390   check-infix "(f a + b c + d)", "(f (+ a b) (+ c d))", "F - test-infix/multiple"
391   check-infix "+a", "(+ a)", "F - test-infix/unary-operator-2"
392   check-infix "(+a)", "((+ a))", "F - test-infix/unary-operator-3"
393   check-infix "-a", "(- a)", "F - test-infix/unary-operator-4"
394   check-infix "a+b", "(+ a b)", "F - test-infix/no-spaces"
395   check-infix "3+1", "(+ 3 1)", "F - test-infix/no-spaces-starting-with-digit"
396   check-infix "',a+b", "',(+ a b)", "F - test-infix/no-spaces-with-nested-quotes"
397   check-infix "$a+b", "(+ $a b)", "F - test-infix/no-spaces-2"
398   check-infix "-a+b", "(+ (- a) b)", "F - test-infix/unary-over-binary"
399   check-infix "~a+b", "(+ (~ a) b)", "F - test-infix/unary-complement"
400   check-infix "(n * n-1)", "(* n (- n 1))", "F - test-infix/no-spaces-over-spaces"
401   check-infix "`(a + b)", "`(+ a b)", "F - test-infix/backquote"
402   check-infix "`(+ a b)", "`(+ a b)", "F - test-infix/backquote-2"
403   check-infix ",@a+b", ",@(+ a b)", "F - test-infix/unquote-splice"
404   check-infix ",@(a + b)", ",@(+ a b)", "F - test-infix/unquote-splice-2"
405 }
406 
407 # helpers
408 
409 # return true if x is composed entirely of operator code-point-utf8s, optionally prefixed with some '$'s
410 # some operator, some non-operator => pre-tokenized symbol; return false
411 # all '$'s => return false
412 fn operator-symbol? _x: (addr cell) -> _/eax: boolean {
413   var x/esi: (addr cell) <- copy _x
414   {
415     var x-type/eax: (addr int) <- get x, type
416     compare *x-type, 2/symbol
417     break-if-=
418     return 0/false
419   }
420   var x-data-ah/eax: (addr handle stream byte) <- get x, text-data
421   var _x-data/eax: (addr stream byte) <- lookup *x-data-ah
422   var x-data/esi: (addr stream byte) <- copy _x-data
423   rewind-stream x-data
424   var g/eax: code-point-utf8 <- read-code-point-utf8 x-data
425   # special case: '$' is reserved for gensyms, and can work with either
426   # operator or non-operator symbols.
427   {
428     compare g, 0x24/dollar
429     break-if-!=
430     {
431       var all-dollars?/eax: boolean <- stream-empty? x-data
432       compare all-dollars?, 0/false
433       break-if-=
434       # '$', '$$', '$$$', etc. are regular symbols
435       return 0/false
436     }
437     g <- read-code-point-utf8 x-data
438     loop
439   }
440   {
441     {
442       var result/eax: boolean <- operator-code-point-utf8? g
443       compare result, 0/false
444       break-if-!=
445       return 0/false
446     }
447     {
448       var done?/eax: boolean <- stream-empty? x-data
449       compare done?, 0/false
450     }
451     break-if-!=
452     g <- read-code-point-utf8 x-data
453     loop
454   }
455   return 1/true
456 }
457 
458 fn operator-code-point-utf8? g: code-point-utf8 -> _/eax: boolean {
459   # '$' is special and can be in either a symbol or operator; here we treat it as a symbol
460   compare g, 0x25/percent
461   {
462     break-if-!=
463     return 1/true
464   }
465   compare g, 0x26/ampersand
466   {
467     break-if-!=
468     return 1/true
469   }
470   compare g, 0x2a/asterisk
471   {
472     break-if-!=
473     return 1/true
474   }
475   compare g, 0x2b/plus
476   {
477     break-if-!=
478     return 1/true
479   }
480   compare g, 0x2d/dash  # '-' not allowed in symbols
481   {
482     break-if-!=
483     return 1/true
484   }
485   compare g, 0x2e/period
486   {
487     break-if-!=
488     return 1/true
489   }
490   compare g, 0x2f/slash
491   {
492     break-if-!=
493     return 1/true
494   }
495   compare g, 0x3a/colon
496   {
497     break-if-!=
498     return 1/true
499   }
500   compare g, 0x3b/semi-colon
501   {
502     break-if-!=
503     return 1/true
504   }
505   compare g, 0x3c/less-than
506   {
507     break-if-!=
508     return 1/true
509   }
510   compare g, 0x3d/equal
511   {
512     break-if-!=
513     return 1/true
514   }
515   compare g, 0x3e/greater-than
516   {
517     break-if-!=
518     return 1/true
519   }
520   # '?' is a symbol char
521   compare g, 0x5c/backslash
522   {
523     break-if-!=
524     return 1/true
525   }
526   compare g, 0x5e/caret
527   {
528     break-if-!=
529     return 1/true
530   }
531   # '_' is a symbol char
532   compare g, 0x7c/vertical-line
533   {
534     break-if-!=
535     return 1/true
536   }
537   compare g, 0x7e/tilde
538   {
539     break-if-!=
540     return 1/true
541   }
542   return 0/false
543 }
544 
545 fn quote-or-unquote? _x-ah: (addr handle cell) -> _/eax: boolean {
546   var x-ah/eax: (addr handle cell) <- copy _x-ah
547   var x/eax: (addr cell) <- lookup *x-ah
548   {
549     var quote?/eax: boolean <- symbol-equal? x, "'"
550     compare quote?, 0/false
551     break-if-=
552     return 1/true
553   }
554   {
555     var backquote?/eax: boolean <- symbol-equal? x, "`"
556     compare backquote?, 0/false
557     break-if-=
558     return 1/true
559   }
560   {
561     var unquote?/eax: boolean <- symbol-equal? x, ","
562     compare unquote?, 0/false
563     break-if-=
564     return 1/true
565   }
566   {
567     var unquote-splice?/eax: boolean <- symbol-equal? x, ",@"
568     compare unquote-splice?, 0/false
569     break-if-=
570     return 1/true
571   }
572   return 0/false
573 }
574 
575 # helpers for tests
576 
577 fn check-infix actual: (addr array byte), expected: (addr array byte), message: (addr array byte) {
578   var trace-storage: trace
579   var trace/edx: (addr trace) <- address trace-storage
580 #?   initialize-trace trace, 1/only-errors, 0x10/capacity, 0/visible
581   initialize-trace trace, 0x10/levels, 0x1000/capacity, 0/visible
582   #
583   var actual-buffer-storage: gap-buffer
584   var actual-buffer/eax: (addr gap-buffer) <- address actual-buffer-storage
585   initialize-gap-buffer-with actual-buffer, actual
586   var actual-tree-h: (handle cell)
587   var actual-tree-ah/esi: (addr handle cell) <- address actual-tree-h
588   read-cell actual-buffer, actual-tree-ah, trace
589 #?   dump-trace-with-label trace, "infix"
590 #?   dump-cell-from-cursor-over-full-screen actual-tree-ah, 7/fg 0/bg
591   var _actual-tree/eax: (addr cell) <- lookup *actual-tree-ah
592   var actual-tree/esi: (addr cell) <- copy _actual-tree
593   #
594   var expected-buffer-storage: gap-buffer
595   var expected-buffer/eax: (addr gap-buffer) <- address expected-buffer-storage
596   initialize-gap-buffer-with expected-buffer, expected
597   var expected-tree-h: (handle cell)
598   var expected-tree-ah/edi: (addr handle cell) <- address expected-tree-h
599   read-without-infix expected-buffer, expected-tree-ah, trace
600   var expected-tree/eax: (addr cell) <- lookup *expected-tree-ah
601   #
602   var match?/eax: boolean <- cell-isomorphic? actual-tree, expected-tree, trace
603   check match?, message
604 }
605 
606 fn read-without-infix in: (addr gap-buffer), out: (addr handle cell), trace: (addr trace) {
607   # eagerly tokenize everything so that the phases are easier to see in the trace
608   var tokens-storage: (stream token 0x400)
609   var tokens/edx: (addr stream token) <- address tokens-storage
610   tokenize in, tokens, trace
611   var error?/eax: boolean <- has-errors? trace
612   compare error?, 0/false
613   {
614     break-if-=
615     dump-trace trace
616     return
617   }
618   # insert more parens based on indentation
619   var parenthesized-tokens-storage: (stream token 0x400)
620   var parenthesized-tokens/ecx: (addr stream token) <- address parenthesized-tokens-storage
621   parenthesize tokens, parenthesized-tokens, trace
622   var error?/eax: boolean <- has-errors? trace
623   compare error?, 0/false
624   {
625     break-if-=
626     dump-trace trace
627     return
628   }
629   parse-input parenthesized-tokens, out, trace
630 }