From 9a5cdde850ba5a6185d5524ebf8acc25dfd00762 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Hugo=20H=C3=B6rnquist?= <hugo@lysator.liu.se>
Date: Thu, 30 Jun 2022 01:48:21 +0200
Subject: C parser add basic float support.

---
 module/c/lex.scm   | 34 +++++++++++++++++++++-------------
 module/c/parse.scm | 40 ++++++++++++++++++++++++++++++++++++++--
 2 files changed, 59 insertions(+), 15 deletions(-)

(limited to 'module')

diff --git a/module/c/lex.scm b/module/c/lex.scm
index 2b024f1c..b6523a87 100644
--- a/module/c/lex.scm
+++ b/module/c/lex.scm
@@ -43,22 +43,23 @@
 (define-peg-pattern integer all
   (and (or base-8 base-16 base-10) (? integer-suffix)))
 
-;; (define-peg-pattern float-suffix all
-;;   (* (or "f" "F" "l" "L")))
+(define-peg-pattern float-suffix all   ; C allows a single suffix letter: f, F, l or L
+  (or "f" "F" "l" "L"))
 
-;; (define-peg-pattern exponent all
-;;   (and (ignore (or "e" "E")) (? (or "+" "-")) base-10))
+(define-peg-pattern exponent all   ; the e/E marker is matched via `ignore'; optional sign and digits follow
+  (and (ignore (or "e" "E")) (? (or "+" "-")) base-10))
 
-;; (define-peg-pattern float all
-;;   (or
-;;    (and base-10 exponent (? float-suffix))
-;;    (and base-10 (ignore ".") (? exponent) (? float-suffix))
-;;    (and (? base-10) (ignore ".") base-10 (? exponent) (? float-suffix))))
+;; Helper patterns giving the two base-10 parts of a float distinct node names
+(define-peg-pattern float-integer all base-10)   ; digits before the "."
+(define-peg-pattern float-decimal all base-10)   ; digits after the "."
+
+(define-peg-pattern float all   ; alternatives, in order: INT EXP | [INT] "." DEC [EXP] | INT "." [EXP]; suffix optional
+  (or (and    float-integer                                exponent  (? float-suffix))
+      (and (? float-integer) (ignore ".") float-decimal (? exponent) (? float-suffix))
+      (and    float-integer  (ignore ".")               (? exponent) (? float-suffix))))
 
 (define-peg-pattern number body
-  (or ; float
-   integer
-   ))
+  (or float integer))   ; PEG ordered choice: float is tried before integer
 
 (define-peg-pattern group all
   (and (ignore "(") expr (ignore ")")))
@@ -145,7 +146,14 @@
 
 ;;; main parser
 (define-peg-pattern expr body
-  (+ (and sp (or infix postfix prefix funcall group literal variable)
+  (+ (and sp (or
+              ;; float must come BEFORE infix, otherwise 3.2 is parsed as
+              ;; (infix 3 (operator ".") 2). That however breaks the infix logic, meaning
+              ;; that floating point numbers can't be used in basic arithmetic.
+              ;; TODO remove all implicit order of operations handling in the lexer, and move
+              ;; it to the parser. This should also fix typecasts being applied incorrectly.
+              float
+              infix postfix prefix funcall group literal variable)
           sp)))
 
 
diff --git a/module/c/parse.scm b/module/c/parse.scm
index 15240bc1..09ede544 100644
--- a/module/c/parse.scm
+++ b/module/c/parse.scm
@@ -42,6 +42,11 @@
                   "Invalid integer suffix ~s"
                   (list str) #f)))
 
+(define (parse-float-suffix str)   ; float suffix string -> type specifier list
+  (case (string->symbol str) ((f F) '(float)) ((l L) '(long double))
+    (else (scm-error 'c-parse-error "parse-float-suffix"   ; don't return an unspecified value
+                     "Invalid float suffix ~s" (list str) #f))))
+
 (define (group-body->type vars)
   (concatenate
    (map
@@ -63,6 +68,29 @@
     (bytevector-u8-set! bv (bytevector-length bv*) 0)
     bv))
 
+;; Convert the children of a lexed `float' node (float-integer, float-decimal and
+;; exponent forms) to an inexact number; c-parse-error if the text isn't a number.
+(define (parse-float-form float-form)
+  (let ((float-string
+         (fold (lambda (arg str)
+                 (string-append
+                  str
+                  (match arg
+                    (('float-integer ('base-10 n)) n)
+                    (('float-decimal ('base-10 n)) (string-append "." n))
+                    (('exponent "+"  ('base-10 n)) (string-append "e"  n))
+                    (('exponent      ('base-10 n)) (string-append "e"  n))
+                    (('exponent "-"  ('base-10 n)) (string-append "e-" n)))))
+               "" float-form)))
+    ;; exact->inexact is a no-op on an inexact number, but the "1." case gives
+    ;; an exact number from string->number. Always returning an inexact number
+    ;; avoids arithmetic surprises later.
+    (exact->inexact
+     (or (string->number float-string)
+         (scm-error 'c-parse-error "parse-lexeme-tree"
+                    "Couldn't parse expression as float: ~s"
+                    (list `(float ,@float-form)) #f)))))
+
 (define (parse-lexeme-tree tree)
   (match tree
     ['() '()]
@@ -75,11 +103,19 @@
     [('integer n ('integer-suffix suffix))
      `(as-type
        ,(parse-integer-suffix suffix)
-       ,(parse-lexeme-tree n))
-     ]
+       ,(parse-lexeme-tree n))]
+
     [('integer n)
      (parse-lexeme-tree n)]
 
+
+    [('float args ... ('float-suffix suffix))
+     `(as-type ,(parse-float-suffix suffix)
+               ;; parse rest of float as if it lacked a suffix
+               ,(parse-lexeme-tree `(float ,@args)))]
+
+    [('float args ...) (parse-float-form args)]
+
     ;; Character literals, stored as raw integers
     ;; so mathematical operations keep working on them.
     [('char ('escaped-char ('base-8-char n)))
-- 
cgit v1.2.3