From 9a5cdde850ba5a6185d5524ebf8acc25dfd00762 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Hugo=20H=C3=B6rnquist?= <hugo@lysator.liu.se>
Date: Thu, 30 Jun 2022 01:48:21 +0200
Subject: C parser add basic float support.

---
 module/c/lex.scm   | 34 +++++++++++++--------
 module/c/parse.scm | 40 +++++++++++++++++++++++--
 tests/test/cpp.scm | 87 +++++++++++++++++++++++++++++++++++++++++++++++-------
 3 files changed, 136 insertions(+), 25 deletions(-)

diff --git a/module/c/lex.scm b/module/c/lex.scm
index 2b024f1c..b6523a87 100644
--- a/module/c/lex.scm
+++ b/module/c/lex.scm
@@ -43,22 +43,23 @@
 (define-peg-pattern integer all
   (and (or base-8 base-16 base-10) (? integer-suffix)))
 
-;; (define-peg-pattern float-suffix all
-;;   (* (or "f" "F" "l" "L")))
+(define-peg-pattern float-suffix all
+  (* (or "f" "F" "l" "L")))
 
-;; (define-peg-pattern exponent all
-;;   (and (ignore (or "e" "E")) (? (or "+" "-")) base-10))
+(define-peg-pattern exponent all
+  (and (ignore (or "e" "E")) (? (or "+" "-")) base-10))
 
-;; (define-peg-pattern float all
-;;   (or
-;;    (and base-10 exponent (? float-suffix))
-;;    (and base-10 (ignore ".") (? exponent) (? float-suffix))
-;;    (and (? base-10) (ignore ".") base-10 (? exponent) (? float-suffix))))
+;; Helper patterns for creating named groups in float
+(define-peg-pattern float-integer all base-10)
+(define-peg-pattern float-decimal all base-10)
+
+(define-peg-pattern float all
+  (or (and    float-integer                                exponent  (? float-suffix))
+      (and (? float-integer) (ignore ".") float-decimal (? exponent) (? float-suffix))
+      (and    float-integer  (ignore ".")               (? exponent) (? float-suffix))))
 
 (define-peg-pattern number body
-  (or ; float
-   integer
-   ))
+  (or float integer))
 
 (define-peg-pattern group all
   (and (ignore "(") expr (ignore ")")))
@@ -145,7 +146,14 @@
 
 ;;; main parser
 (define-peg-pattern expr body
-  (+ (and sp (or infix postfix prefix funcall group literal variable)
+  (+ (and sp (or
+              ;; float must be BEFORE infix, otherwise 3.2 is parsed as (infix 3 (operator ".") 2)
+              ;; that however breaks the infix logic, meaning that floating point numbers can't be
+              ;; used in basic arithmetic.
+              ;; TODO remove all implicit order of operations handling in the lexer, and move it to
+              ;; the parser. This should also fix the case of typecasts being applied incorrectly.
+              float
+              infix postfix prefix funcall group literal variable)
           sp)))
 
 
diff --git a/module/c/parse.scm b/module/c/parse.scm
index 15240bc1..09ede544 100644
--- a/module/c/parse.scm
+++ b/module/c/parse.scm
@@ -42,6 +42,11 @@
                   "Invalid integer suffix ~s"
                   (list str) #f)))
 
+(define (parse-float-suffix str)
+  (case (string->symbol str)
+    ((f F) '(float))
+    ((l L) '(long double))))
+
 (define (group-body->type vars)
   (concatenate
    (map
@@ -63,6 +68,29 @@
     (bytevector-u8-set! bv (bytevector-length bv*) 0)
     bv))
 
+(define (parse-float-form float-form)
+  (let ((float-string
+         (fold (lambda (arg str)
+                 (string-append
+                  str
+                  (match arg
+                    (('float-integer ('base-10 n)) n)
+                    (('float-decimal ('base-10 n)) (string-append "." n))
+                    (('exponent "+"  ('base-10 n)) (string-append "e"  n))
+                    (('exponent      ('base-10 n)) (string-append "e"  n))
+                    (('exponent "-"  ('base-10 n)) (string-append "e-" n)))))
+               "" float-form)))
+    ;; exact->inexact is a no-op if we already have an inexact number, but
+    ;; ensures we get an inexact number when we have an exact number (which we
+    ;; can get from the "1." case). Returning an inexact number here is important
+    ;; to avoid arithmetic suprises later.
+    (exact->inexact
+     (or (string->number float-string)
+         (scm-error 'c-parse-error "parse-lexeme-tree"
+                    "Couldn't parse expression as float: ~s"
+                    (list `(float ,@args)) #f)))))
+
+
 (define (parse-lexeme-tree tree)
   (match tree
     ['() '()]
@@ -75,11 +103,19 @@
     [('integer n ('integer-suffix suffix))
      `(as-type
        ,(parse-integer-suffix suffix)
-       ,(parse-lexeme-tree n))
-     ]
+       ,(parse-lexeme-tree n))]
+
     [('integer n)
      (parse-lexeme-tree n)]
 
+
+    [('float args ... ('float-suffix suffix))
+     `(as-type ,(parse-float-suffix suffix)
+               ;; parse rest of float as if it lacked a suffix
+               ,(parse-lexeme-tree `(float ,@args)))]
+
+    [('float args ...) (parse-float-form args)]
+
     ;; Character literals, stored as raw integers
     ;; so mathematical operations keep working on them.
     [('char ('escaped-char ('base-8-char n)))
diff --git a/tests/test/cpp.scm b/tests/test/cpp.scm
index 2c51f616..32072932 100644
--- a/tests/test/cpp.scm
+++ b/tests/test/cpp.scm
@@ -15,13 +15,16 @@
 
 ;; __asm__ always has strings as arguments
 (test-skip "__asm__")
-;; not implemented
-(test-skip "Token concatenation")
-;; not implemented
-(test-skip "Floating point numbers")
+
+;; Lexer produces garbage when attempted. Fixing this would also fix cast
+;; operations.
+(test-skip "Float in infix expression")
 ;; order of operation is wrong, leading to an incorrect result
 (test-skip "Cast with operation")
 
+;; not implemented
+(test-skip "Token concatenation")
+
 ;; A string follewed by a macro (which expands to a string)
 ;; should be concatenated. This is however not yet implemented
 (test-skip "Implicit concatenation of string and macro")
@@ -278,13 +281,77 @@
     (test-equal '0 (run form))))
 
 (test-group "Floating point numbers"
-  (let ((form "4.9406564584124654e-324"))
-    (test-equal '(float (base-10 "4") (base-10)) (lex form))
-    (test-equal '0 (run form)))
 
-  (let ((form "1.7976931348623157e+308"))
-    (test-equal '() (lex form))
-    (test-equal '0 (run form))))
+  (test-group "Diffent forms"
+    (test-group "No decimal point, exponent, no suffix"
+      (let ((form "10e10"))
+        (test-equal '(float (float-integer (base-10 "10"))
+                            (exponent (base-10 "10")))
+          (lex form))
+        (test-equal 10e10 (run form))))
+
+    (test-group "No decimal point, negative exponent"
+      (let ((form "10e-10"))
+        (test-equal '(float (float-integer (base-10 "10"))
+                            (exponent "-" (base-10 "10")))
+          (lex form))
+        (test-equal 10e-10 (run form))))
+
+    (test-group "No decimal point, exponent and suffix"
+      (let ((form "10e10L"))
+        (test-equal '(float (float-integer (base-10 "10"))
+                            (exponent (base-10 "10"))
+                            (float-suffix "L"))
+          (lex form))
+        (test-equal '(as-type (long double) 10e10)
+          (run form))))
+
+    (test-group "Leading period, no exponent or suffix"
+      (let ((form ".1"))
+        (test-equal '(float (float-decimal (base-10 "1"))) (lex form))
+        (test-equal 0.1 (run form))))
+
+    (test-group "Trailing period, no exponent or suffix"
+      (let ((form "1."))
+        (test-equal '(float (float-integer (base-10 "1"))) (lex form))
+        (test-equal 1.0 (run form)))))
+
+
+  (test-group "Negative float"
+    (let ((form "-1.0"))
+      (test-equal '(prefix (prefix-operator "-")
+                           (float (float-integer (base-10 "1"))
+                                  (float-decimal (base-10 "0"))))
+        (lex form))
+      (test-equal '(- 1.0) (run form))))
+
+
+
+  (test-group "Real world examples"
+    (let ((form "4.9406564584124654e-324"))
+      (test-equal '(float (float-integer (base-10 "4"))
+                          (float-decimal (base-10 "9406564584124654"))
+                          (exponent "-" (base-10 "324")))
+        (lex form))
+      (test-equal 4.9406564584124654e-324 (run form)))
+
+    (let ((form "1.7976931348623157e+308"))
+      (test-equal '(float (float-integer (base-10 "1"))
+                          (float-decimal (base-10 "7976931348623157"))
+                          (exponent "+" (base-10 "308")))
+        (lex form))
+      (test-equal 1.7976931348623157e+308 (run form))))
+
+  (test-group "Float in infix expression"
+    (test-group "Simple case"
+      (let ((form "1. + .1"))
+        (test-equal '(infix (float (float-integer (base-10 "1")))
+                            (operator "+")
+                            (float (float-decimal (base-10 "1"))))
+          (lex form))
+        (test-equal '(+ 1.0 0.1) (run form))))
+    ;; (test-group "Complicated case")
+    ))
 
 (test-group "Typecasts"
 
-- 
cgit v1.2.3