aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHugo Hörnquist <hugo@lysator.liu.se>2022-06-30 01:48:21 +0200
committerHugo Hörnquist <hugo@lysator.liu.se>2022-07-07 21:17:22 +0200
commit9a5cdde850ba5a6185d5524ebf8acc25dfd00762 (patch)
treef7b3ce6bbce8266cc3fd40f05ef0280990cc7cbe
parentChar parse tests, these were broken by strings. (diff)
downloadcalp-9a5cdde850ba5a6185d5524ebf8acc25dfd00762.tar.gz
calp-9a5cdde850ba5a6185d5524ebf8acc25dfd00762.tar.xz
C parser add basic float support.
-rw-r--r--module/c/lex.scm34
-rw-r--r--module/c/parse.scm40
-rw-r--r--tests/test/cpp.scm87
3 files changed, 136 insertions, 25 deletions
diff --git a/module/c/lex.scm b/module/c/lex.scm
index 2b024f1c..b6523a87 100644
--- a/module/c/lex.scm
+++ b/module/c/lex.scm
@@ -43,22 +43,23 @@
(define-peg-pattern integer all
(and (or base-8 base-16 base-10) (? integer-suffix)))
-;; (define-peg-pattern float-suffix all
-;; (* (or "f" "F" "l" "L")))
+(define-peg-pattern float-suffix all
+ (* (or "f" "F" "l" "L")))
-;; (define-peg-pattern exponent all
-;; (and (ignore (or "e" "E")) (? (or "+" "-")) base-10))
+(define-peg-pattern exponent all
+ (and (ignore (or "e" "E")) (? (or "+" "-")) base-10))
-;; (define-peg-pattern float all
-;; (or
-;; (and base-10 exponent (? float-suffix))
-;; (and base-10 (ignore ".") (? exponent) (? float-suffix))
-;; (and (? base-10) (ignore ".") base-10 (? exponent) (? float-suffix))))
+;; Helper patterns for creating named groups in float
+(define-peg-pattern float-integer all base-10)
+(define-peg-pattern float-decimal all base-10)
+
+(define-peg-pattern float all
+ (or (and float-integer exponent (? float-suffix))
+ (and (? float-integer) (ignore ".") float-decimal (? exponent) (? float-suffix))
+ (and float-integer (ignore ".") (? exponent) (? float-suffix))))
(define-peg-pattern number body
- (or ; float
- integer
- ))
+ (or float integer))
(define-peg-pattern group all
(and (ignore "(") expr (ignore ")")))
@@ -145,7 +146,14 @@
;;; main parser
(define-peg-pattern expr body
- (+ (and sp (or infix postfix prefix funcall group literal variable)
+ (+ (and sp (or
+ ;; float must be BEFORE infix, otherwise 3.2 is parsed as (infix 3 (operator ".") 2)
+ ;; that however breaks the infix logic, meaning that floating point numbers can't be
+ ;; used in basic arithmetic.
+ ;; TODO remove all implicit order of operations handling in the lexer, and move it to
+ ;; the parser. This should also fix the case of typecasts being applied incorrectly.
+ float
+ infix postfix prefix funcall group literal variable)
sp)))
diff --git a/module/c/parse.scm b/module/c/parse.scm
index 15240bc1..09ede544 100644
--- a/module/c/parse.scm
+++ b/module/c/parse.scm
@@ -42,6 +42,11 @@
"Invalid integer suffix ~s"
(list str) #f)))
+(define (parse-float-suffix str)
+ (case (string->symbol str)
+ ((f F) '(float))
+ ((l L) '(long double))))
+
(define (group-body->type vars)
(concatenate
(map
@@ -63,6 +68,29 @@
(bytevector-u8-set! bv (bytevector-length bv*) 0)
bv))
+(define (parse-float-form float-form)
+ (let ((float-string
+ (fold (lambda (arg str)
+ (string-append
+ str
+ (match arg
+ (('float-integer ('base-10 n)) n)
+ (('float-decimal ('base-10 n)) (string-append "." n))
+ (('exponent "+" ('base-10 n)) (string-append "e" n))
+ (('exponent ('base-10 n)) (string-append "e" n))
+ (('exponent "-" ('base-10 n)) (string-append "e-" n)))))
+ "" float-form)))
+ ;; exact->inexact is a no-op if we already have an inexact number, but
+ ;; ensures we get an inexact number when we have an exact number (which we
+ ;; can get from the "1." case). Returning an inexact number here is important
+ ;; to avoid arithmetic suprises later.
+ (exact->inexact
+ (or (string->number float-string)
+ (scm-error 'c-parse-error "parse-lexeme-tree"
+ "Couldn't parse expression as float: ~s"
+ (list `(float ,@args)) #f)))))
+
+
(define (parse-lexeme-tree tree)
(match tree
['() '()]
@@ -75,11 +103,19 @@
[('integer n ('integer-suffix suffix))
`(as-type
,(parse-integer-suffix suffix)
- ,(parse-lexeme-tree n))
- ]
+ ,(parse-lexeme-tree n))]
+
[('integer n)
(parse-lexeme-tree n)]
+
+ [('float args ... ('float-suffix suffix))
+ `(as-type ,(parse-float-suffix suffix)
+ ;; parse rest of float as if it lacked a suffix
+ ,(parse-lexeme-tree `(float ,@args)))]
+
+ [('float args ...) (parse-float-form args)]
+
;; Character literals, stored as raw integers
;; so mathematical operations keep working on them.
[('char ('escaped-char ('base-8-char n)))
diff --git a/tests/test/cpp.scm b/tests/test/cpp.scm
index 2c51f616..32072932 100644
--- a/tests/test/cpp.scm
+++ b/tests/test/cpp.scm
@@ -15,13 +15,16 @@
;; __asm__ always has strings as arguments
(test-skip "__asm__")
-;; not implemented
-(test-skip "Token concatenation")
-;; not implemented
-(test-skip "Floating point numbers")
+
+;; Lexer produces garbage when attempted. Fixing this would also fix cast
+;; operations.
+(test-skip "Float in infix expression")
;; order of operation is wrong, leading to an incorrect result
(test-skip "Cast with operation")
+;; not implemented
+(test-skip "Token concatenation")
+
;; A string follewed by a macro (which expands to a string)
;; should be concatenated. This is however not yet implemented
(test-skip "Implicit concatenation of string and macro")
@@ -278,13 +281,77 @@
(test-equal '0 (run form))))
(test-group "Floating point numbers"
- (let ((form "4.9406564584124654e-324"))
- (test-equal '(float (base-10 "4") (base-10)) (lex form))
- (test-equal '0 (run form)))
- (let ((form "1.7976931348623157e+308"))
- (test-equal '() (lex form))
- (test-equal '0 (run form))))
+ (test-group "Diffent forms"
+ (test-group "No decimal point, exponent, no suffix"
+ (let ((form "10e10"))
+ (test-equal '(float (float-integer (base-10 "10"))
+ (exponent (base-10 "10")))
+ (lex form))
+ (test-equal 10e10 (run form))))
+
+ (test-group "No decimal point, negative exponent"
+ (let ((form "10e-10"))
+ (test-equal '(float (float-integer (base-10 "10"))
+ (exponent "-" (base-10 "10")))
+ (lex form))
+ (test-equal 10e-10 (run form))))
+
+ (test-group "No decimal point, exponent and suffix"
+ (let ((form "10e10L"))
+ (test-equal '(float (float-integer (base-10 "10"))
+ (exponent (base-10 "10"))
+ (float-suffix "L"))
+ (lex form))
+ (test-equal '(as-type (long double) 10e10)
+ (run form))))
+
+ (test-group "Leading period, no exponent or suffix"
+ (let ((form ".1"))
+ (test-equal '(float (float-decimal (base-10 "1"))) (lex form))
+ (test-equal 0.1 (run form))))
+
+ (test-group "Trailing period, no exponent or suffix"
+ (let ((form "1."))
+ (test-equal '(float (float-integer (base-10 "1"))) (lex form))
+ (test-equal 1.0 (run form)))))
+
+
+ (test-group "Negative float"
+ (let ((form "-1.0"))
+ (test-equal '(prefix (prefix-operator "-")
+ (float (float-integer (base-10 "1"))
+ (float-decimal (base-10 "0"))))
+ (lex form))
+ (test-equal '(- 1.0) (run form))))
+
+
+
+ (test-group "Real world examples"
+ (let ((form "4.9406564584124654e-324"))
+ (test-equal '(float (float-integer (base-10 "4"))
+ (float-decimal (base-10 "9406564584124654"))
+ (exponent "-" (base-10 "324")))
+ (lex form))
+ (test-equal 4.9406564584124654e-324 (run form)))
+
+ (let ((form "1.7976931348623157e+308"))
+ (test-equal '(float (float-integer (base-10 "1"))
+ (float-decimal (base-10 "7976931348623157"))
+ (exponent "+" (base-10 "308")))
+ (lex form))
+ (test-equal 1.7976931348623157e+308 (run form))))
+
+ (test-group "Float in infix expression"
+ (test-group "Simple case"
+ (let ((form "1. + .1"))
+ (test-equal '(infix (float (float-integer (base-10 "1")))
+ (operator "+")
+ (float (float-decimal (base-10 "1"))))
+ (lex form))
+ (test-equal '(+ 1.0 0.1) (run form))))
+ ;; (test-group "Complicated case")
+ ))
(test-group "Typecasts"