From 9a5cdde850ba5a6185d5524ebf8acc25dfd00762 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20H=C3=B6rnquist?= Date: Thu, 30 Jun 2022 01:48:21 +0200 Subject: C parser add basic float support. --- module/c/lex.scm | 34 +++++++++++++-------- module/c/parse.scm | 40 +++++++++++++++++++++++-- tests/test/cpp.scm | 87 +++++++++++++++++++++++++++++++++++++++++++++++------- 3 files changed, 136 insertions(+), 25 deletions(-) diff --git a/module/c/lex.scm b/module/c/lex.scm index 2b024f1c..b6523a87 100644 --- a/module/c/lex.scm +++ b/module/c/lex.scm @@ -43,22 +43,23 @@ (define-peg-pattern integer all (and (or base-8 base-16 base-10) (? integer-suffix))) -;; (define-peg-pattern float-suffix all -;; (* (or "f" "F" "l" "L"))) +(define-peg-pattern float-suffix all + (* (or "f" "F" "l" "L"))) -;; (define-peg-pattern exponent all -;; (and (ignore (or "e" "E")) (? (or "+" "-")) base-10)) +(define-peg-pattern exponent all + (and (ignore (or "e" "E")) (? (or "+" "-")) base-10)) -;; (define-peg-pattern float all -;; (or -;; (and base-10 exponent (? float-suffix)) -;; (and base-10 (ignore ".") (? exponent) (? float-suffix)) -;; (and (? base-10) (ignore ".") base-10 (? exponent) (? float-suffix)))) +;; Helper patterns for creating named groups in float +(define-peg-pattern float-integer all base-10) +(define-peg-pattern float-decimal all base-10) + +(define-peg-pattern float all + (or (and float-integer exponent (? float-suffix)) + (and (? float-integer) (ignore ".") float-decimal (? exponent) (? float-suffix)) + (and float-integer (ignore ".") (? exponent) (? float-suffix)))) (define-peg-pattern number body - (or ; float - integer - )) + (or float integer)) (define-peg-pattern group all (and (ignore "(") expr (ignore ")"))) @@ -145,7 +146,14 @@ ;;; main parser (define-peg-pattern expr body - (+ (and sp (or infix postfix prefix funcall group literal variable) + (+ (and sp (or + ;; float must be BEFORE infix, otherwise 3.2 is parsed as (infix 3 (operator ".") 2) + ;; that however breaks the infix logic, meaning that floating point numbers can't be + ;; used in basic arithmetic. + ;; TODO remove all implicit order of operations handling in the lexer, and move it to + ;; the parser. This should also fix the case of typecasts being applied incorrectly. + float + infix postfix prefix funcall group literal variable) sp))) diff --git a/module/c/parse.scm b/module/c/parse.scm index 15240bc1..09ede544 100644 --- a/module/c/parse.scm +++ b/module/c/parse.scm @@ -42,6 +42,11 @@ "Invalid integer suffix ~s" (list str) #f))) +(define (parse-float-suffix str) + (case (string->symbol str) + ((f F) '(float)) + ((l L) '(long double)))) + (define (group-body->type vars) (concatenate (map @@ -63,6 +68,29 @@ (bytevector-u8-set! bv (bytevector-length bv*) 0) bv)) +(define (parse-float-form float-form) + (let ((float-string + (fold (lambda (arg str) + (string-append + str + (match arg + (('float-integer ('base-10 n)) n) + (('float-decimal ('base-10 n)) (string-append "." n)) + (('exponent "+" ('base-10 n)) (string-append "e" n)) + (('exponent ('base-10 n)) (string-append "e" n)) + (('exponent "-" ('base-10 n)) (string-append "e-" n))))) + "" float-form))) + ;; exact->inexact is a no-op if we already have an inexact number, but + ;; ensures we get an inexact number when we have an exact number (which we + ;; can get from the "1." case). Returning an inexact number here is important + ;; to avoid arithmetic suprises later. + (exact->inexact + (or (string->number float-string) + (scm-error 'c-parse-error "parse-lexeme-tree" + "Couldn't parse expression as float: ~s" + (list `(float ,@args)) #f))))) + + (define (parse-lexeme-tree tree) (match tree ['() '()] @@ -75,11 +103,19 @@ [('integer n ('integer-suffix suffix)) `(as-type ,(parse-integer-suffix suffix) - ,(parse-lexeme-tree n)) - ] + ,(parse-lexeme-tree n))] + [('integer n) (parse-lexeme-tree n)] + + [('float args ... ('float-suffix suffix)) + `(as-type ,(parse-float-suffix suffix) + ;; parse rest of float as if it lacked a suffix + ,(parse-lexeme-tree `(float ,@args)))] + + [('float args ...) (parse-float-form args)] + ;; Character literals, stored as raw integers ;; so mathematical operations keep working on them. [('char ('escaped-char ('base-8-char n))) diff --git a/tests/test/cpp.scm b/tests/test/cpp.scm index 2c51f616..32072932 100644 --- a/tests/test/cpp.scm +++ b/tests/test/cpp.scm @@ -15,13 +15,16 @@ ;; __asm__ always has strings as arguments (test-skip "__asm__") -;; not implemented -(test-skip "Token concatenation") -;; not implemented -(test-skip "Floating point numbers") + +;; Lexer produces garbage when attempted. Fixing this would also fix cast +;; operations. +(test-skip "Float in infix expression") ;; order of operation is wrong, leading to an incorrect result (test-skip "Cast with operation") +;; not implemented +(test-skip "Token concatenation") + ;; A string follewed by a macro (which expands to a string) ;; should be concatenated. This is however not yet implemented (test-skip "Implicit concatenation of string and macro") @@ -278,13 +281,77 @@ (test-equal '0 (run form)))) (test-group "Floating point numbers" - (let ((form "4.9406564584124654e-324")) - (test-equal '(float (base-10 "4") (base-10)) (lex form)) - (test-equal '0 (run form))) - (let ((form "1.7976931348623157e+308")) - (test-equal '() (lex form)) - (test-equal '0 (run form)))) + (test-group "Diffent forms" + (test-group "No decimal point, exponent, no suffix" + (let ((form "10e10")) + (test-equal '(float (float-integer (base-10 "10")) + (exponent (base-10 "10"))) + (lex form)) + (test-equal 10e10 (run form)))) + + (test-group "No decimal point, negative exponent" + (let ((form "10e-10")) + (test-equal '(float (float-integer (base-10 "10")) + (exponent "-" (base-10 "10"))) + (lex form)) + (test-equal 10e-10 (run form)))) + + (test-group "No decimal point, exponent and suffix" + (let ((form "10e10L")) + (test-equal '(float (float-integer (base-10 "10")) + (exponent (base-10 "10")) + (float-suffix "L")) + (lex form)) + (test-equal '(as-type (long double) 10e10) + (run form)))) + + (test-group "Leading period, no exponent or suffix" + (let ((form ".1")) + (test-equal '(float (float-decimal (base-10 "1"))) (lex form)) + (test-equal 0.1 (run form)))) + + (test-group "Trailing period, no exponent or suffix" + (let ((form "1.")) + (test-equal '(float (float-integer (base-10 "1"))) (lex form)) + (test-equal 1.0 (run form))))) + + + (test-group "Negative float" + (let ((form "-1.0")) + (test-equal '(prefix (prefix-operator "-") + (float (float-integer (base-10 "1")) + (float-decimal (base-10 "0")))) + (lex form)) + (test-equal '(- 1.0) (run form)))) + + + + (test-group "Real world examples" + (let ((form "4.9406564584124654e-324")) + (test-equal '(float (float-integer (base-10 "4")) + (float-decimal (base-10 "9406564584124654")) + (exponent "-" (base-10 "324"))) + (lex form)) + (test-equal 4.9406564584124654e-324 (run form))) + + (let ((form "1.7976931348623157e+308")) + (test-equal '(float (float-integer (base-10 "1")) + (float-decimal (base-10 "7976931348623157")) + (exponent "+" (base-10 "308"))) + (lex form)) + (test-equal 1.7976931348623157e+308 (run form)))) + + (test-group "Float in infix expression" + (test-group "Simple case" + (let ((form "1. + .1")) + (test-equal '(infix (float (float-integer (base-10 "1"))) + (operator "+") + (float (float-decimal (base-10 "1")))) + (lex form)) + (test-equal '(+ 1.0 0.1) (run form)))) + ;; (test-group "Complicated case") + )) (test-group "Typecasts" -- cgit v1.2.3