From 9a5cdde850ba5a6185d5524ebf8acc25dfd00762 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20H=C3=B6rnquist?= Date: Thu, 30 Jun 2022 01:48:21 +0200 Subject: C parser add basic float support. --- module/c/lex.scm | 34 +++++++++++++++++++++------------- module/c/parse.scm | 40 ++++++++++++++++++++++++++++++++++++++-- 2 files changed, 59 insertions(+), 15 deletions(-) (limited to 'module') diff --git a/module/c/lex.scm b/module/c/lex.scm index 2b024f1c..b6523a87 100644 --- a/module/c/lex.scm +++ b/module/c/lex.scm @@ -43,22 +43,23 @@ (define-peg-pattern integer all (and (or base-8 base-16 base-10) (? integer-suffix))) -;; (define-peg-pattern float-suffix all -;; (* (or "f" "F" "l" "L"))) +(define-peg-pattern float-suffix all + (* (or "f" "F" "l" "L"))) -;; (define-peg-pattern exponent all -;; (and (ignore (or "e" "E")) (? (or "+" "-")) base-10)) +(define-peg-pattern exponent all + (and (ignore (or "e" "E")) (? (or "+" "-")) base-10)) -;; (define-peg-pattern float all -;; (or -;; (and base-10 exponent (? float-suffix)) -;; (and base-10 (ignore ".") (? exponent) (? float-suffix)) -;; (and (? base-10) (ignore ".") base-10 (? exponent) (? float-suffix)))) +;; Helper patterns for creating named groups in float +(define-peg-pattern float-integer all base-10) +(define-peg-pattern float-decimal all base-10) + +(define-peg-pattern float all + (or (and float-integer exponent (? float-suffix)) + (and (? float-integer) (ignore ".") float-decimal (? exponent) (? float-suffix)) + (and float-integer (ignore ".") (? exponent) (? float-suffix)))) (define-peg-pattern number body - (or ; float - integer - )) + (or float integer)) (define-peg-pattern group all (and (ignore "(") expr (ignore ")"))) @@ -145,7 +146,14 @@ ;;; main parser (define-peg-pattern expr body - (+ (and sp (or infix postfix prefix funcall group literal variable) + (+ (and sp (or + ;; float must be BEFORE infix, otherwise 3.2 is parsed as (infix 3 (operator ".") 2) + ;; that however breaks the infix logic, meaning that floating point numbers can't be + ;; used in basic arithmetic. + ;; TODO remove all implicit order of operations handling in the lexer, and move it to + ;; the parser. This should also fix the case of typecasts being applied incorrectly. + float + infix postfix prefix funcall group literal variable) sp))) diff --git a/module/c/parse.scm b/module/c/parse.scm index 15240bc1..09ede544 100644 --- a/module/c/parse.scm +++ b/module/c/parse.scm @@ -42,6 +42,11 @@ "Invalid integer suffix ~s" (list str) #f))) +(define (parse-float-suffix str) + (case (string->symbol str) + ((f F) '(float)) + ((l L) '(long double)))) + (define (group-body->type vars) (concatenate (map @@ -63,6 +68,29 @@ (bytevector-u8-set! bv (bytevector-length bv*) 0) bv)) +(define (parse-float-form float-form) + (let ((float-string + (fold (lambda (arg str) + (string-append + str + (match arg + (('float-integer ('base-10 n)) n) + (('float-decimal ('base-10 n)) (string-append "." n)) + (('exponent "+" ('base-10 n)) (string-append "e" n)) + (('exponent ('base-10 n)) (string-append "e" n)) + (('exponent "-" ('base-10 n)) (string-append "e-" n))))) + "" float-form))) + ;; exact->inexact is a no-op if we already have an inexact number, but + ;; ensures we get an inexact number when we have an exact number (which we + ;; can get from the "1." case). Returning an inexact number here is important + ;; to avoid arithmetic suprises later. + (exact->inexact + (or (string->number float-string) + (scm-error 'c-parse-error "parse-lexeme-tree" + "Couldn't parse expression as float: ~s" + (list `(float ,@args)) #f))))) + + (define (parse-lexeme-tree tree) (match tree ['() '()] @@ -75,11 +103,19 @@ [('integer n ('integer-suffix suffix)) `(as-type ,(parse-integer-suffix suffix) - ,(parse-lexeme-tree n)) - ] + ,(parse-lexeme-tree n))] + [('integer n) (parse-lexeme-tree n)] + + [('float args ... ('float-suffix suffix)) + `(as-type ,(parse-float-suffix suffix) + ;; parse rest of float as if it lacked a suffix + ,(parse-lexeme-tree `(float ,@args)))] + + [('float args ...) (parse-float-form args)] + ;; Character literals, stored as raw integers ;; so mathematical operations keep working on them. [('char ('escaped-char ('base-8-char n))) -- cgit v1.2.3