From 6f9af58e183e40a3c876230e41c3221155e4dcc2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20H=C3=B6rnquist?= Date: Thu, 30 Jun 2022 07:07:11 +0200 Subject: C parser minor cleanup. --- module/c/cpp.scm | 22 +++++++++++++++++++--- module/c/lex.scm | 2 ++ module/c/operators.scm | 2 +- module/c/parse.scm | 24 ++++++++++++++---------- 4 files changed, 36 insertions(+), 14 deletions(-) (limited to 'module') diff --git a/module/c/cpp.scm b/module/c/cpp.scm index 86130167..861b8ee2 100644 --- a/module/c/cpp.scm +++ b/module/c/cpp.scm @@ -5,6 +5,7 @@ :use-module (ice-9 match) :use-module (ice-9 regex) :use-module ((rnrs io ports) :select (call-with-port)) + :use-module ((rnrs bytevectors) :select (bytevector?)) :use-module (ice-9 format) :use-module ((hnh util io) :select (read-lines)) :use-module (hnh util graph) @@ -44,12 +45,27 @@ ;; Direct values. Lisp also has quoted symbols in this group. (define (immediate? x) (or (number? x) - (char? x) - (string? x))) + (bytevector? x))) +;; TODO replace this with something sensible +;; like a correct list extracted from (c eval) +;; and not thinking that types are variables ;; built in symbols. Should never be marked as dependencies (define (primitive? x) - (memv x (cons 'funcall binary-operators))) + (memv x `( + ;; language primitives + sizeof + + ;; special forms introduced by parser + funcall ternary struct-type as-type + + ;; unary operations which aren't also binary operators + ++ -- ! ~ + not compl dereference pointer + pre-increment pre-decrement + post-increment post-decrement + ,@binary-operators + ))) diff --git a/module/c/lex.scm b/module/c/lex.scm index b3c82001..0bde5c9e 100644 --- a/module/c/lex.scm +++ b/module/c/lex.scm @@ -112,6 +112,8 @@ ;; It's important that ++ and -- are BEFORE + and - ;; otherwise the first + is found, leaving the second +, which fails ;; to lex since it's an invalid token + ;; TODO sizeof can be written as a prefix operator + ;; (without parentheses) if the operand is an expression. 
(or "*" "&" "++" "--" "!" "~" "+" "-")) diff --git a/module/c/operators.scm b/module/c/operators.scm index 131c702c..910dc8a9 100644 --- a/module/c/operators.scm +++ b/module/c/operators.scm @@ -11,7 +11,7 @@ ;; apparently part of C ;; https://en.cppreference.com/w/cpp/language/operator_alternative (define wordy-binary-operators - '(bitand and_eq and bitor or_eq or xor_eq xor)) + '(bitand and_eq and bitor or_eq or xor_eq xor not_eq)) (define symbol-binary-operators (append (map (lambda (x) (symbol-append x '=)) simple-operators) diff --git a/module/c/parse.scm b/module/c/parse.scm index d8cfd7cd..7d11ea17 100644 --- a/module/c/parse.scm +++ b/module/c/parse.scm @@ -8,8 +8,7 @@ :use-module (rnrs bytevectors) :export (parse-lexeme-tree)) -;;; Rename this -(define (perms set) +(define (permutations set) (concatenate (map (lambda (key) (map (lambda (o) (cons key o)) @@ -25,17 +24,17 @@ (define valid-sequences (delete 'dummy (lset-union eq? '(dummy) - (map symbol-concat (perms '(() U L))) - (map symbol-concat (perms '(() U LL)))))) + (map symbol-concat (permutations '(() U L))) + (map symbol-concat (permutations '(() U LL)))))) ;; => (LLU ULL LL LU UL L U) (aif (memv (string->symbol (string-upcase str)) valid-sequences) (case (car it) - [(LLU ULL) '(unsigned long-long)] + [(LLU ULL) '(unsigned long long)] [(LU UL) '(unsigned long)] - [(LL) '(long-long)] + [(LL) '(long long)] [(L) '(long)] [(U) '(unsigned)]) (scm-error 'c-parse-error "parse-integer-suffix" @@ -176,6 +175,8 @@ [('variable var) (string->symbol var)] + ;; normalize some binary operators to their wordy equivalent + ;; (which also happens to match better with scheme) [('operator "&&") 'and] [('operator "&=") 'and_eq] [('operator "&") 'bitand] @@ -185,6 +186,9 @@ [('operator "|=") 'or_eq] [('operator "^") 'xor] [('operator "^=") 'xor_eq] + ;; Change these names to something scheme can handle better + [('operator ".") 'object-slot] + [('operator "->") 'dereference-slot] [('operator op) (string->symbol op)] 
[('prefix-operator op) @@ -211,10 +215,6 @@ [('group args ...) (parse-lexeme-tree args)] - ;; Atomic item. Used by flatten-infix - [('atom body) - (parse-lexeme-tree body)] - [('prefix op arg) `(,(parse-lexeme-tree op) ,(parse-lexeme-tree arg))] @@ -240,6 +240,8 @@ + ;; Is it OK for literal strings to be "stored" inline? + ;; Or must they be a pointer? ['string #vu8(0)] [('string str ...) (-> (map resolve-string-fragment str) @@ -371,6 +373,8 @@ ,(resolve-order-of-operations (cons 'fixed-infix b) (cdr order)) ,(resolve-order-of-operations (cons 'fixed-infix c) (cdr order)))] [(first rest ...) + ;; TODO this is only valid for the associative operators (+, ...) + ;; but not for some others (<, ...) (if (apply eq? (map car rest)) (let ((op (caar rest))) `((resolved-operator ,op) -- cgit v1.2.3