From 690034ca8abcd931e2fb6bb8129450deee701179 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20H=C3=B6rnquist?= Date: Tue, 28 Jun 2022 23:46:08 +0200 Subject: Major work on c parser. --- module/c/parse.scm | 138 ++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 120 insertions(+), 18 deletions(-) (limited to 'module/c/parse.scm') diff --git a/module/c/parse.scm b/module/c/parse.scm index 8030da77..d407f5ac 100644 --- a/module/c/parse.scm +++ b/module/c/parse.scm @@ -38,6 +38,19 @@ "Invalid integer suffix ~s" (list str) #f))) +(define (group-body->type vars) + (concatenate + (map + (match-lambda (('variable var) (list (parse-lexeme-tree `(variable ,var)))) + (('postfix ('variable var) + ('postfix-operator "*")) + (list (parse-lexeme-tree `(variable ,var)) + '*)) + (else (scm-error 'c-parse-error "parse-lexeme-tree" + "Invalid token ~s in typecast form: ~s" + (list else vars) #f))) + vars))) + (define (parse-lexeme-tree tree) (match tree ['() '()] @@ -74,25 +87,43 @@ ((#\v) #\vtab) ((#\\) #\\) ((#\') #\')))] + [('char c) (char->integer (string-ref c 0))] [('variable var) (string->symbol var)] + + [('operator "&&") 'and] + [('operator "&=") 'and_eq] + [('operator "&") 'bitand] + [('operator "|") 'bitor] + [('operator "!=") 'not_eq] + [('operator "||") 'or] + [('operator "|=") 'or_eq] + [('operator "^") 'xor] + [('operator "^=") 'xor_eq] [('operator op) (string->symbol op)] + [('prefix-operator op) (case (string->symbol op) + ((!) 'not) + ((~) 'compl) ((*) 'dereference) ((&) 'pointer) ((++) 'pre-increment) ((--) 'pre-decrement) - (else => identity))] + (else (scm-error 'c-parse-error "parse-lexeme-tree" + "Unknown prefix operator ~s" + (list op) #f)))] [('postfix-operator op) (case (string->symbol op) [(++) 'post-increment] [(--) 'post-decrement] - [else => identity])] + [else (scm-error 'c-parse-error "parse-lexeme-tree" + "Unknown postfix operator ~s" + (list op) #f)])] ;; Parenthesis grouping - [('group args) + [('group args ...) (parse-lexeme-tree args)] ;; Atomic item. Used by flatten-infix @@ -108,37 +139,104 @@ ,(parse-lexeme-tree arg))] [('infix args ...) - (resolve-order-of-operations - (flatten-infix (cons 'infix args)))] + (let ((r + (resolve-order-of-operations + (flatten-infix (cons 'infix args))))) + (match r + (`(: (? ,op ,true) + ,false) + `(ternary ,op ,true ,false)) + (_ r)))] [('funcall function ('group arguments)) `(funcall ,(parse-lexeme-tree function) ,(parse-lexeme-tree arguments))] - [bare (scm-error 'c-parse-error - "parse-lexeme-tree" - "Naked literal in lex-tree: ~s" - (list bare) - #f)])) + [(('variable "struct") ('variable value) ..1) + `(struct-type ,@(map string->symbol value)) + ] + + ;; A list of variables. Most likely a type signature + ;; [(('variable value) ..1) + ;; ] + + ;; A typecast with only variables must (?) be a typecast? + [(('group groups ..1) ... value) + (fold-right (lambda (type done) `(as-type ,type ,done)) + (parse-lexeme-tree value) + (map group-body->type groups))] + + ;; Type name resolution? + ;; https://en.wikipedia.org/wiki/C_data_types + ;; base types with spaces: + ;; ======================= + ;; [[un]signed] char + ;; [[un]signed] short [int] + ;; [[un]signed] int + ;; [un]signed [int] + ;; [[un]signed] long [int] + ;; [[un]signed] long long [int] + ;; float + ;; [long] double + + ;; https://en.wikipedia.org/wiki/Type_qualifier + ;; qualifiers + ;; const + ;; volatile + ;; restrict + ;; _Atomic + + + ;; Storage specifiers + ;; auto + ;; register + ;; static + ;; extern + + ;; struct + ;; enum + ;; union + + ;; https://en.wikipedia.org/wiki/C_syntax + ;; int (*ptr_to_array)[100] + + + [(? symbol? bare) + (scm-error 'c-parse-error + "parse-lexeme-tree" + "Naked literal in lex-tree: ~s" + (list bare) + #f)] + + [form + (scm-error 'c-parse-error + "parse-lexeme-tree" + "Unknown form in lex-tree: ~s" + (list form) #f) + ])) ;; https://en.wikipedia.org/wiki/Operators_in_C_and_C%2B%2B +;; https://en.cppreference.com/w/c/language/operator_precedence (define order-of-operations (reverse (concatenate ;; This is only for binary operations `((-> ,(symbol #\.)) + ;; All unary procedures go here, incnluding typecasts, and sizeof (* / %) (+ -) (<< >>) (< <= > >=) - (== !=) - (&) - (^) - (,(symbol #\|)) - (&&) - (,(symbol #\| #\|)) - (= += -= *= /= %= <<= >>= &= ^= ,(symbol #\| #\=)) + (== != not_eq) + (& bitand) + (^ xorg) + (,(symbol #\|) bitor) + (&& and) + (,(symbol #\| #\|) or) + (? :) + (= += -= *= /= %= <<= >>= &= ^= ,(symbol #\| #\=) + and_eq or_eq xor_eq) (,(symbol #\,)) )))) @@ -148,8 +246,12 @@ (define* (resolve-order-of-operations tree optional: (order order-of-operations)) + (format (current-error-port) "~s~%" tree) (if (null? order) - (car tree) + (scm-error 'c-parse-error + "resolve-order-of-operations" + "Out of operations to resolve when resolving expression ~s" + (list tree) #f) (match tree [('*other* body ...) body] [(form) (resolve-order-of-operations form order)] -- cgit v1.2.3