(define-module (c parse) :use-module (hnh util) :use-module (srfi srfi-1) :use-module (ice-9 match) :export (parse-lexeme-tree)) ;;; Rename this (define (perms set) (concatenate (map (lambda (key) (map (lambda (o) (cons key o)) (delete key set))) set))) (define (symbol-concat pair) (cond [(null? (car pair)) (cdr pair)] [(null? (cdr pair)) (car pair)] [else (symbol-append (car pair) (cdr pair))])) (define (parse-integer-suffix str) (define valid-sequences (delete 'dummy (lset-union eq? '(dummy) (map symbol-concat (perms '(() U L))) (map symbol-concat (perms '(() U LL)))))) ;; => (LLU ULL LL LU UL L U) (aif (memv (string->symbol (string-upcase str)) valid-sequences) (case (car it) [(LLU ULL) '(unsigned long-long)] [(LU UL) '(unsigned long)] [(LL) '(long-long)] [(L) '(long)] [(U) '(unsigned)]) (scm-error 'c-parse-error "parse-integer-suffix" "Invalid integer suffix ~s" (list str) #f))) (define (group-body->type vars) (concatenate (map (match-lambda (('variable var) (list (parse-lexeme-tree `(variable ,var)))) (('postfix ('variable var) ('postfix-operator "*")) (list (parse-lexeme-tree `(variable ,var)) '*)) (else (scm-error 'c-parse-error "parse-lexeme-tree" "Invalid token ~s in typecast form: ~s" (list else vars) #f))) vars))) (define (parse-lexeme-tree tree) (match tree ['() '()] ;; Number constants [('base-10 n) (string->number n 10)] [('base-8 n) (string->number n 8)] [('base-16 n) (string->number n 16)] [('integer n ('integer-suffix suffix)) `(as-type ,(parse-integer-suffix suffix) ,(parse-lexeme-tree n)) ] [('integer n) (parse-lexeme-tree n)] ;; Character literals, stored as raw integers ;; so mathematical operations keep working on them. [('char ('escaped-char ('base-8-char n))) (-> n (string->number 8) #; integer->char)] [('char ('escaped-char ('base-16-char n))) (-> n (string->number 16) #; integer->char)] [('char ('escaped-char c)) (char->integer (case (string-ref c 0) ((#\a) #\alarm) ((#\b) #\backspace) ((#\e) #\esc) ((#\f) #\page) ((#\n) #\newline) ((#\r) #\return) ((#\t) #\tab) ((#\v) #\vtab) ((#\\) #\\) ((#\') #\')))] [('char c) (char->integer (string-ref c 0))] [('variable var) (string->symbol var)] [('operator "&&") 'and] [('operator "&=") 'and_eq] [('operator "&") 'bitand] [('operator "|") 'bitor] [('operator "!=") 'not_eq] [('operator "||") 'or] [('operator "|=") 'or_eq] [('operator "^") 'xor] [('operator "^=") 'xor_eq] [('operator op) (string->symbol op)] [('prefix-operator op) (case (string->symbol op) ((!) 'not) ((~) 'compl) ((*) 'dereference) ((&) 'pointer) ((++) 'pre-increment) ((--) 'pre-decrement) (else (scm-error 'c-parse-error "parse-lexeme-tree" "Unknown prefix operator ~s" (list op) #f)))] [('postfix-operator op) (case (string->symbol op) [(++) 'post-increment] [(--) 'post-decrement] [else (scm-error 'c-parse-error "parse-lexeme-tree" "Unknown postfix operator ~s" (list op) #f)])] ;; Parenthesis grouping [('group args ...) (parse-lexeme-tree args)] ;; Atomic item. Used by flatten-infix [('atom body) (parse-lexeme-tree body)] [('prefix op arg) `(,(parse-lexeme-tree op) ,(parse-lexeme-tree arg))] [('postfix arg op) `(,(parse-lexeme-tree op) ,(parse-lexeme-tree arg))] [('infix args ...) (let ((r (resolve-order-of-operations (flatten-infix (cons 'infix args))))) (match r (`(: (? ,op ,true) ,false) `(ternary ,op ,true ,false)) (_ r)))] [('funcall function ('group arguments)) `(funcall ,(parse-lexeme-tree function) ,(parse-lexeme-tree arguments))] [(('variable "struct") ('variable value) ..1) `(struct-type ,@(map string->symbol value)) ] ;; A list of variables. Most likely a type signature ;; [(('variable value) ..1) ;; ] ;; A typecast with only variables must (?) be a typecast? [(('group groups ..1) ... value) (fold-right (lambda (type done) `(as-type ,type ,done)) (parse-lexeme-tree value) (map group-body->type groups))] ;; Type name resolution? ;; https://en.wikipedia.org/wiki/C_data_types ;; base types with spaces: ;; ======================= ;; [[un]signed] char ;; [[un]signed] short [int] ;; [[un]signed] int ;; [un]signed [int] ;; [[un]signed] long [int] ;; [[un]signed] long long [int] ;; float ;; [long] double ;; https://en.wikipedia.org/wiki/Type_qualifier ;; qualifiers ;; const ;; volatile ;; restrict ;; _Atomic ;; Storage specifiers ;; auto ;; register ;; static ;; extern ;; struct ;; enum ;; union ;; https://en.wikipedia.org/wiki/C_syntax ;; int (*ptr_to_array)[100] [(? symbol? bare) (scm-error 'c-parse-error "parse-lexeme-tree" "Naked literal in lex-tree: ~s" (list bare) #f)] [form (scm-error 'c-parse-error "parse-lexeme-tree" "Unknown form in lex-tree: ~s" (list form) #f) ])) ;; https://en.wikipedia.org/wiki/Operators_in_C_and_C%2B%2B ;; https://en.cppreference.com/w/c/language/operator_precedence (define order-of-operations (reverse (concatenate ;; This is only for binary operations `((-> ,(symbol #\.)) ;; All unary procedures go here, incnluding typecasts, and sizeof (* / %) (+ -) (<< >>) (< <= > >=) (== != not_eq) (& bitand) (^ xorg) (,(symbol #\|) bitor) (&& and) (,(symbol #\| #\|) or) (? :) (= += -= *= /= %= <<= >>= &= ^= ,(symbol #\| #\=) and_eq or_eq xor_eq) (,(symbol #\,)) )))) (define (mark-other form) (if (list? form) (cons '*other* form) form)) (define* (resolve-order-of-operations tree optional: (order order-of-operations)) (format (current-error-port) "~s~%" tree) (if (null? order) (scm-error 'c-parse-error "resolve-order-of-operations" "Out of operations to resolve when resolving expression ~s" (list tree) #f) (match tree [('*other* body ...) body] [(form) (resolve-order-of-operations form order)] [(forms ...) (match (split-by forms (car order)) [(group) (resolve-order-of-operations group (cdr order))] [groups (cons (car order) (map (lambda (form) (resolve-order-of-operations form order-of-operations)) groups))])] [a a]))) ;; Flatens a tree of infix triples. Stops when it should. ;; (parenthesis, function calls, ...) (define (flatten-infix form) (match form [('infix left op ('infix right ...)) (cons* (parse-lexeme-tree left) (parse-lexeme-tree op) (flatten-infix (cons 'infix right)))] [('infix left op right) (list (mark-other (parse-lexeme-tree left)) (parse-lexeme-tree op) (mark-other (parse-lexeme-tree right)))] [other (scm-error 'c-parse-error "flatten-infix" "Not an infix tree ~a" (list other) #f)]))