diff options
Diffstat (limited to 'module/c')
-rw-r--r-- | module/c/cpp.scm | 138 | ||||
-rw-r--r-- | module/c/lex.scm | 127 | ||||
-rw-r--r-- | module/c/operators.scm | 20 | ||||
-rw-r--r-- | module/c/parse.scm | 181 |
4 files changed, 466 insertions, 0 deletions
diff --git a/module/c/cpp.scm b/module/c/cpp.scm new file mode 100644 index 00000000..69e21d27 --- /dev/null +++ b/module/c/cpp.scm @@ -0,0 +1,138 @@ +(define-module (c cpp) + :use-module (util) + :use-module (srfi srfi-1) + :use-module (ice-9 popen) + :use-module (ice-9 match) + :use-module (ice-9 regex) + :use-module ((rnrs io ports) :select (call-with-port)) + :use-module (ice-9 pretty-print) ; used by one error handler + :use-module ((util io) :select (read-lines)) + :use-module (util graph) + :use-module (c lex) + :use-module (c parse) + :use-module (c operators) + ) + + +;; input "#define F(x, y) x + y" +;; 1 full define | F(x, y) +;; 2 macro name | F +;; 3 macro args | (x, y) +;; 4 macro body | x + y +(define define-re (make-regexp "^#define ((\\w+)(\\([^)]*\\))?) (.*)")) + +(define (tokenize-define-line header-line) + (aif (regexp-exec define-re header-line) + (cons (match:substring it 1) + (match:substring it 4)) + (error "Line dosen't match" header-line))) + + +(define symb-map + `((,(symbol #\|) . logior) + (funcall . (@@ ,(module-name (current-module)) do-funcall)) + (&& . and) + (& . logand) + (== . =) + (!= . (negate =)) + )) + +(define (do-funcall function arguments) + (if (list? arguments) + (apply function arguments) + (function arguments))) + +(define-public (replace-symbols tree dict) + (if (not (list? tree)) + (or (assoc-ref dict tree) tree) + (map (lambda (node) (replace-symbols node dict)) + tree))) + +(define (atom? x) + ;; NOT symbol + (or (number? x) + (char? x) + (string? x))) + +;; built in symbols. Should never be marked as dependencies +(define (primitive? x) + (memv x (cons 'funcall binary-operators))) + + + +;; (symbol . value) -> (list (dependencies . symbol . value) +(define (parse-cpp-define pair) + (define f (compose parse-lexeme-tree lex)) + (define left (f (car pair))) + (define proc-args + (match (and (pair? left) + (eq? 'funcall (car left)) + (caddr left)) + [#f '()] + [(_ args ...) args] + [arg (list arg)])) + + (define right (f (cdr pair))) + (define alt-right (replace-symbols right symb-map)) + (define dependencies + (lset-difference + eq? + (remove primitive? + (remove atom? + (flatten (if (list? right) + right (list right))))) + proc-args)) + + (cons + dependencies + (match left + [('funcall name ('#{,}# args ...)) + (cons name `(lambda ,args ,alt-right))] + + [('funcall name arg) + (cons name `(lambda (,arg) ,alt-right))] + + [name (cons name alt-right)]))) + + +(define (parse-cpp-file lines) + (map (lambda (line) + (catch #t + (lambda () (parse-cpp-define line)) + (lambda (err caller fmt args . _) + (format #t "~a ~?~%" fmt args) + #f))) + lines)) + +(define (private-c-symbol? string) + (char=? #\_ (string-ref string 0))) + +(define (tokenize-header-file header-file) + (map tokenize-define-line + (call-with-port + (open-input-pipe + (string-append "cpp -dM " header-file)) + read-lines))) + +(define-macro (include# header-file) + (define lines (remove (compose private-c-symbol? car) + (tokenize-header-file header-file))) + + (define forms (parse-cpp-file lines)) + + (define graph* + (fold (lambda (node graph) + (add-node graph (cdr node) (car node))) + (make-graph car) + (filter identity forms))) + + ;; Hack for termios since this symbol isn't defined. + ;; (including in the above removed private c symbols) + (define graph (add-node graph* (cons '_POSIX_VDISABLE #f) '())) + + `(begin + ,@(map (lambda (pair) `(define ,(car pair) ,(cdr pair))) + (resolve-dependency-graph graph)))) + + +(export include#) diff --git a/module/c/lex.scm b/module/c/lex.scm new file mode 100644 index 00000000..322e20ed --- /dev/null +++ b/module/c/lex.scm @@ -0,0 +1,127 @@ +(define-module (c lex) + :use-module (ice-9 peg) + :use-module (c operators) + :export (lex)) + + +;; Like the regular define-peg-pattern. But evaluates the +;; pattern before treating it as a peg rule. +(define-macro (define-define-peg-pattern name capture expr) + `(define-peg-pattern ,name ,capture + ;; NOTE how does this work if we are in a different module? + ;; It currently however isn't a problem since we don't export + ;; this macro. + ,(eval expr (current-module)))) + + + +(define-peg-pattern base-8-digit body + (range #\0 #\7)) + +(define-peg-pattern base-10-digit body + (range #\0 #\9)) + +(define-peg-pattern base-16-digit body + (or (range #\0 #\9) + (range #\A #\F) + (range #\a #\f))) + +;; https://en.cppreference.com/w/cpp/language/integer_literal +(define-peg-pattern base-10 all (+ base-10-digit)) +(define-peg-pattern base-8 all (and (ignore "0") (+ base-8-digit))) +(define-peg-pattern base-16 all (and (ignore (and "0" (or "x" "X"))) + (+ base-16-digit))) + +;; accept anything now, ensure correctnes later +(define-peg-pattern integer-suffix all + (* (or "u" "U" "l" "L"))) + +(define-peg-pattern integer all + (and (or base-8 base-16 base-10) (? integer-suffix))) + +(define-peg-pattern number body + (or integer)) + +(define-peg-pattern group all + (and (ignore "(") expr (ignore ")"))) + +(define-peg-pattern base-8-char all + (and base-8-digit + (? base-8-digit) + (? base-8-digit))) + +(define-peg-pattern base-16-char all + (and (ignore "x") base-16-digit (? base-16-digit))) + +(define-peg-pattern escaped-char all + (and (ignore "\\") (or base-16-char + base-8-char + peg-any))) + +(define-peg-pattern char all + (and (ignore "'") (or escaped-char peg-any) (ignore "'"))) + +(define-define-peg-pattern operator all + `(or ,@(map symbol->string symbol-binary-operators) + ,@(map (lambda (op) `(and ,(symbol->string op) ws)) + wordy-binary-operators))) + +;; whitespace +(define-peg-pattern ws none + (or " " " " "\n")) + +;; space (for when whitespace is optional) +(define-peg-pattern sp none (* ws)) + +(define-peg-pattern safe-letter body + (or "_" + (range #\A #\Z) + (range #\a #\z))) + +(define-peg-pattern variable all + (and safe-letter + (* (or safe-letter + base-10-digit)))) + +;; No further subparsing can be done. +;; NOTE that strings are generally also in this category. +(define-peg-pattern atom all + (or base-8 base-10 base-16 number char variable)) + +(define-peg-pattern prefix-operator all + (or "!" "~" "*" "&" "++" "--" "+" "-")) + +;;; Note that stacked pre or postfix operators without parenthesis +;;; dosen't work. So `*&C' is invalid, while `*(&C)' is valid. + +(define-peg-pattern prefix all + (and prefix-operator sp (or variable group funcall #; postfix + ))) + +(define-peg-pattern postfix-operator all + (or "++" "--")) + +(define-peg-pattern postfix all + ;; literals can't be in-place incremented and decremented + ;; Make sure we don't match postfix-operator here, since + ;; that also gives us an infinite loop. + (and (or prefix funcall group variable) sp postfix-operator)) + +(define-peg-pattern infix all + ;; first case is "same" as expr, but in different order to prevent + ;; infinite self reference. Pre and postfix not here, solved by having + ;; them before infix in expr + (and (or funcall postfix prefix group char number variable) + sp operator sp expr)) + +(define-peg-pattern funcall all + (and variable sp group)) + +;;; main parser +(define-peg-pattern expr body + (+ (and sp (or infix postfix prefix funcall group char number variable) + sp))) + + +(define (lex string) + (peg:tree (match-pattern expr string))) diff --git a/module/c/operators.scm b/module/c/operators.scm new file mode 100644 index 00000000..f6fa3da9 --- /dev/null +++ b/module/c/operators.scm @@ -0,0 +1,20 @@ +(define-module (c operators)) + + +;;; Simple operators are those which can be combined with '=' +(define simple-operators + `(+ - * / & ,(symbol #\|) ^ << >> % < > =)) + +;; apparently part of C +(define-public wordy-binary-operators + '(bitand and_eq and bitor or_eq or xor_eq xor)) + +(define-public symbol-binary-operators + (append (map (lambda (x) (symbol-append x '=)) simple-operators) + `(&& ,(symbol #\| #\|) != ,(symbol #\,) + -> ,(symbol #\.)) + simple-operators)) + +(define-public binary-operators + (append symbol-binary-operators + wordy-binary-operators)) diff --git a/module/c/parse.scm b/module/c/parse.scm new file mode 100644 index 00000000..42f2c13a --- /dev/null +++ b/module/c/parse.scm @@ -0,0 +1,181 @@ +(define-module (c parse) + :use-module (util) + :use-module (srfi srfi-1) + :use-module (ice-9 match) + :export (parse-lexeme-tree)) + +;;; Rename this +(define (perms set) + (concatenate + (map (lambda (key) + (map (lambda (o) (cons key o)) + (delete key set))) + set))) + +(define (symbol-concat pair) + (cond [(null? (car pair)) (cdr pair)] + [(null? (cdr pair)) (car pair)] + [else (symbol-append (car pair) (cdr pair))])) + +(define (parse-integer-suffix str) + (define valid-sequences + (delete 'dummy + (lset-union eq? '(dummy) + (map symbol-concat (perms '(() U L))) + (map symbol-concat (perms '(() U LL)))))) + + ;; => (LLU ULL LL LU UL L U) + + (aif (memv (string->symbol (string-upcase str)) + valid-sequences) + (case (car it) + [(LLU ULL) '(unsigned long-long)] + [(LU UL) '(unsigned long)] + [(LL) '(long-long)] + [(L) '(long)] + [(U) '(unsigned)]) + (error "Invalid integer suffix"))) + +(define (parse-lexeme-tree tree) + (match tree + ['() '()] + + ;; Number constants + [('base-10 n) (string->number n 10)] + [('base-8 n) (string->number n 8)] + [('base-16 n) (string->number n 16)] + + [('integer n ('integer-suffix suffix)) + `(as-type + ,(parse-integer-suffix suffix) + ,(parse-lexeme-tree n)) + ] + [('integer n) + (parse-lexeme-tree n)] + + ;; Character literals, stored as raw integers + ;; so mathematical operations keep working on them. + [('char ('escaped-char ('base-8-char n))) + (-> n (string->number 8) #; integer->char)] + [('char ('escaped-char ('base-16-char n))) + (-> n (string->number 16) #; integer->char)] + [('char ('escaped-char c)) + (char->integer + (case (string-ref c 0) + ((#\a) #\alarm) + ((#\b) #\backspace) + ((#\e) #\esc) + ((#\f) #\page) + ((#\n) #\newline) + ((#\r) #\return) + ((#\t) #\tab) + ((#\v) #\vtab) + ((#\\) #\\) + ((#\') #\')))] + [('char c) (char->integer (string-ref c 0))] + + [('variable var) (string->symbol var)] + [('operator op) (string->symbol op)] + [('prefix-operator op) + (case (string->symbol op) + ((*) 'dereference) + ((&) 'pointer) + ((++) 'pre-increment) + ((--) 'pre-decrement) + (else => identity))] + [('postfix-operator op) + (case (string->symbol op) + [(++) 'post-increment] + [(--) 'post-decrement] + [else => identity])] + + ;; Parenthesis grouping + [('group args) + (parse-lexeme-tree args)] + + ;; Atomic item. Used by flatten-infix + [('atom body) + (parse-lexeme-tree body)] + + [('prefix op arg) + `(,(parse-lexeme-tree op) + ,(parse-lexeme-tree arg))] + + [('postfix arg op) + `(,(parse-lexeme-tree op) + ,(parse-lexeme-tree arg))] + + [('infix args ...) + (resolve-order-of-operations + (flatten-infix (cons 'infix args)))] + + [('funcall function ('group arguments)) + `(funcall ,(parse-lexeme-tree function) + ,(parse-lexeme-tree arguments))] + + [bare (throw 'parse-error + 'parse-lexeme-tree + "Naked literal in lex-tree. How did that get there?" + '() + bare)])) + +;; https://en.wikipedia.org/wiki/Operators_in_C_and_C%2B%2B + +(define order-of-operations + (reverse + (concatenate + ;; This is only for binary operations + `((-> ,(symbol #\.)) + (* / %) + (+ -) + (<< >>) + (< <= > >=) + (== !=) + (&) + (^) + (,(symbol #\|)) + (&&) + (,(symbol #\| #\|)) + (= += -= *= /= %= <<= >>= &= ^= ,(symbol #\| #\=)) + (,(symbol #\,)) + )))) + +(define (mark-other form) + (if (list? form) (cons '*other* form) form)) + +(define* (resolve-order-of-operations + tree optional: (order order-of-operations)) + + (if (null? order) + (car tree) + (match tree + [('*other* body ...) body] + [(form) (resolve-order-of-operations form order)] + [(forms ...) + (match (split-by forms (car order)) + [(group) (resolve-order-of-operations group (cdr order))] + [groups + (cons (car order) + (map (lambda (form) (resolve-order-of-operations + form order-of-operations)) + groups))])] + [a a]))) + +;; Flatens a tree of infix triples. Stops when it should. +;; (parenthesis, function calls, ...) +(define (flatten-infix form) + (match form + [('infix left op ('infix right ...)) + (cons* (parse-lexeme-tree left) + (parse-lexeme-tree op) + (flatten-infix (cons 'infix right)))] + + [('infix left op right) + (list (mark-other (parse-lexeme-tree left)) + (parse-lexeme-tree op) + (mark-other (parse-lexeme-tree right)))] + + [other (error "Not an infix tree ~a" other)])) + + + |