From d5c0f0232d7351ed83b571aae649ee7971be2682 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20H=C3=B6rnquist?= Date: Mon, 20 Jul 2020 03:51:48 +0200 Subject: Split parse-cpp into multiple modules. --- module/vulgar/parse-cpp.scm | 459 -------------------------------------------- 1 file changed, 459 deletions(-) delete mode 100644 module/vulgar/parse-cpp.scm (limited to 'module/vulgar') diff --git a/module/vulgar/parse-cpp.scm b/module/vulgar/parse-cpp.scm deleted file mode 100644 index 2a58d158..00000000 --- a/module/vulgar/parse-cpp.scm +++ /dev/null @@ -1,459 +0,0 @@ -(define-module (vulgar parse-cpp) - :use-module (util) - :use-module (srfi srfi-1) - :use-module (ice-9 popen) - :use-module (ice-9 peg) - :use-module (ice-9 match) - ;; required by define-define-peg-pattern - :use-module ((system base compile) :select (compile)) - :use-module ((rnrs io ports) :select (call-with-port)) - :use-module (ice-9 pretty-print) ; used by one error handler - :use-module ((util io) :select (read-lines)) - :use-module (util graph) - ) - - - - -;;; Simple operators are those which can be combined with '=' -(define simple-operators - `(+ - * / & ,(symbol #\|) ^ << >> % < > =)) - -;; apparently part of C -(define wordy-binary-operators - '(bitand and_eq and bitor or_eq or xor_eq xor)) - -(define symbol-binary-operators - (append (map (lambda (x) (symbol-append x '=)) simple-operators) - `(&& ,(symbol #\| #\|) != ,(symbol #\,) - -> ,(symbol #\.)) - simple-operators)) - -(define binary-operators - (append symbol-binary-operators - wordy-binary-operators)) - - -;;; Lexer - -;; Like the regular define-peg-pattern. But evaluates the -;; pattern before treating it as a peg rule. -(define-macro (define-define-peg-pattern name capture expr) - `(define-peg-pattern ,name ,capture - ;; NOTE how does this work if we are in a different module? - ;; It currently however isn't a problem since we don't export - ;; this macro. - ,(eval expr (current-module)))) - -(define-peg-pattern base-8-digit body - (range #\0 #\7)) - -(define-peg-pattern base-10-digit body - (range #\0 #\9)) - -(define-peg-pattern base-16-digit body - (or (range #\0 #\9) - (range #\A #\F) - (range #\a #\f))) - -;; https://en.cppreference.com/w/cpp/language/integer_literal -(define-peg-pattern base-10 all (+ base-10-digit)) -(define-peg-pattern base-8 all (and (ignore "0") (+ base-8-digit))) -(define-peg-pattern base-16 all (and (ignore (and "0" (or "x" "X"))) - (+ base-16-digit))) - -;; accept anything now, ensure correctnes later -(define-peg-pattern integer-suffix all - (* (or "u" "U" "l" "L"))) - -(define-peg-pattern integer all - (and (or base-8 base-16 base-10) (? integer-suffix))) - -(define-peg-pattern number body - (or integer)) - -(define-peg-pattern group all - (and (ignore "(") expr (ignore ")"))) - -(define-peg-pattern base-8-char all - (and base-8-digit - (? base-8-digit) - (? base-8-digit))) - -(define-peg-pattern base-16-char all - (and (ignore "x") base-16-digit (? base-16-digit))) - -(define-peg-pattern escaped-char all - (and (ignore "\\") (or base-16-char - base-8-char - peg-any))) - -(define-peg-pattern char all - (and (ignore "'") (or escaped-char peg-any) (ignore "'"))) - -(define-define-peg-pattern operator all - `(or ,@(map symbol->string symbol-binary-operators) - ,@(map (lambda (op) `(and ,(symbol->string op) ws)) - wordy-binary-operators))) - -;; whitespace -(define-peg-pattern ws none - (or " " " " "\n")) - -;; space (for when whitespace is optional) -(define-peg-pattern sp none (* ws)) - -(define-peg-pattern safe-letter body - (or "_" - (range #\A #\Z) - (range #\a #\z))) - -(define-peg-pattern variable all - (and safe-letter - (* (or safe-letter - base-10-digit)))) - -;; No further subparsing can be done. -;; NOTE that strings are generally also in this category. -(define-peg-pattern atom all - (or base-8 base-10 base-16 number char variable)) - -(define-peg-pattern prefix-operator all - (or "!" "~" "*" "&" "++" "--" "+" "-")) - -;;; Note that stacked pre or postfix operators without parenthesis -;;; dosen't work. So `*&C' is invalid, while `*(&C)' is valid. - -(define-peg-pattern prefix all - (and prefix-operator sp (or variable group funcall #; postfix - ))) - -(define-peg-pattern postfix-operator all - (or "++" "--")) - -(define-peg-pattern postfix all - ;; literals can't be in-place incremented and decremented - ;; Make sure we don't match postfix-operator here, since - ;; that also gives us an infinite loop. - (and (or prefix funcall group variable) sp postfix-operator)) - -(define-peg-pattern infix all - ;; first case is "same" as expr, but in different order to prevent - ;; infinite self reference. Pre and postfix not here, solved by having - ;; them before infix in expr - (and (or funcall postfix prefix group char number variable) - sp operator sp expr)) - -(define-peg-pattern funcall all - (and variable sp group)) - -;;; main parser -(define-peg-pattern expr body - (+ (and sp (or infix postfix prefix funcall group char number variable) - sp))) - - -(define (lex string) - (peg:tree (match-pattern expr string))) - - -;;; Parser - -(define (perms set) - (concatenate - (map (lambda (key) - (map (lambda (o) (cons key o)) - (delete key set))) - set))) - -(define (symbol-concat pair) - (cond [(null? (car pair)) (cdr pair)] - [(null? (cdr pair)) (car pair)] - [else (symbol-append (car pair) (cdr pair))])) - -(define (parse-integer-suffix str) - - (define valid-sequences - (delete 'dummy - (lset-union eq? '(dummy) - (map symbol-concat (perms '(() U L))) - (map symbol-concat (perms '(() U LL)))))) - - ;; => (LLU ULL LL LU UL L U) - - (aif (memv (string->symbol (string-upcase str)) - valid-sequences) - (case (car it) - [(LLU ULL) '(unsigned long-long)] - [(LU UL) '(unsigned long)] - [(LL) '(long-long)] - [(L) '(long)] - [(U) '(unsigned)]) - (error "Invalid integer suffix"))) - -(define (parse-lexeme-tree tree) - (match tree - ['() '()] - - ;; Number constants - [('base-10 n) (string->number n 10)] - [('base-8 n) (string->number n 8)] - [('base-16 n) (string->number n 16)] - - [('integer n ('integer-suffix suffix)) - `(as-type - ,(parse-integer-suffix suffix) - ,(parse-lexeme-tree n)) - ] - [('integer n) - (parse-lexeme-tree n)] - - ;; Character literals, stored as raw integers - ;; so mathematical operations keep working on them. - [('char ('escaped-char ('base-8-char n))) - (-> n (string->number 8) #; integer->char)] - [('char ('escaped-char ('base-16-char n))) - (-> n (string->number 16) #; integer->char)] - [('char ('escaped-char c)) - (char->integer - (case (string-ref c 0) - ((#\a) #\alarm) - ((#\b) #\backspace) - ((#\e) #\esc) - ((#\f) #\page) - ((#\n) #\newline) - ((#\r) #\return) - ((#\t) #\tab) - ((#\v) #\vtab) - ((#\\) #\\) - ((#\') #\')))] - [('char c) (char->integer (string-ref c 0))] - - [('variable var) (string->symbol var)] - [('operator op) (string->symbol op)] - [('prefix-operator op) - (case (string->symbol op) - ((*) 'dereference) - ((&) 'pointer) - ((++) 'pre-increment) - ((--) 'pre-decrement) - (else => identity))] - [('postfix-operator op) - (case (string->symbol op) - [(++) 'post-increment] - [(--) 'post-decrement] - [else => identity])] - - ;; Parenthesis grouping - [('group args) - (parse-lexeme-tree args)] - - ;; Atomic item. Used by flatten-infix - [('atom body) - (parse-lexeme-tree body)] - - [('prefix op arg) - `(,(parse-lexeme-tree op) - ,(parse-lexeme-tree arg))] - - [('postfix arg op) - `(,(parse-lexeme-tree op) - ,(parse-lexeme-tree arg))] - - [('infix args ...) - (resolve-order-of-operations - (flatten-infix (cons 'infix args)))] - - [('funcall function ('group arguments)) - `(funcall ,(parse-lexeme-tree function) - ,(parse-lexeme-tree arguments))] - - [bare (throw 'parse-error - 'parse-lexeme-tree - "Naked literal in lex-tree. How did that get there?" - '() - bare)])) - -;; https://en.wikipedia.org/wiki/Operators_in_C_and_C%2B%2B - -(define order-of-operations - (reverse - (apply append - ;; This is only for binary operations - `((-> ,(symbol #\.)) - (* / %) - (+ -) - (<< >>) - (< <= > >=) - (== !=) - (&) - (^) - (,(symbol #\|)) - (&&) - (,(symbol #\| #\|)) - (= += -= *= /= %= <<= >>= &= ^= ,(symbol #\| #\=)) - (,(symbol #\,)) - )))) - -(define* (resolve-order-of-operations - tree optional: (order order-of-operations)) - - (if (null? order) - (car tree) - (match tree - [('*other* body ...) body] - [(form) (resolve-order-of-operations form order)] - [(forms ...) - (match (split-by forms (car order)) - [(group) (resolve-order-of-operations group (cdr order))] - [groups - (cons (car order) - (map (lambda (form) (resolve-order-of-operations form order-of-operations)) - groups))])] - [a a]))) - -(define (mark-other form) - (if (list? form) (cons '*other* form) form)) - -;; Flatens a tree of infix triples. Stops when it should. -;; (parenthesis, function calls, ...) -(define (flatten-infix form) - (match form - [('infix left op ('infix right ...)) - (cons* (parse-lexeme-tree left) - (parse-lexeme-tree op) - (flatten-infix (cons 'infix right)))] - [('infix left op right) - (list (mark-other (parse-lexeme-tree left)) - (parse-lexeme-tree op) - (mark-other (parse-lexeme-tree right))) - #; (map parse-lexeme-tree (list left op right))] - [other (error "Not an infix tree ~a" other)])) - - -(define (do-funcall function arguments) - (if (list? arguments) - (apply function arguments) - (function arguments))) - -(define-public (replace-symbols tree dict) - (if (not (list? tree)) - (or (assoc-ref dict tree) tree) - (map (lambda (node) (replace-symbols node dict)) - tree))) - - - - -;; input "#define F(x, y) x + y" -;; 1 full define | F(x, y) -;; 2 macro name | F -;; 3 macro args | (x, y) -;; 4 macro body | x + y -(define define-re (make-regexp "^#define ((\\w+)(\\([^)]*\\))?) (.*)")) - -(define (tokenize-define-line header-line) - (aif (regexp-exec define-re header-line) - (cons (match:substring it 1) - (match:substring it 4)) - (error "Line dosen't match" header-line))) - -(define (tokenize-header-file header-file) - (map tokenize-define-line - (call-with-port - (open-input-pipe - (string-append "cpp -dM " header-file)) - read-lines))) - -(define symb-map - `((,(symbol #\|) . logior) - (funcall . do-funcall) - (&& . and) - (& . logand) - (== . =) - (!= . (negate =)) - )) - -(define (atom? x) - ;; NOT symbol - (or (number? x) - (char? x) - (string? x))) - -;; built in symbols. Should never be marked as dependencies -(define (primitive? x) - (memv x (cons 'funcall binary-operators))) - -;; -> (list (dependencies . symbol . value) -(define (parse-cpp-define pair) - (define f (compose parse-lexeme-tree lex)) - (define left (f (car pair))) - (define proc-args - (match (and (pair? left) - (eq? 'funcall (car left)) - (caddr left)) - [#f '()] - [(_ args ...) args] - [arg (list arg)])) - - (define right (f (cdr pair))) - (define alt-right (replace-symbols right symb-map)) - (define dependencies - (lset-difference - eq? - (remove primitive? - (remove atom? - (flatten (if (list? right) - right (list right))))) - proc-args)) - - (cons - dependencies - (match left - [('funcall name ('#{,}# args ...)) - (cons name `(lambda ,args ,alt-right))] - - [('funcall name arg) - (cons name `(lambda (,arg) ,alt-right))] - - [name (cons name alt-right)]))) - - -(define source-form (make-object-property)) - -(define (parse-cpp-file lines) - (for (i line) in (enumerate lines) - (catch #t - (lambda () - (let ((def (parse-cpp-define line))) - (set! (source-form def) - (format #f "#define ~a ~a" (car line) (cdr line))) - def)) - (lambda (err caller fmt args . _) - (format #t "~a ~?~%" i fmt args) #f)))) - -(define (private-c-symbol? string) - (char=? #\_ (string-ref string 0))) - -(define-macro (include# header-file) - (define lines (remove (compose private-c-symbol? car) - (tokenize-header-file header-file))) - - (define forms (parse-cpp-file lines)) - - (define graph* - (fold (lambda (node graph) - (set! (source-form (cdr node)) - (source-form node)) - (add-node graph (cdr node) (car node))) - (make-graph car) - (filter identity forms))) - - ;; Hack for termios since this symbol isn't defined. - ;; (including in the above removed private c symbols) - (define graph (add-node graph* (cons '_POSIX_VDISABLE #f) '())) - - `(begin - ,@(for (key . value) in (resolve-dependency-graph graph) - `(define ,key ,value))))) - -- cgit v1.2.3