From e06ab51e5ebc461ae6a2330fcefb09c0762c4d01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20H=C3=B6rnquist?= Date: Mon, 20 Jul 2020 03:23:31 +0200 Subject: Bunch of work on cpp. --- module/vulgar/parse-cpp.scm | 446 ++++++++++++++++++++++++++------------------ 1 file changed, 266 insertions(+), 180 deletions(-) diff --git a/module/vulgar/parse-cpp.scm b/module/vulgar/parse-cpp.scm index 62d1e41a..2a58d158 100644 --- a/module/vulgar/parse-cpp.scm +++ b/module/vulgar/parse-cpp.scm @@ -1,42 +1,49 @@ (define-module (vulgar parse-cpp) :use-module (util) + :use-module (srfi srfi-1) :use-module (ice-9 popen) - :use-module (ice-9 rdelim) :use-module (ice-9 peg) :use-module (ice-9 match) + ;; required by define-define-peg-pattern + :use-module ((system base compile) :select (compile)) + :use-module ((rnrs io ports) :select (call-with-port)) + :use-module (ice-9 pretty-print) ; used by one error handler + :use-module ((util io) :select (read-lines)) + :use-module (util graph) ) -(define (read-lines port) - (with-input-from-port port - (lambda () - (let loop ((line (read-line))) - (if (eof-object? line) - '() (cons line (loop (read-line)))))))) - -(define (parse-header header-file) - (map (lambda (line) - (let* ((symbol (string-index line #\space)) - (value (string-index line #\space (1+ symbol)))) - (cons (substring line (1+ symbol) value) - (substring line (1+ value))))) - (read-lines (open-input-pipe (string-append "cpp -dM " header-file)))) - - #; - (let* (((_ key . values) (string-split line #\space))) - (if (char=? #\_ (string-ref key 0)) - (loop (read-line)) - (cons (cons key (string-join values " " 'infix)) - (loop (read-line))))) + +;;; Simple operators are those which can be combined with '=' +(define simple-operators + `(+ - * / & ,(symbol #\|) ^ << >> % < > =)) +;; apparently part of C +(define wordy-binary-operators + '(bitand and_eq and bitor or_eq or xor_eq xor)) - ) +(define symbol-binary-operators + (append (map (lambda (x) (symbol-append x '=)) simple-operators) + `(&& ,(symbol #\| #\|) != ,(symbol #\,) + -> ,(symbol #\.)) + simple-operators)) +(define binary-operators + (append symbol-binary-operators + wordy-binary-operators)) ;;; Lexer +;; Like the regular define-peg-pattern. But evaluates the +;; pattern before treating it as a peg rule. +(define-macro (define-define-peg-pattern name capture expr) + `(define-peg-pattern ,name ,capture + ;; NOTE how does this work if we are in a different module? + ;; It currently however isn't a problem since we don't export + ;; this macro. + ,(eval expr (current-module)))) (define-peg-pattern base-8-digit body (range #\0 #\7)) @@ -49,12 +56,21 @@ (range #\A #\F) (range #\a #\f))) +;; https://en.cppreference.com/w/cpp/language/integer_literal (define-peg-pattern base-10 all (+ base-10-digit)) -(define-peg-pattern base-8 all (and "0" (+ base-8-digit))) -(define-peg-pattern base-16 all (and "0x" (+ base-16-digit))) +(define-peg-pattern base-8 all (and (ignore "0") (+ base-8-digit))) +(define-peg-pattern base-16 all (and (ignore (and "0" (or "x" "X"))) + (+ base-16-digit))) + +;; accept anything now, ensure correctnes later +(define-peg-pattern integer-suffix all + (* (or "u" "U" "l" "L"))) + +(define-peg-pattern integer all + (and (or base-8 base-16 base-10) (? integer-suffix))) (define-peg-pattern number body - (or base-8 base-16 base-10)) + (or integer)) (define-peg-pattern group all (and (ignore "(") expr (ignore ")"))) @@ -75,34 +91,17 @@ (define-peg-pattern char all (and (ignore "'") (or escaped-char peg-any) (ignore "'"))) -;; (define-peg-pattern string body -;; (and "\"" (+ (or "\\\"" peg-any)) "\"")) - -;;; Simple operators are those which can be combined with '=' -(define-peg-pattern simple-operator body - (or "+" "-" "*" "/" "&" "|" "^" "<<" ">>" "%" - "<" ">" "=")) - -(define-peg-pattern operator all - (or (and simple-operator "=") - "&&" "||" - simple-operator - "!=" "," - "and" "bitand" "and_eq" - "or" "bitor" "or_eq" - "xor" "xor_eq" - ;; "->" "." ; special cases since can only be used with variables - ;; Todo Ternaries - )) - +(define-define-peg-pattern operator all + `(or ,@(map symbol->string symbol-binary-operators) + ,@(map (lambda (op) `(and ,(symbol->string op) ws)) + wordy-binary-operators))) ;; whitespace (define-peg-pattern ws none (or " " " " "\n")) ;; space (for when whitespace is optional) -(define-peg-pattern sp none - (* ws)) +(define-peg-pattern sp none (* ws)) (define-peg-pattern safe-letter body (or "_" @@ -119,13 +118,15 @@ (define-peg-pattern atom all (or base-8 base-10 base-16 number char variable)) -;;; ++ and -- both pre and postfix - (define-peg-pattern prefix-operator all (or "!" "~" "*" "&" "++" "--" "+" "-")) +;;; Note that stacked pre or postfix operators without parenthesis +;;; dosen't work. So `*&C' is invalid, while `*(&C)' is valid. + (define-peg-pattern prefix all - (and prefix-operator sp expr)) + (and prefix-operator sp (or variable group funcall #; postfix + ))) (define-peg-pattern postfix-operator all (or "++" "--")) @@ -134,24 +135,22 @@ ;; literals can't be in-place incremented and decremented ;; Make sure we don't match postfix-operator here, since ;; that also gives us an infinite loop. - (and (or prefix infix funcall group variable) sp postfix-operator)) + (and (or prefix funcall group variable) sp postfix-operator)) -;; 5 + 3 * 9 -;; (5 + 3) * 9 -;; 5 + (3 * 9) (define-peg-pattern infix all ;; first case is "same" as expr, but in different order to prevent - ;; infinite self reference. - (and (or funcall group char prefix #; postfix number variable - ) sp operator sp expr)) + ;; infinite self reference. Pre and postfix not here, solved by having + ;; them before infix in expr + (and (or funcall postfix prefix group char number variable) + sp operator sp expr)) (define-peg-pattern funcall all (and variable sp group)) ;;; main parser (define-peg-pattern expr body - (+ (and sp (or prefix #; postfix infix funcall group char number variable - ) sp))) + (+ (and sp (or infix postfix prefix funcall group char number variable) + sp))) (define (lex string) @@ -160,6 +159,37 @@ ;;; Parser +(define (perms set) + (concatenate + (map (lambda (key) + (map (lambda (o) (cons key o)) + (delete key set))) + set))) + +(define (symbol-concat pair) + (cond [(null? (car pair)) (cdr pair)] + [(null? (cdr pair)) (car pair)] + [else (symbol-append (car pair) (cdr pair))])) + +(define (parse-integer-suffix str) + + (define valid-sequences + (delete 'dummy + (lset-union eq? '(dummy) + (map symbol-concat (perms '(() U L))) + (map symbol-concat (perms '(() U LL)))))) + + ;; => (LLU ULL LL LU UL L U) + + (aif (memv (string->symbol (string-upcase str)) + valid-sequences) + (case (car it) + [(LLU ULL) '(unsigned long-long)] + [(LU UL) '(unsigned long)] + [(LL) '(long-long)] + [(L) '(long)] + [(U) '(unsigned)]) + (error "Invalid integer suffix"))) (define (parse-lexeme-tree tree) (match tree @@ -170,24 +200,34 @@ [('base-8 n) (string->number n 8)] [('base-16 n) (string->number n 16)] - ;; Character literals + [('integer n ('integer-suffix suffix)) + `(as-type + ,(parse-integer-suffix suffix) + ,(parse-lexeme-tree n)) + ] + [('integer n) + (parse-lexeme-tree n)] + + ;; Character literals, stored as raw integers + ;; so mathematical operations keep working on them. [('char ('escaped-char ('base-8-char n))) - (-> n (string->number 8) integer->char)] + (-> n (string->number 8) #; integer->char)] [('char ('escaped-char ('base-16-char n))) - (-> n (string->number 16) integer->char)] + (-> n (string->number 16) #; integer->char)] [('char ('escaped-char c)) - (case (string-ref c 0) - ((#\a) #\alarm) - ((#\b) #\backspace) - ((#\e) #\esc) - ((#\f) #\page) - ((#\n) #\newline) - ((#\r) #\return) - ((#\t) #\tab) - ((#\v) #\vtab) - ((#\\) #\\) - ((#\') #\'))] - [('char c) (string-ref c 0)] + (char->integer + (case (string-ref c 0) + ((#\a) #\alarm) + ((#\b) #\backspace) + ((#\e) #\esc) + ((#\f) #\page) + ((#\n) #\newline) + ((#\r) #\return) + ((#\t) #\tab) + ((#\v) #\vtab) + ((#\\) #\\) + ((#\') #\')))] + [('char c) (char->integer (string-ref c 0))] [('variable var) (string->symbol var)] [('operator op) (string->symbol op)] @@ -228,27 +268,11 @@ `(funcall ,(parse-lexeme-tree function) ,(parse-lexeme-tree arguments))] - [bare (error "Naked literal in lex-tree. How did that get there?" - bare)])) - -;;; TODO -;; (f "*C++") -;; $427 = (dereference (post-increment C)) - -(define (group-by list item) - (let loop ((done '()) - (current '()) - (rem list)) - (cond [(null? rem) - (reverse (cons (reverse current) done))] - [(eqv? item (car rem)) - (loop (cons (reverse current) done) + [bare (throw 'parse-error + 'parse-lexeme-tree + "Naked literal in lex-tree. How did that get there?" '() - (cdr rem))] - [else - (loop done - (cons (car rem) current) - (cdr rem))]))) + bare)])) ;; https://en.wikipedia.org/wiki/Operators_in_C_and_C%2B%2B @@ -256,47 +280,43 @@ (reverse (apply append ;; This is only for binary operations - `((* / %) - (+ -) - (<< >>) - (< <= > >=) - (== !=) - (&) - (^) - (,(symbol #\|)) - (&&) - (,(symbol #\| #\|)) - (= += -= *= /= %= <<= >>= &= ^= ,(symbol #\| #\=)) - (,(symbol #\,)) - )))) - - -;; (f "2 + (2*2)") -;; :5967:23: In procedure resolve-order-of-operations: -;; In procedure car: Wrong type argument in position 1 (expecting pair): () - -;; Entering a new prompt. Type `,bt' for a backtrace or `,q' to continue. + `((-> ,(symbol #\.)) + (* / %) + (+ -) + (<< >>) + (< <= > >=) + (== !=) + (&) + (^) + (,(symbol #\|)) + (&&) + (,(symbol #\| #\|)) + (= += -= *= /= %= <<= >>= &= ^= ,(symbol #\| #\=)) + (,(symbol #\,)) + )))) (define* (resolve-order-of-operations tree optional: (order order-of-operations)) - (cond [(null? order) (car tree)] - [(not (list? tree)) tree] - [(= 1 (length tree)) (resolve-order-of-operations - (car tree) order)] - [else - (let ((groups (group-by tree (car order)))) - (cond [(= 1 (length groups)) - (resolve-order-of-operations - (car groups) (cdr order))] - [else - (cons (car order) - (append - (map (lambda (g) (resolve-order-of-operations - g (cdr order))) - groups)))]))])) + (if (null? order) + (car tree) + (match tree + [('*other* body ...) body] + [(form) (resolve-order-of-operations form order)] + [(forms ...) + (match (split-by forms (car order)) + [(group) (resolve-order-of-operations group (cdr order))] + [groups + (cons (car order) + (map (lambda (form) (resolve-order-of-operations form order-of-operations)) + groups))])] + [a a]))) + +(define (mark-other form) + (if (list? form) (cons '*other* form) form)) ;; Flatens a tree of infix triples. Stops when it should. +;; (parenthesis, function calls, ...) (define (flatten-infix form) (match form [('infix left op ('infix right ...)) @@ -304,12 +324,11 @@ (parse-lexeme-tree op) (flatten-infix (cons 'infix right)))] [('infix left op right) - (map parse-lexeme-tree (list left op right))] - [other (parse-lexeme-tree other)])) - - -;; scheme@(vulgar parse-cpp)> (match-pattern expr "a xorb") -;; $10 = # + (list (mark-other (parse-lexeme-tree left)) + (parse-lexeme-tree op) + (mark-other (parse-lexeme-tree right))) + #; (map parse-lexeme-tree (list left op right))] + [other (error "Not an infix tree ~a" other)])) (define (do-funcall function arguments) @@ -323,51 +342,118 @@ (map (lambda (node) (replace-symbols node dict)) tree))) -(define f (compose parse-lexeme-tree lex)) + -;;; Right, when left simple binding -;; direct constant (int|char) -;; (op forms ...) -;; (do-funcall forms ...) -;; direct variable +;; input "#define F(x, y) x + y" +;; 1 full define | F(x, y) +;; 2 macro name | F +;; 3 macro args | (x, y) +;; 4 macro body | x + y +(define define-re (make-regexp "^#define ((\\w+)(\\([^)]*\\))?) (.*)")) + +(define (tokenize-define-line header-line) + (aif (regexp-exec define-re header-line) + (cons (match:substring it 1) + (match:substring it 4)) + (error "Line dosen't match" header-line))) + +(define (tokenize-header-file header-file) + (map tokenize-define-line + (call-with-port + (open-input-pipe + (string-append "cpp -dM " header-file)) + read-lines))) + +(define symb-map + `((,(symbol #\|) . logior) + (funcall . do-funcall) + (&& . and) + (& . logand) + (== . =) + (!= . (negate =)) + )) + +(define (atom? x) + ;; NOT symbol + (or (number? x) + (char? x) + (string? x))) + +;; built in symbols. Should never be marked as dependencies +(define (primitive? x) + (memv x (cons 'funcall binary-operators))) + +;; -> (list (dependencies . symbol . value) (define (parse-cpp-define pair) + (define f (compose parse-lexeme-tree lex)) (define left (f (car pair))) - (define right (replace-symbols - (f (cdr pair)) - `((,(symbol #\|) . logior) - (funcall . do-funcall) - (&& . and) - (& . logand) - (== . =) - (!= . (negate =)) - ))) - - (match left - [('funcall name ('#{,}# args ...)) - `(define (,name ,@args) - ,right)] - - [('funcall name arg) - `(define (,name ,arg) - ,right)] - - [name `(define ,name ,right)])) - -;;; TODO order of these, to resolve dependencies -(define (parse-cpp-file file) - ;; (map parse-cpp-define (parse-header file)) - (map (lambda (i line) (catch #t (lambda () (parse-cpp-define line)) - (lambda (err caller fmt args . _) (format #t "~a ~?~%" i fmt args) #f))) - (iota (length (parse-header file)) 1) - (parse-header file))) - -;; (parse-cpp-file "/usr/include/termios.h") - -(begin - (define file (open-output-file "/tmp/termios.scm")) - (define lines (parse-cpp-file "/usr/include/termios.h")) - - (for-each (lambda (line) (format file "~y" line)) - lines) - (close-port file)) + (define proc-args + (match (and (pair? left) + (eq? 'funcall (car left)) + (caddr left)) + [#f '()] + [(_ args ...) args] + [arg (list arg)])) + + (define right (f (cdr pair))) + (define alt-right (replace-symbols right symb-map)) + (define dependencies + (lset-difference + eq? + (remove primitive? + (remove atom? + (flatten (if (list? right) + right (list right))))) + proc-args)) + + (cons + dependencies + (match left + [('funcall name ('#{,}# args ...)) + (cons name `(lambda ,args ,alt-right))] + + [('funcall name arg) + (cons name `(lambda (,arg) ,alt-right))] + + [name (cons name alt-right)]))) + + +(define source-form (make-object-property)) + +(define (parse-cpp-file lines) + (for (i line) in (enumerate lines) + (catch #t + (lambda () + (let ((def (parse-cpp-define line))) + (set! (source-form def) + (format #f "#define ~a ~a" (car line) (cdr line))) + def)) + (lambda (err caller fmt args . _) + (format #t "~a ~?~%" i fmt args) #f)))) + +(define (private-c-symbol? string) + (char=? #\_ (string-ref string 0))) + +(define-macro (include# header-file) + (define lines (remove (compose private-c-symbol? car) + (tokenize-header-file header-file))) + + (define forms (parse-cpp-file lines)) + + (define graph* + (fold (lambda (node graph) + (set! (source-form (cdr node)) + (source-form node)) + (add-node graph (cdr node) (car node))) + (make-graph car) + (filter identity forms))) + + ;; Hack for termios since this symbol isn't defined. + ;; (including in the above removed private c symbols) + (define graph (add-node graph* (cons '_POSIX_VDISABLE #f) '())) + + `(begin + ,@(for (key . value) in (resolve-dependency-graph graph) + `(define ,key ,value))))) + -- cgit v1.2.3