aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHugo Hörnquist <hugo@lysator.liu.se>2020-07-20 03:23:31 +0200
committerHugo Hörnquist <hugo@lysator.liu.se>2020-07-20 04:19:03 +0200
commite06ab51e5ebc461ae6a2330fcefb09c0762c4d01 (patch)
tree5826396c049c6f0f64baeaee5534a19ac555808c
parentAdd missing format dependency for exceptions. (diff)
downloadcalp-e06ab51e5ebc461ae6a2330fcefb09c0762c4d01.tar.gz
calp-e06ab51e5ebc461ae6a2330fcefb09c0762c4d01.tar.xz
Bunch of work on cpp.
-rw-r--r--module/vulgar/parse-cpp.scm446
1 files changed, 266 insertions, 180 deletions
diff --git a/module/vulgar/parse-cpp.scm b/module/vulgar/parse-cpp.scm
index 62d1e41a..2a58d158 100644
--- a/module/vulgar/parse-cpp.scm
+++ b/module/vulgar/parse-cpp.scm
@@ -1,42 +1,49 @@
(define-module (vulgar parse-cpp)
:use-module (util)
+ :use-module (srfi srfi-1)
:use-module (ice-9 popen)
- :use-module (ice-9 rdelim)
:use-module (ice-9 peg)
:use-module (ice-9 match)
+ ;; required by define-define-peg-pattern
+ :use-module ((system base compile) :select (compile))
+ :use-module ((rnrs io ports) :select (call-with-port))
+ :use-module (ice-9 pretty-print) ; used by one error handler
+ :use-module ((util io) :select (read-lines))
+ :use-module (util graph)
)
-(define (read-lines port)
- (with-input-from-port port
- (lambda ()
- (let loop ((line (read-line)))
- (if (eof-object? line)
- '() (cons line (loop (read-line))))))))
-
-(define (parse-header header-file)
- (map (lambda (line)
- (let* ((symbol (string-index line #\space))
- (value (string-index line #\space (1+ symbol))))
- (cons (substring line (1+ symbol) value)
- (substring line (1+ value)))))
- (read-lines (open-input-pipe (string-append "cpp -dM " header-file))))
-
- #;
- (let* (((_ key . values) (string-split line #\space)))
- (if (char=? #\_ (string-ref key 0))
- (loop (read-line))
- (cons (cons key (string-join values " " 'infix))
- (loop (read-line)))))
+
+;;; Simple operators are those which can be combined with '='
+(define simple-operators
+ `(+ - * / & ,(symbol #\|) ^ << >> % < > =))
+;; apparently part of C
+(define wordy-binary-operators
+ '(bitand and_eq and bitor or_eq or xor_eq xor))
- )
+(define symbol-binary-operators
+ (append (map (lambda (x) (symbol-append x '=)) simple-operators)
+ `(&& ,(symbol #\| #\|) != ,(symbol #\,)
+ -> ,(symbol #\.))
+ simple-operators))
+(define binary-operators
+ (append symbol-binary-operators
+ wordy-binary-operators))
;;; Lexer
+;; Like the regular define-peg-pattern. But evaluates the
+;; pattern before treating it as a peg rule.
+(define-macro (define-define-peg-pattern name capture expr)
+ `(define-peg-pattern ,name ,capture
+ ;; NOTE how does this work if we are in a different module?
+ ;; It currently however isn't a problem since we don't export
+ ;; this macro.
+ ,(eval expr (current-module))))
(define-peg-pattern base-8-digit body
(range #\0 #\7))
@@ -49,12 +56,21 @@
(range #\A #\F)
(range #\a #\f)))
+;; https://en.cppreference.com/w/cpp/language/integer_literal
(define-peg-pattern base-10 all (+ base-10-digit))
-(define-peg-pattern base-8 all (and "0" (+ base-8-digit)))
-(define-peg-pattern base-16 all (and "0x" (+ base-16-digit)))
+(define-peg-pattern base-8 all (and (ignore "0") (+ base-8-digit)))
+(define-peg-pattern base-16 all (and (ignore (and "0" (or "x" "X")))
+ (+ base-16-digit)))
+
+;; accept anything now, ensure correctnes later
+(define-peg-pattern integer-suffix all
+ (* (or "u" "U" "l" "L")))
+
+(define-peg-pattern integer all
+ (and (or base-8 base-16 base-10) (? integer-suffix)))
(define-peg-pattern number body
- (or base-8 base-16 base-10))
+ (or integer))
(define-peg-pattern group all
(and (ignore "(") expr (ignore ")")))
@@ -75,34 +91,17 @@
(define-peg-pattern char all
(and (ignore "'") (or escaped-char peg-any) (ignore "'")))
-;; (define-peg-pattern string body
-;; (and "\"" (+ (or "\\\"" peg-any)) "\""))
-
-;;; Simple operators are those which can be combined with '='
-(define-peg-pattern simple-operator body
- (or "+" "-" "*" "/" "&" "|" "^" "<<" ">>" "%"
- "<" ">" "="))
-
-(define-peg-pattern operator all
- (or (and simple-operator "=")
- "&&" "||"
- simple-operator
- "!=" ","
- "and" "bitand" "and_eq"
- "or" "bitor" "or_eq"
- "xor" "xor_eq"
- ;; "->" "." ; special cases since can only be used with variables
- ;; Todo Ternaries
- ))
-
+(define-define-peg-pattern operator all
+ `(or ,@(map symbol->string symbol-binary-operators)
+ ,@(map (lambda (op) `(and ,(symbol->string op) ws))
+ wordy-binary-operators)))
;; whitespace
(define-peg-pattern ws none
(or " " " " "\n"))
;; space (for when whitespace is optional)
-(define-peg-pattern sp none
- (* ws))
+(define-peg-pattern sp none (* ws))
(define-peg-pattern safe-letter body
(or "_"
@@ -119,13 +118,15 @@
(define-peg-pattern atom all
(or base-8 base-10 base-16 number char variable))
-;;; ++ and -- both pre and postfix
-
(define-peg-pattern prefix-operator all
(or "!" "~" "*" "&" "++" "--" "+" "-"))
+;;; Note that stacked pre or postfix operators without parenthesis
+;;; dosen't work. So `*&C' is invalid, while `*(&C)' is valid.
+
(define-peg-pattern prefix all
- (and prefix-operator sp expr))
+ (and prefix-operator sp (or variable group funcall #; postfix
+ )))
(define-peg-pattern postfix-operator all
(or "++" "--"))
@@ -134,24 +135,22 @@
;; literals can't be in-place incremented and decremented
;; Make sure we don't match postfix-operator here, since
;; that also gives us an infinite loop.
- (and (or prefix infix funcall group variable) sp postfix-operator))
+ (and (or prefix funcall group variable) sp postfix-operator))
-;; 5 + 3 * 9
-;; (5 + 3) * 9
-;; 5 + (3 * 9)
(define-peg-pattern infix all
;; first case is "same" as expr, but in different order to prevent
- ;; infinite self reference.
- (and (or funcall group char prefix #; postfix number variable
- ) sp operator sp expr))
+ ;; infinite self reference. Pre and postfix not here, solved by having
+ ;; them before infix in expr
+ (and (or funcall postfix prefix group char number variable)
+ sp operator sp expr))
(define-peg-pattern funcall all
(and variable sp group))
;;; main parser
(define-peg-pattern expr body
- (+ (and sp (or prefix #; postfix infix funcall group char number variable
- ) sp)))
+ (+ (and sp (or infix postfix prefix funcall group char number variable)
+ sp)))
(define (lex string)
@@ -160,6 +159,37 @@
;;; Parser
+(define (perms set)
+ (concatenate
+ (map (lambda (key)
+ (map (lambda (o) (cons key o))
+ (delete key set)))
+ set)))
+
+(define (symbol-concat pair)
+ (cond [(null? (car pair)) (cdr pair)]
+ [(null? (cdr pair)) (car pair)]
+ [else (symbol-append (car pair) (cdr pair))]))
+
+(define (parse-integer-suffix str)
+
+ (define valid-sequences
+ (delete 'dummy
+ (lset-union eq? '(dummy)
+ (map symbol-concat (perms '(() U L)))
+ (map symbol-concat (perms '(() U LL))))))
+
+ ;; => (LLU ULL LL LU UL L U)
+
+ (aif (memv (string->symbol (string-upcase str))
+ valid-sequences)
+ (case (car it)
+ [(LLU ULL) '(unsigned long-long)]
+ [(LU UL) '(unsigned long)]
+ [(LL) '(long-long)]
+ [(L) '(long)]
+ [(U) '(unsigned)])
+ (error "Invalid integer suffix")))
(define (parse-lexeme-tree tree)
(match tree
@@ -170,24 +200,34 @@
[('base-8 n) (string->number n 8)]
[('base-16 n) (string->number n 16)]
- ;; Character literals
+ [('integer n ('integer-suffix suffix))
+ `(as-type
+ ,(parse-integer-suffix suffix)
+ ,(parse-lexeme-tree n))
+ ]
+ [('integer n)
+ (parse-lexeme-tree n)]
+
+ ;; Character literals, stored as raw integers
+ ;; so mathematical operations keep working on them.
[('char ('escaped-char ('base-8-char n)))
- (-> n (string->number 8) integer->char)]
+ (-> n (string->number 8) #; integer->char)]
[('char ('escaped-char ('base-16-char n)))
- (-> n (string->number 16) integer->char)]
+ (-> n (string->number 16) #; integer->char)]
[('char ('escaped-char c))
- (case (string-ref c 0)
- ((#\a) #\alarm)
- ((#\b) #\backspace)
- ((#\e) #\esc)
- ((#\f) #\page)
- ((#\n) #\newline)
- ((#\r) #\return)
- ((#\t) #\tab)
- ((#\v) #\vtab)
- ((#\\) #\\)
- ((#\') #\'))]
- [('char c) (string-ref c 0)]
+ (char->integer
+ (case (string-ref c 0)
+ ((#\a) #\alarm)
+ ((#\b) #\backspace)
+ ((#\e) #\esc)
+ ((#\f) #\page)
+ ((#\n) #\newline)
+ ((#\r) #\return)
+ ((#\t) #\tab)
+ ((#\v) #\vtab)
+ ((#\\) #\\)
+ ((#\') #\')))]
+ [('char c) (char->integer (string-ref c 0))]
[('variable var) (string->symbol var)]
[('operator op) (string->symbol op)]
@@ -228,27 +268,11 @@
`(funcall ,(parse-lexeme-tree function)
,(parse-lexeme-tree arguments))]
- [bare (error "Naked literal in lex-tree. How did that get there?"
- bare)]))
-
-;;; TODO
-;; (f "*C++")
-;; $427 = (dereference (post-increment C))
-
-(define (group-by list item)
- (let loop ((done '())
- (current '())
- (rem list))
- (cond [(null? rem)
- (reverse (cons (reverse current) done))]
- [(eqv? item (car rem))
- (loop (cons (reverse current) done)
+ [bare (throw 'parse-error
+ 'parse-lexeme-tree
+ "Naked literal in lex-tree. How did that get there?"
'()
- (cdr rem))]
- [else
- (loop done
- (cons (car rem) current)
- (cdr rem))])))
+ bare)]))
;; https://en.wikipedia.org/wiki/Operators_in_C_and_C%2B%2B
@@ -256,47 +280,43 @@
(reverse
(apply append
;; This is only for binary operations
- `((* / %)
- (+ -)
- (<< >>)
- (< <= > >=)
- (== !=)
- (&)
- (^)
- (,(symbol #\|))
- (&&)
- (,(symbol #\| #\|))
- (= += -= *= /= %= <<= >>= &= ^= ,(symbol #\| #\=))
- (,(symbol #\,))
- ))))
-
-
-;; (f "2 + (2*2)")
-;; <unnamed port>:5967:23: In procedure resolve-order-of-operations:
-;; In procedure car: Wrong type argument in position 1 (expecting pair): ()
-
-;; Entering a new prompt. Type `,bt' for a backtrace or `,q' to continue.
+ `((-> ,(symbol #\.))
+ (* / %)
+ (+ -)
+ (<< >>)
+ (< <= > >=)
+ (== !=)
+ (&)
+ (^)
+ (,(symbol #\|))
+ (&&)
+ (,(symbol #\| #\|))
+ (= += -= *= /= %= <<= >>= &= ^= ,(symbol #\| #\=))
+ (,(symbol #\,))
+ ))))
(define* (resolve-order-of-operations
tree optional: (order order-of-operations))
- (cond [(null? order) (car tree)]
- [(not (list? tree)) tree]
- [(= 1 (length tree)) (resolve-order-of-operations
- (car tree) order)]
- [else
- (let ((groups (group-by tree (car order))))
- (cond [(= 1 (length groups))
- (resolve-order-of-operations
- (car groups) (cdr order))]
- [else
- (cons (car order)
- (append
- (map (lambda (g) (resolve-order-of-operations
- g (cdr order)))
- groups)))]))]))
+ (if (null? order)
+ (car tree)
+ (match tree
+ [('*other* body ...) body]
+ [(form) (resolve-order-of-operations form order)]
+ [(forms ...)
+ (match (split-by forms (car order))
+ [(group) (resolve-order-of-operations group (cdr order))]
+ [groups
+ (cons (car order)
+ (map (lambda (form) (resolve-order-of-operations form order-of-operations))
+ groups))])]
+ [a a])))
+
+(define (mark-other form)
+ (if (list? form) (cons '*other* form) form))
;; Flatens a tree of infix triples. Stops when it should.
+;; (parenthesis, function calls, ...)
(define (flatten-infix form)
(match form
[('infix left op ('infix right ...))
@@ -304,12 +324,11 @@
(parse-lexeme-tree op)
(flatten-infix (cons 'infix right)))]
[('infix left op right)
- (map parse-lexeme-tree (list left op right))]
- [other (parse-lexeme-tree other)]))
-
-
-;; scheme@(vulgar parse-cpp)> (match-pattern expr "a xorb")
-;; $10 = #<peg start: 0 end: 6 string: a xorb tree: (infix (variable a) (operator xor) (variable b))>
+ (list (mark-other (parse-lexeme-tree left))
+ (parse-lexeme-tree op)
+ (mark-other (parse-lexeme-tree right)))
+ #; (map parse-lexeme-tree (list left op right))]
+ [other (error "Not an infix tree ~a" other)]))
(define (do-funcall function arguments)
@@ -323,51 +342,118 @@
(map (lambda (node) (replace-symbols node dict))
tree)))
-(define f (compose parse-lexeme-tree lex))
+
-;;; Right, when left simple binding
-;; direct constant (int|char)
-;; (op forms ...)
-;; (do-funcall forms ...)
-;; direct variable
+;; input "#define F(x, y) x + y"
+;; 1 full define | F(x, y)
+;; 2 macro name | F
+;; 3 macro args | (x, y)
+;; 4 macro body | x + y
+(define define-re (make-regexp "^#define ((\\w+)(\\([^)]*\\))?) (.*)"))
+
+(define (tokenize-define-line header-line)
+ (aif (regexp-exec define-re header-line)
+ (cons (match:substring it 1)
+ (match:substring it 4))
+ (error "Line dosen't match" header-line)))
+
+(define (tokenize-header-file header-file)
+ (map tokenize-define-line
+ (call-with-port
+ (open-input-pipe
+ (string-append "cpp -dM " header-file))
+ read-lines)))
+
+(define symb-map
+ `((,(symbol #\|) . logior)
+ (funcall . do-funcall)
+ (&& . and)
+ (& . logand)
+ (== . =)
+ (!= . (negate =))
+ ))
+
+(define (atom? x)
+ ;; NOT symbol
+ (or (number? x)
+ (char? x)
+ (string? x)))
+
+;; built in symbols. Should never be marked as dependencies
+(define (primitive? x)
+ (memv x (cons 'funcall binary-operators)))
+
+;; -> (list (dependencies . symbol . value)
(define (parse-cpp-define pair)
+ (define f (compose parse-lexeme-tree lex))
(define left (f (car pair)))
- (define right (replace-symbols
- (f (cdr pair))
- `((,(symbol #\|) . logior)
- (funcall . do-funcall)
- (&& . and)
- (& . logand)
- (== . =)
- (!= . (negate =))
- )))
-
- (match left
- [('funcall name ('#{,}# args ...))
- `(define (,name ,@args)
- ,right)]
-
- [('funcall name arg)
- `(define (,name ,arg)
- ,right)]
-
- [name `(define ,name ,right)]))
-
-;;; TODO order of these, to resolve dependencies
-(define (parse-cpp-file file)
- ;; (map parse-cpp-define (parse-header file))
- (map (lambda (i line) (catch #t (lambda () (parse-cpp-define line))
- (lambda (err caller fmt args . _) (format #t "~a ~?~%" i fmt args) #f)))
- (iota (length (parse-header file)) 1)
- (parse-header file)))
-
-;; (parse-cpp-file "/usr/include/termios.h")
-
-(begin
- (define file (open-output-file "/tmp/termios.scm"))
- (define lines (parse-cpp-file "/usr/include/termios.h"))
-
- (for-each (lambda (line) (format file "~y" line))
- lines)
- (close-port file))
+ (define proc-args
+ (match (and (pair? left)
+ (eq? 'funcall (car left))
+ (caddr left))
+ [#f '()]
+ [(_ args ...) args]
+ [arg (list arg)]))
+
+ (define right (f (cdr pair)))
+ (define alt-right (replace-symbols right symb-map))
+ (define dependencies
+ (lset-difference
+ eq?
+ (remove primitive?
+ (remove atom?
+ (flatten (if (list? right)
+ right (list right)))))
+ proc-args))
+
+ (cons
+ dependencies
+ (match left
+ [('funcall name ('#{,}# args ...))
+ (cons name `(lambda ,args ,alt-right))]
+
+ [('funcall name arg)
+ (cons name `(lambda (,arg) ,alt-right))]
+
+ [name (cons name alt-right)])))
+
+
+(define source-form (make-object-property))
+
+(define (parse-cpp-file lines)
+ (for (i line) in (enumerate lines)
+ (catch #t
+ (lambda ()
+ (let ((def (parse-cpp-define line)))
+ (set! (source-form def)
+ (format #f "#define ~a ~a" (car line) (cdr line)))
+ def))
+ (lambda (err caller fmt args . _)
+ (format #t "~a ~?~%" i fmt args) #f))))
+
+(define (private-c-symbol? string)
+ (char=? #\_ (string-ref string 0)))
+
+(define-macro (include# header-file)
+ (define lines (remove (compose private-c-symbol? car)
+ (tokenize-header-file header-file)))
+
+ (define forms (parse-cpp-file lines))
+
+ (define graph*
+ (fold (lambda (node graph)
+ (set! (source-form (cdr node))
+ (source-form node))
+ (add-node graph (cdr node) (car node)))
+ (make-graph car)
+ (filter identity forms)))
+
+ ;; Hack for termios since this symbol isn't defined.
+ ;; (including in the above removed private c symbols)
+ (define graph (add-node graph* (cons '_POSIX_VDISABLE #f) '()))
+
+ `(begin
+ ,@(for (key . value) in (resolve-dependency-graph graph)
+ `(define ,key ,value)))))
+