aboutsummaryrefslogtreecommitdiff
path: root/module/c
diff options
context:
space:
mode:
authorHugo Hörnquist <hugo@lysator.liu.se>2022-06-28 23:46:08 +0200
committerHugo Hörnquist <hugo@lysator.liu.se>2022-07-07 21:17:22 +0200
commit690034ca8abcd931e2fb6bb8129450deee701179 (patch)
treeb9b1fc396dcab97b6c08de71e89e8d985fdb93bd /module/c
parentJS user addition for parsing Microsoft Teams links. (diff)
downloadcalp-690034ca8abcd931e2fb6bb8129450deee701179.tar.gz
calp-690034ca8abcd931e2fb6bb8129450deee701179.tar.xz
Major work on c parser.
Diffstat (limited to 'module/c')
-rw-r--r--module/c/cpp.scm60
-rw-r--r--module/c/lex.scm25
-rw-r--r--module/c/operators.scm1
-rw-r--r--module/c/parse.scm138
4 files changed, 168 insertions, 56 deletions
diff --git a/module/c/cpp.scm b/module/c/cpp.scm
index a2935352..9a8245ad 100644
--- a/module/c/cpp.scm
+++ b/module/c/cpp.scm
@@ -11,7 +11,7 @@
:use-module (c lex)
:use-module (c parse)
:use-module (c operators)
- :export (do-funcall replace-symbols include#)
+ :export (replace-symbols include#)
)
@@ -20,7 +20,7 @@
;; 2 macro name | F
;; 3 macro args | (x, y)
;; 4 macro body | x + y
-(define define-re (make-regexp "^#define ((\\w+)(\\([^)]*\\))?) (.*)"))
+(define define-re (make-regexp "^#define ((\\w+)([(][^)]*[)])?) (.*)"))
(define (tokenize-define-line header-line)
(aif (regexp-exec define-re header-line)
@@ -32,20 +32,6 @@
(list header-line) #f)))
-(define (do-funcall function arguments)
- (if (list? arguments)
- (apply function arguments)
- (function arguments)))
-
-(define symb-map
- `((,(symbol #\|) . logior)
- (funcall . (@ (c cpp) do-funcall))
- (&& . and)
- (& . logand)
- (== . =)
- (!= . (negate =))
- ))
-
(define (replace-symbols tree dict)
(if (not (list? tree))
(or (assoc-ref dict tree) tree)
@@ -77,7 +63,6 @@
[arg (list arg)]))
(define right (f (cdr pair)))
- (define alt-right (replace-symbols right symb-map))
(define dependencies
(lset-difference
eq?
@@ -91,12 +76,12 @@
dependencies
(match left
[('funcall name ('#{,}# args ...))
- (cons name `(lambda ,args ,alt-right))]
+ (cons name `(lambda ,args ,right))]
[('funcall name arg)
- (cons name `(lambda (,arg) ,alt-right))]
+ (cons name `(lambda (,arg) ,right))]
- [name (cons name alt-right)])))
+ [name (cons name right)])))
(define (parse-cpp-file lines)
@@ -104,7 +89,9 @@
(catch #t
(lambda () (parse-cpp-define line))
(lambda (err caller fmt args data)
- (format #t "~a ~?~%" fmt args)
+ (format #t "~a in ~a: ~?~%"
+ err caller fmt args)
+ (format #t "~s~%" line)
#f)))
lines))
@@ -114,29 +101,32 @@
(define (tokenize-header-file header-file)
(map tokenize-define-line
(call-with-port
- (open-input-pipe
- (string-append "cpp -dM " header-file))
+ (open-pipe* OPEN_READ "cpp" "-dM" header-file)
read-lines)))
-(define-macro (include# header-file . args)
-
- (define define-form (if (null? args) 'define (car args)))
-
- (define lines (remove (compose private-c-symbol? car)
- (tokenize-header-file header-file)))
+(define (load-cpp-file header-file)
+ (define lines (tokenize-header-file header-file))
(define forms (parse-cpp-file lines))
- (define graph*
- (fold (lambda (node graph)
- (add-node graph (cdr node) (car node)))
- (make-graph car)
- (filter identity forms)))
+ (fold (lambda (node graph)
+ (add-node graph (cdr node) (car node)))
+ (make-graph car)
+ (filter identity forms)))
+(define (include% header-file)
+ (define graph* (load-cpp-file header-file))
;; Hack for termios since this symbol isn't defined.
;; (including in the above removed private c symbols)
(define graph (add-node graph* (cons '_POSIX_VDISABLE #f) '()))
+ ;; TODO expand bodies
+ ;; (remove (compose private-c-symbol? car))
+ (resolve-dependency-graph graph))
+
+(define-macro (include# header-file . args)
+
+ (define define-form (if (null? args) 'define (car args)))
`(begin
,@(map (lambda (pair) `(,define-form ,(car pair) ,(cdr pair)))
- (resolve-dependency-graph graph))))
+ (include% header-file))))
diff --git a/module/c/lex.scm b/module/c/lex.scm
index 34e52d88..5f395322 100644
--- a/module/c/lex.scm
+++ b/module/c/lex.scm
@@ -43,8 +43,22 @@
(define-peg-pattern integer all
(and (or base-8 base-16 base-10) (? integer-suffix)))
+;; (define-peg-pattern float-suffix all
+;; (* (or "f" "F" "l" "L")))
+
+;; (define-peg-pattern exponent all
+;; (and (ignore (or "e" "E")) (? (or "+" "-")) base-10))
+
+;; (define-peg-pattern float all
+;; (or
+;; (and base-10 exponent (? float-suffix))
+;; (and base-10 (ignore ".") (? exponent) (? float-suffix))
+;; (and (? base-10) (ignore ".") base-10 (? exponent) (? float-suffix))))
+
(define-peg-pattern number body
- (or integer))
+ (or ; float
+ integer
+ ))
(define-peg-pattern group all
(and (ignore "(") expr (ignore ")")))
@@ -65,11 +79,16 @@
(define-peg-pattern char all
(and (ignore "'") (or escaped-char peg-any) (ignore "'")))
+;; (define-peg-pattern quot none (string "\""))
+
+;; (define-peg-pattern string all
+;; (and quot (* (or escaped-char (or peg-any))) quot))
(define-peg-pattern* operator all
`(or ,@(map symbol->string symbol-binary-operators)
,@(map (lambda (op) `(and ,(symbol->string op) ws))
- wordy-binary-operators)))
+ wordy-binary-operators)
+ "?" ":"))
;; whitespace
(define-peg-pattern ws none
@@ -99,7 +118,7 @@
)))
(define-peg-pattern postfix-operator all
- (or "++" "--"))
+ (or "++" "--" "*"))
(define-peg-pattern postfix all
;; literals can't be in-place incremented and decremented
diff --git a/module/c/operators.scm b/module/c/operators.scm
index ab1b3e7c..131c702c 100644
--- a/module/c/operators.scm
+++ b/module/c/operators.scm
@@ -9,6 +9,7 @@
`(+ - * / & ,(symbol #\|) ^ << >> % < > =))
;; apparently part of C
+;; https://en.cppreference.com/w/cpp/language/operator_alternative
(define wordy-binary-operators
'(bitand and_eq and bitor or_eq or xor_eq xor))
diff --git a/module/c/parse.scm b/module/c/parse.scm
index 8030da77..d407f5ac 100644
--- a/module/c/parse.scm
+++ b/module/c/parse.scm
@@ -38,6 +38,19 @@
"Invalid integer suffix ~s"
(list str) #f)))
+(define (group-body->type vars)
+ (concatenate
+ (map
+ (match-lambda (('variable var) (list (parse-lexeme-tree `(variable ,var))))
+ (('postfix ('variable var)
+ ('postfix-operator "*"))
+ (list (parse-lexeme-tree `(variable ,var))
+ '*))
+ (else (scm-error 'c-parse-error "parse-lexeme-tree"
+ "Invalid token ~s in typecast form: ~s"
+ (list else vars) #f)))
+ vars)))
+
(define (parse-lexeme-tree tree)
(match tree
['() '()]
@@ -74,25 +87,43 @@
((#\v) #\vtab)
((#\\) #\\)
((#\') #\')))]
+
[('char c) (char->integer (string-ref c 0))]
[('variable var) (string->symbol var)]
+
+ [('operator "&&") 'and]
+ [('operator "&=") 'and_eq]
+ [('operator "&") 'bitand]
+ [('operator "|") 'bitor]
+ [('operator "!=") 'not_eq]
+ [('operator "||") 'or]
+ [('operator "|=") 'or_eq]
+ [('operator "^") 'xor]
+ [('operator "^=") 'xor_eq]
[('operator op) (string->symbol op)]
+
[('prefix-operator op)
(case (string->symbol op)
+ ((!) 'not)
+ ((~) 'compl)
((*) 'dereference)
((&) 'pointer)
((++) 'pre-increment)
((--) 'pre-decrement)
- (else => identity))]
+ (else (scm-error 'c-parse-error "parse-lexeme-tree"
+ "Unknown prefix operator ~s"
+ (list op) #f)))]
[('postfix-operator op)
(case (string->symbol op)
[(++) 'post-increment]
[(--) 'post-decrement]
- [else => identity])]
+ [else (scm-error 'c-parse-error "parse-lexeme-tree"
+ "Unknown postfix operator ~s"
+ (list op) #f)])]
;; Parenthesis grouping
- [('group args)
+ [('group args ...)
(parse-lexeme-tree args)]
;; Atomic item. Used by flatten-infix
@@ -108,37 +139,104 @@
,(parse-lexeme-tree arg))]
[('infix args ...)
- (resolve-order-of-operations
- (flatten-infix (cons 'infix args)))]
+ (let ((r
+ (resolve-order-of-operations
+ (flatten-infix (cons 'infix args)))))
+ (match r
+ (`(: (? ,op ,true)
+ ,false)
+ `(ternary ,op ,true ,false))
+ (_ r)))]
[('funcall function ('group arguments))
`(funcall ,(parse-lexeme-tree function)
,(parse-lexeme-tree arguments))]
- [bare (scm-error 'c-parse-error
- "parse-lexeme-tree"
- "Naked literal in lex-tree: ~s"
- (list bare)
- #f)]))
+ [(('variable "struct") ('variable value) ..1)
+ `(struct-type ,@(map string->symbol value))
+ ]
+
+ ;; A list of variables. Most likely a type signature
+ ;; [(('variable value) ..1)
+ ;; ]
+
+ ;; A typecast with only variables must (?) be a typecast?
+ [(('group groups ..1) ... value)
+ (fold-right (lambda (type done) `(as-type ,type ,done))
+ (parse-lexeme-tree value)
+ (map group-body->type groups))]
+
+ ;; Type name resolution?
+ ;; https://en.wikipedia.org/wiki/C_data_types
+ ;; base types with spaces:
+ ;; =======================
+ ;; [[un]signed] char
+ ;; [[un]signed] short [int]
+ ;; [[un]signed] int
+ ;; [un]signed [int]
+ ;; [[un]signed] long [int]
+ ;; [[un]signed] long long [int]
+ ;; float
+ ;; [long] double
+
+ ;; https://en.wikipedia.org/wiki/Type_qualifier
+ ;; qualifiers
+ ;; const
+ ;; volatile
+ ;; restrict
+ ;; _Atomic
+
+
+ ;; Storage specifiers
+ ;; auto
+ ;; register
+ ;; static
+ ;; extern
+
+ ;; struct <typename>
+ ;; enum <typename>
+ ;; union <typename>
+
+ ;; https://en.wikipedia.org/wiki/C_syntax
+ ;; int (*ptr_to_array)[100]
+
+
+ [(? symbol? bare)
+ (scm-error 'c-parse-error
+ "parse-lexeme-tree"
+ "Naked literal in lex-tree: ~s"
+ (list bare)
+ #f)]
+
+ [form
+ (scm-error 'c-parse-error
+ "parse-lexeme-tree"
+ "Unknown form in lex-tree: ~s"
+ (list form) #f)
+ ]))
;; https://en.wikipedia.org/wiki/Operators_in_C_and_C%2B%2B
+;; https://en.cppreference.com/w/c/language/operator_precedence
(define order-of-operations
(reverse
(concatenate
;; This is only for binary operations
`((-> ,(symbol #\.))
+ ;; All unary procedures go here, incnluding typecasts, and sizeof
(* / %)
(+ -)
(<< >>)
(< <= > >=)
- (== !=)
- (&)
- (^)
- (,(symbol #\|))
- (&&)
- (,(symbol #\| #\|))
- (= += -= *= /= %= <<= >>= &= ^= ,(symbol #\| #\=))
+ (== != not_eq)
+ (& bitand)
+ (^ xorg)
+ (,(symbol #\|) bitor)
+ (&& and)
+ (,(symbol #\| #\|) or)
+ (? :)
+ (= += -= *= /= %= <<= >>= &= ^= ,(symbol #\| #\=)
+ and_eq or_eq xor_eq)
(,(symbol #\,))
))))
@@ -148,8 +246,12 @@
(define* (resolve-order-of-operations
tree optional: (order order-of-operations))
+ (format (current-error-port) "~s~%" tree)
(if (null? order)
- (car tree)
+ (scm-error 'c-parse-error
+ "resolve-order-of-operations"
+ "Out of operations to resolve when resolving expression ~s"
+ (list tree) #f)
(match tree
[('*other* body ...) body]
[(form) (resolve-order-of-operations form order)]