aboutsummaryrefslogtreecommitdiff
path: root/module/c/parse.scm
diff options
context:
space:
mode:
authorHugo Hörnquist <hugo@lysator.liu.se>2022-06-28 23:46:08 +0200
committerHugo Hörnquist <hugo@lysator.liu.se>2022-07-07 21:17:22 +0200
commit690034ca8abcd931e2fb6bb8129450deee701179 (patch)
treeb9b1fc396dcab97b6c08de71e89e8d985fdb93bd /module/c/parse.scm
parentJS user addition for parsing Microsoft Teams links. (diff)
downloadcalp-690034ca8abcd931e2fb6bb8129450deee701179.tar.gz
calp-690034ca8abcd931e2fb6bb8129450deee701179.tar.xz
Major work on c parser.
Diffstat (limited to 'module/c/parse.scm')
-rw-r--r--module/c/parse.scm138
1 files changed, 120 insertions, 18 deletions
diff --git a/module/c/parse.scm b/module/c/parse.scm
index 8030da77..d407f5ac 100644
--- a/module/c/parse.scm
+++ b/module/c/parse.scm
@@ -38,6 +38,19 @@
"Invalid integer suffix ~s"
(list str) #f)))
+(define (group-body->type vars)
+ (concatenate
+ (map
+ (match-lambda (('variable var) (list (parse-lexeme-tree `(variable ,var))))
+ (('postfix ('variable var)
+ ('postfix-operator "*"))
+ (list (parse-lexeme-tree `(variable ,var))
+ '*))
+ (else (scm-error 'c-parse-error "parse-lexeme-tree"
+ "Invalid token ~s in typecast form: ~s"
+ (list else vars) #f)))
+ vars)))
+
(define (parse-lexeme-tree tree)
(match tree
['() '()]
@@ -74,25 +87,43 @@
((#\v) #\vtab)
((#\\) #\\)
((#\') #\')))]
+
[('char c) (char->integer (string-ref c 0))]
[('variable var) (string->symbol var)]
+
+ [('operator "&&") 'and]
+ [('operator "&=") 'and_eq]
+ [('operator "&") 'bitand]
+ [('operator "|") 'bitor]
+ [('operator "!=") 'not_eq]
+ [('operator "||") 'or]
+ [('operator "|=") 'or_eq]
+ [('operator "^") 'xor]
+ [('operator "^=") 'xor_eq]
[('operator op) (string->symbol op)]
+
[('prefix-operator op)
(case (string->symbol op)
+ ((!) 'not)
+ ((~) 'compl)
((*) 'dereference)
((&) 'pointer)
((++) 'pre-increment)
((--) 'pre-decrement)
- (else => identity))]
+ (else (scm-error 'c-parse-error "parse-lexeme-tree"
+ "Unknown prefix operator ~s"
+ (list op) #f)))]
[('postfix-operator op)
(case (string->symbol op)
[(++) 'post-increment]
[(--) 'post-decrement]
- [else => identity])]
+ [else (scm-error 'c-parse-error "parse-lexeme-tree"
+ "Unknown postfix operator ~s"
+ (list op) #f)])]
;; Parenthesis grouping
- [('group args)
+ [('group args ...)
(parse-lexeme-tree args)]
;; Atomic item. Used by flatten-infix
@@ -108,37 +139,104 @@
,(parse-lexeme-tree arg))]
[('infix args ...)
- (resolve-order-of-operations
- (flatten-infix (cons 'infix args)))]
+ (let ((r
+ (resolve-order-of-operations
+ (flatten-infix (cons 'infix args)))))
+ (match r
+ (`(: (? ,op ,true)
+ ,false)
+ `(ternary ,op ,true ,false))
+ (_ r)))]
[('funcall function ('group arguments))
`(funcall ,(parse-lexeme-tree function)
,(parse-lexeme-tree arguments))]
- [bare (scm-error 'c-parse-error
- "parse-lexeme-tree"
- "Naked literal in lex-tree: ~s"
- (list bare)
- #f)]))
+ [(('variable "struct") ('variable value) ..1)
+ `(struct-type ,@(map string->symbol value))
+ ]
+
+ ;; A list of variables. Most likely a type signature
+ ;; [(('variable value) ..1)
+ ;; ]
+
+ ;; A typecast with only variables must (?) be a typecast?
+ [(('group groups ..1) ... value)
+ (fold-right (lambda (type done) `(as-type ,type ,done))
+ (parse-lexeme-tree value)
+ (map group-body->type groups))]
+
+ ;; Type name resolution?
+ ;; https://en.wikipedia.org/wiki/C_data_types
+ ;; base types with spaces:
+ ;; =======================
+ ;; [[un]signed] char
+ ;; [[un]signed] short [int]
+ ;; [[un]signed] int
+ ;; [un]signed [int]
+ ;; [[un]signed] long [int]
+ ;; [[un]signed] long long [int]
+ ;; float
+ ;; [long] double
+
+ ;; https://en.wikipedia.org/wiki/Type_qualifier
+ ;; qualifiers
+ ;; const
+ ;; volatile
+ ;; restrict
+ ;; _Atomic
+
+
+ ;; Storage specifiers
+ ;; auto
+ ;; register
+ ;; static
+ ;; extern
+
+ ;; struct <typename>
+ ;; enum <typename>
+ ;; union <typename>
+
+ ;; https://en.wikipedia.org/wiki/C_syntax
+ ;; int (*ptr_to_array)[100]
+
+
+ [(? symbol? bare)
+ (scm-error 'c-parse-error
+ "parse-lexeme-tree"
+ "Naked literal in lex-tree: ~s"
+ (list bare)
+ #f)]
+
+ [form
+ (scm-error 'c-parse-error
+ "parse-lexeme-tree"
+ "Unknown form in lex-tree: ~s"
+ (list form) #f)
+ ]))
;; https://en.wikipedia.org/wiki/Operators_in_C_and_C%2B%2B
+;; https://en.cppreference.com/w/c/language/operator_precedence
(define order-of-operations
(reverse
(concatenate
;; This is only for binary operations
`((-> ,(symbol #\.))
+ ;; All unary procedures go here, incnluding typecasts, and sizeof
(* / %)
(+ -)
(<< >>)
(< <= > >=)
- (== !=)
- (&)
- (^)
- (,(symbol #\|))
- (&&)
- (,(symbol #\| #\|))
- (= += -= *= /= %= <<= >>= &= ^= ,(symbol #\| #\=))
+ (== != not_eq)
+ (& bitand)
+ (^ xorg)
+ (,(symbol #\|) bitor)
+ (&& and)
+ (,(symbol #\| #\|) or)
+ (? :)
+ (= += -= *= /= %= <<= >>= &= ^= ,(symbol #\| #\=)
+ and_eq or_eq xor_eq)
(,(symbol #\,))
))))
@@ -148,8 +246,12 @@
(define* (resolve-order-of-operations
tree optional: (order order-of-operations))
+ (format (current-error-port) "~s~%" tree)
(if (null? order)
- (car tree)
+ (scm-error 'c-parse-error
+ "resolve-order-of-operations"
+ "Out of operations to resolve when resolving expression ~s"
+ (list tree) #f)
(match tree
[('*other* body ...) body]
[(form) (resolve-order-of-operations form order)]