aboutsummaryrefslogtreecommitdiff
path: root/module/c/parse.scm
diff options
context:
space:
mode:
authorHugo Hörnquist <hugo@lysator.liu.se>2022-07-21 18:05:59 +0200
committerHugo Hörnquist <hugo@lysator.liu.se>2022-07-21 18:05:59 +0200
commit3413f60db482ce7e6d6d786348723a2b406d1038 (patch)
treec3063d65d05e93ff6045f99fa15ff80acf7d3066 /module/c/parse.scm
parentMajor work on parser. (diff)
downloadcalp-3413f60db482ce7e6d6d786348723a2b406d1038.tar.gz
calp-3413f60db482ce7e6d6d786348723a2b406d1038.tar.xz
Remove old unused files.
Diffstat (limited to 'module/c/parse.scm')
-rw-r--r--module/c/parse.scm426
1 files changed, 0 insertions, 426 deletions
diff --git a/module/c/parse.scm b/module/c/parse.scm
deleted file mode 100644
index 7d11ea17..00000000
--- a/module/c/parse.scm
+++ /dev/null
@@ -1,426 +0,0 @@
-(define-module (c parse)
- :use-module (hnh util)
- :use-module (srfi srfi-1)
- :use-module (srfi srfi-71)
- :use-module (ice-9 match)
- :use-module ((rnrs io ports)
- :select (string->bytevector make-transcoder utf-8-codec))
- :use-module (rnrs bytevectors)
- :export (parse-lexeme-tree))
-
-(define (permutations set)
- (concatenate
- (map (lambda (key)
- (map (lambda (o) (cons key o))
- (delete key set)))
- set)))
-
-(define (symbol-concat pair)
- (cond [(null? (car pair)) (cdr pair)]
- [(null? (cdr pair)) (car pair)]
- [else (symbol-append (car pair) (cdr pair))]))
-
-(define (parse-integer-suffix str)
- (define valid-sequences
- (delete 'dummy
- (lset-union eq? '(dummy)
- (map symbol-concat (permutations '(() U L)))
- (map symbol-concat (permutations '(() U LL))))))
-
- ;; => (LLU ULL LL LU UL L U)
-
- (aif (memv (string->symbol (string-upcase str))
- valid-sequences)
- (case (car it)
- [(LLU ULL) '(unsigned long long)]
- [(LU UL) '(unsigned long)]
- [(LL) '(long long)]
- [(L) '(long)]
- [(U) '(unsigned)])
- (scm-error 'c-parse-error "parse-integer-suffix"
- "Invalid integer suffix ~s"
- (list str) #f)))
-
-(define (parse-float-suffix str)
- (case (string->symbol str)
- ((f F) '(float))
- ((l L) '(long double))))
-
-(define (group-body->type vars)
- (concatenate
- (map
- (match-lambda (('variable var) (list (parse-lexeme-tree `(variable ,var))))
- (('postfix ('variable var)
- ('postfix-operator "*"))
- (list (parse-lexeme-tree `(variable ,var))
- '*))
- (else (scm-error 'c-parse-error "parse-lexeme-tree"
- "Invalid token ~s in typecast form: ~s"
- (list else vars) #f)))
- vars)))
-
-;; Takes a list of strings and integers, and merges it all into a single
-;; bytevector representing a c string
-(define* (string-fragments->c-string
- fragments optional: (transcoder (make-transcoder (utf-8-codec))))
-
- (define fragments-fixed
- (map (lambda (frag)
- (if (string? frag)
- (string->bytevector frag transcoder)
- frag))
- fragments))
-
- (define bv-length
- (fold (lambda (item sum) (+ sum (if (bytevector? item)
- (bytevector-length item)
- 1)))
- 0 fragments-fixed))
-
- (define bv (make-bytevector (1+ bv-length)))
- ;; trailing null byte
- (bytevector-u8-set! bv bv-length 0)
- (fold (lambda (item idx)
- (cond ((bytevector? item)
- (bytevector-copy! item 0
- bv idx
- (bytevector-length item))
- (+ idx (bytevector-length item)))
- (else (bytevector-u8-set! bv idx item)
- (+ idx 1))))
- 0
- fragments-fixed)
- bv)
-
-(define (parse-float-form float-form)
- (let ((float-string
- (fold (lambda (arg str)
- (string-append
- str
- (match arg
- (('float-integer ('base-10 n)) n)
- (('float-decimal ('base-10 n)) (string-append "." n))
- (('exponent "+" ('base-10 n)) (string-append "e" n))
- (('exponent ('base-10 n)) (string-append "e" n))
- (('exponent "-" ('base-10 n)) (string-append "e-" n)))))
- "" float-form)))
- ;; exact->inexact is a no-op if we already have an inexact number, but
- ;; ensures we get an inexact number when we have an exact number (which we
- ;; can get from the "1." case). Returning an inexact number here is important
- ;; to avoid arithmetic suprises later.
- (exact->inexact
- (or (string->number float-string)
- (scm-error 'c-parse-error "parse-lexeme-tree"
- "Couldn't parse expression as float: ~s"
- (list `(float ,@args)) #f)))))
-
-
-(define (resolve-escaped-char form)
- (match form
- (('base-8-char n) (string->number n 8))
- (('base-16-char n) (string->number n 16))
- (c (char->integer
- (case (string-ref c 0)
- ((#\a) #\alarm)
- ((#\b) #\backspace)
- ((#\e) #\esc) ;; non-standard
- ((#\f) #\page)
- ((#\n) #\newline)
- ((#\r) #\return)
- ((#\t) #\tab)
- ((#\v) #\vtab)
- ((#\\) #\\)
- ;; These are valid in both strings and chars
- ((#\') #\')
- ((#\") #\"))))))
-
-;; Takes a list of strings and escaped-char form
-;; and returns a list of strings and integers
-(define (resolve-string-fragment fragment)
- (match fragment
- (('escaped-char c)
- (resolve-escaped-char c))
- (fargment fragment)))
-
-(define (parse-lexeme-tree tree)
- (match tree
- ['() '()]
-
- ;; Number constants
- [('base-10 n) (string->number n 10)]
- [('base-8 n) (string->number n 8)]
- [('base-16 n) (string->number n 16)]
-
- [('integer n ('integer-suffix suffix))
- `(as-type
- ,(parse-integer-suffix suffix)
- ,(parse-lexeme-tree n))]
-
- [('integer n)
- (parse-lexeme-tree n)]
-
-
- [('float args ... ('float-suffix suffix))
- `(as-type ,(parse-float-suffix suffix)
- ;; parse rest of float as if it lacked a suffix
- ,(parse-lexeme-tree `(float ,@args)))]
-
- [('float args ...) (parse-float-form args)]
-
- ;; Character literals, stored as raw integers
- ;; so mathematical operations keep working on them.
- [('char ('escaped-char c)) (resolve-escaped-char c)]
-
- [('char c) (char->integer (string-ref c 0))]
-
- [('variable var) (string->symbol var)]
-
- ;; normalize some binary operators to their wordy equivalent
- ;; (which also happens to match better with scheme)
- [('operator "&&") 'and]
- [('operator "&=") 'and_eq]
- [('operator "&") 'bitand]
- [('operator "|") 'bitor]
- [('operator "!=") 'not_eq]
- [('operator "||") 'or]
- [('operator "|=") 'or_eq]
- [('operator "^") 'xor]
- [('operator "^=") 'xor_eq]
- ;; Change these names to something scheme can handle better
- [('operator ".") 'object-slot]
- [('operator "->") 'dereference-slot]
- [('operator op) (string->symbol op)]
-
- [('prefix-operator op)
- (case (string->symbol op)
- ((!) 'not)
- ((~) 'compl)
- ((*) 'dereference)
- ((&) 'pointer)
- ((++) 'pre-increment)
- ((--) 'pre-decrement)
- ((-) '-)
- (else (scm-error 'c-parse-error "parse-lexeme-tree"
- "Unknown prefix operator ~s"
- (list op) #f)))]
- [('postfix-operator op)
- (case (string->symbol op)
- [(++) 'post-increment]
- [(--) 'post-decrement]
- [else (scm-error 'c-parse-error "parse-lexeme-tree"
- "Unknown postfix operator ~s"
- (list op) #f)])]
-
- ;; Parenthesis grouping
- [('group args ...)
- (parse-lexeme-tree args)]
-
- [('prefix op arg)
- `(,(parse-lexeme-tree op)
- ,(parse-lexeme-tree arg))]
-
- [('postfix arg op)
- `(,(parse-lexeme-tree op)
- ,(parse-lexeme-tree arg))]
-
-
-
-
-
- ;; resolved-operator and ternary are the return "types"
- ;; of resolve-order-of-operations
- [(('resolved-operator op) args ...)
- `(,op ,@(map parse-lexeme-tree args))]
-
- [('ternary a b c)
- `(ternary ,(parse-lexeme-tree a)
- ,(parse-lexeme-tree b)
- ,(parse-lexeme-tree c))]
-
-
-
-
- ;; Is it OK for literal strings to be "stored" inline?
- ;; Or must they be a pointer?
- ['string #vu8(0)]
- [('string str ...)
- (-> (map resolve-string-fragment str)
- string-fragments->c-string)]
-
- ;; implicit concatenation of string literals
- [(('string str ...) ...)
- (-> (map resolve-string-fragment (concatenate str))
- string-fragments->c-string)]
-
- [('infix args ...)
- (let ((r (resolve-order-of-operations
- (flatten-infix (cons 'infix args)))))
- (parse-lexeme-tree r))]
-
-
- [('funcall function ('group arguments))
- `(funcall ,(parse-lexeme-tree function)
- ,(parse-lexeme-tree arguments))]
-
- [(('variable "struct") ('variable value) ..1)
- `(struct-type ,@(map string->symbol value))
- ]
-
- ;; A list of variables. Most likely a type signature
- ;; [(('variable value) ..1)
- ;; ]
-
- ;; A typecast with only variables must (?) be a typecast?
- [(('group groups ..1) ... value)
- (fold-right (lambda (type done) `(as-type ,type ,done))
- (parse-lexeme-tree value)
- (map group-body->type groups))]
-
- ;; Type name resolution?
- ;; https://en.wikipedia.org/wiki/C_data_types
- ;; base types with spaces:
- ;; =======================
- ;; [[un]signed] char
- ;; [[un]signed] short [int]
- ;; [[un]signed] int
- ;; [un]signed [int]
- ;; [[un]signed] long [int]
- ;; [[un]signed] long long [int]
- ;; float
- ;; [long] double
-
- ;; https://en.wikipedia.org/wiki/Type_qualifier
- ;; qualifiers
- ;; const
- ;; volatile
- ;; restrict
- ;; _Atomic
-
-
- ;; Storage specifiers
- ;; auto
- ;; register
- ;; static
- ;; extern
-
- ;; struct <typename>
- ;; enum <typename>
- ;; union <typename>
-
- ;; https://en.wikipedia.org/wiki/C_syntax
- ;; int (*ptr_to_array)[100]
-
-
- [(? symbol? bare)
- (scm-error 'c-parse-error
- "parse-lexeme-tree"
- "Naked literal in lex-tree: ~s"
- (list bare)
- #f)]
-
- [form
- (scm-error 'c-parse-error
- "parse-lexeme-tree"
- "Unknown form in lex-tree: ~s"
- (list form) #f)
- ]))
-
-;; https://en.wikipedia.org/wiki/Operators_in_C_and_C%2B%2B
-;; https://en.cppreference.com/w/c/language/operator_precedence
-
-(define order-of-operations
- (reverse
- ;; This is only for binary operations
- `((-> ,(symbol #\.))
- ;; All unary procedures go here, incnluding typecasts, and sizeof
- (* / %)
- (+ -)
- (<< >>)
- (< <= > >=)
- (== != not_eq)
- (& bitand)
- (^ xorg)
- (,(symbol #\|) bitor)
- (&& and)
- (,(symbol #\| #\|) or)
- (? :)
- (= += -= *= /= %= <<= >>= &= ^= ,(symbol #\| #\=)
- and_eq or_eq xor_eq)
- (,(symbol #\,))
- )))
-
-;; a.b->c.d (. (-> (. a b) c) d)
-;; 2 * 3 / 4 * 5 => (* (/ (* 2 3) 4) 5)
-;; eller => (* 2 (/ 3 4) 5)
-
-(define* (resolve-order-of-operations
- tree optional: (order order-of-operations))
-
- (if (null? order)
- (scm-error 'c-parse-error
- "resolve-order-of-operations"
- "Out of operations to resolve when resolving expression ~s"
- (list tree) #f)
- (match tree
- [('fixed-infix form) form]
- [('fixed-infix forms ...)
- (match (split-by-one-of forms (car order))
- [(group)
- (resolve-order-of-operations (cons 'fixed-infix group)
- (cdr order))]
- [(a ('? b ...) (': c ...))
- `(ternary ,(resolve-order-of-operations (cons 'fixed-infix a) (cdr order))
- ,(resolve-order-of-operations (cons 'fixed-infix b) (cdr order))
- ,(resolve-order-of-operations (cons 'fixed-infix c) (cdr order)))]
- [(first rest ...)
- ;; TODO this is only valid for the associative operators (+, ...)
- ;; but not some other (<, ...)
- (if (apply eq? (map car rest))
- (let ((op (caar rest)))
- `((resolved-operator ,op)
- ,@(map (lambda (x) (resolve-order-of-operations (cons 'fixed-infix x)
- (cdr order)))
- (cons first (map cdr rest)))))
- (fold (lambda (item done)
- (let ((operator args (car+cdr item)))
- `((resolved-operator ,operator)
- ,done ,(resolve-order-of-operations
- (cons 'fixed-infix args)
- (cdr order)))))
- (resolve-order-of-operations (cons 'fixed-infix first)
- (cdr order))
- rest))])])))
-
-;; 1 * 2 / 3 * 4
-;; ⇒ ((1) (* 2) (/ 3) (* 4))
-;; (1)
-;; (* (1) 2)
-;; (/ (* (1) 2) 3)
-;; (* (/ (* (1) 2) 3) 4)
-
-;; Flatens a tree of infix triples. Stops when it should.
-;; (parenthesis, function calls, ...)
-(define (flatten-infix form)
- (cons 'fixed-infix
- (let loop ((form form))
- (match form
- [('infix left op ('infix right ...))
- (cons* left
- (parse-lexeme-tree op)
- (loop (cons 'infix right)))]
-
- [('infix left op right)
- (list left
- (parse-lexeme-tree op)
- right)]
-
- [('infix form) form]
-
- [other (scm-error 'c-parse-error
- "flatten-infix"
- "Not an infix tree ~a"
- (list other)
- #f)]))))
-
-
-