diff options
Diffstat (limited to 'module')
-rw-r--r-- | module/c/cpp.scm | 95 | ||||
-rw-r--r-- | module/c/eval.scm | 265 | ||||
-rw-r--r-- | module/c/eval/environment.scm | 34 | ||||
-rw-r--r-- | module/c/lex.scm | 52 | ||||
-rw-r--r-- | module/c/operators.scm | 3 | ||||
-rw-r--r-- | module/c/parse.scm | 411 | ||||
-rw-r--r-- | module/hnh/util.scm | 25 | ||||
-rw-r--r-- | module/hnh/util/path.scm | 3 | ||||
-rw-r--r-- | module/srfi/srfi-64/util.scm | 11 |
9 files changed, 761 insertions, 138 deletions
diff --git a/module/c/cpp.scm b/module/c/cpp.scm index a2935352..aed496f2 100644 --- a/module/c/cpp.scm +++ b/module/c/cpp.scm @@ -5,47 +5,37 @@ :use-module (ice-9 match) :use-module (ice-9 regex) :use-module ((rnrs io ports) :select (call-with-port)) + :use-module ((rnrs bytevectors) :select (bytevector?)) :use-module (ice-9 format) :use-module ((hnh util io) :select (read-lines)) :use-module (hnh util graph) :use-module (c lex) :use-module (c parse) :use-module (c operators) - :export (do-funcall replace-symbols include#) + :export (replace-symbols include#) ) ;; input "#define F(x, y) x + y" -;; 1 full define | F(x, y) +;; 1 full define | F(x,y) ;; 2 macro name | F -;; 3 macro args | (x, y) -;; 4 macro body | x + y -(define define-re (make-regexp "^#define ((\\w+)(\\([^)]*\\))?) (.*)")) +;; 3 macro args | (x,y) +;; 5 macro body | x + y or #f +(define define-re (make-regexp "^#define ((\\w+)([(][^)]*[)])?)( (.*))?")) (define (tokenize-define-line header-line) (aif (regexp-exec define-re header-line) (cons (match:substring it 1) - (match:substring it 4)) + (let ((body (match:substring it 5))) + (if (or (eqv? body #f) + (string-null? body)) + "1" body))) (scm-error 'c-parse-error "tokenize-define-line" "Line dosen't match: ~s" (list header-line) #f))) -(define (do-funcall function arguments) - (if (list? arguments) - (apply function arguments) - (function arguments))) - -(define symb-map - `((,(symbol #\|) . logior) - (funcall . (@ (c cpp) do-funcall)) - (&& . and) - (& . logand) - (== . =) - (!= . (negate =)) - )) - (define (replace-symbols tree dict) (if (not (list? tree)) (or (assoc-ref dict tree) tree) @@ -55,12 +45,27 @@ ;; Direct values. Lisp also has quoted symbols in this group. (define (immediate? x) (or (number? x) - (char? x) - (string? x))) + (bytevector? x))) +;; TODO replace this with something sensible +;; like a correct list extracted from (c eval) +;; and not thinging that types are variables ;; built in symbols. Should never be marked as dependencies (define (primitive? x) - (memv x (cons 'funcall binary-operators))) + (memv x `( + ;; language primitives + sizeof + + ;; special forms introduced by parser + funcall ternary struct-type as-type + + ;; unary operatons which aren't also binary operators + ++ -- ! ~ + not compl dereference pointer + pre-increment pre-decrement + post-increment post-decrement + ,@binary-operators + ))) @@ -77,7 +82,6 @@ [arg (list arg)])) (define right (f (cdr pair))) - (define alt-right (replace-symbols right symb-map)) (define dependencies (lset-difference eq? @@ -91,12 +95,12 @@ dependencies (match left [('funcall name ('#{,}# args ...)) - (cons name `(lambda ,args ,alt-right))] + (cons name `(lambda ,args ,right))] [('funcall name arg) - (cons name `(lambda (,arg) ,alt-right))] + (cons name `(lambda (,arg) ,right))] - [name (cons name alt-right)]))) + [name (cons name right)]))) (define (parse-cpp-file lines) @@ -104,7 +108,9 @@ (catch #t (lambda () (parse-cpp-define line)) (lambda (err caller fmt args data) - (format #t "~a ~?~%" fmt args) + (format #t "~a in ~a: ~?~%" + err caller fmt args) + (format #t "~s~%" line) #f))) lines)) @@ -114,29 +120,32 @@ (define (tokenize-header-file header-file) (map tokenize-define-line (call-with-port - (open-input-pipe - (string-append "cpp -dM " header-file)) + (open-pipe* OPEN_READ "cpp" "-dM" header-file) read-lines))) -(define-macro (include# header-file . args) - - (define define-form (if (null? args) 'define (car args))) - - (define lines (remove (compose private-c-symbol? car) - (tokenize-header-file header-file))) +(define (load-cpp-file header-file) + (define lines (tokenize-header-file header-file)) (define forms (parse-cpp-file lines)) - (define graph* - (fold (lambda (node graph) - (add-node graph (cdr node) (car node))) - (make-graph car) - (filter identity forms))) + (fold (lambda (node graph) + (add-node graph (cdr node) (car node))) + (make-graph car) + (filter identity forms))) +(define (include% header-file) + (define graph* (load-cpp-file header-file)) ;; Hack for termios since this symbol isn't defined. ;; (including in the above removed private c symbols) - (define graph (add-node graph* (cons '_POSIX_VDISABLE #f) '())) + (define graph (add-node graph* (cons '_POSIX_VDISABLE 0) '())) + ;; TODO expand bodies + ;; (remove (compose private-c-symbol? car)) + (resolve-dependency-graph graph)) + +(define-macro (include# header-file . args) + + (define define-form (if (null? args) 'define (car args))) `(begin ,@(map (lambda (pair) `(,define-form ,(car pair) ,(cdr pair))) - (resolve-dependency-graph graph)))) + (include% header-file)))) diff --git a/module/c/eval.scm b/module/c/eval.scm new file mode 100644 index 00000000..67d0075d --- /dev/null +++ b/module/c/eval.scm @@ -0,0 +1,265 @@ +(define-module (c eval) + :use-module (hnh util) + :use-module (srfi srfi-1) + :use-module (srfi srfi-71) + :use-module (srfi srfi-88) + :use-module (ice-9 match) + :use-module (ice-9 curried-definitions) + :use-module ((rnrs bytevectors) + :select (bytevector?)) + :use-module ((rnrs arithmetic bitwise) + :select (bitwise-not + bitwise-and + bitwise-ior + bitwise-xor + bitwise-arithmetic-shift-left + bitwise-arithmetic-shift-right)) + :use-module (c eval environment) + :export (c-procedure? + procedure-formals + procedure-body + procedure-arity + + c-eval + )) + +(define C-TRUE 1) +(define C-FALSE 0) + +(define (boolean->c-boolean bool) + (if bool C-TRUE C-FALSE)) + +(define (c-boolean->boolean bool) + (not (zero? bool))) + +(define (c-not b) + (-> b c-boolean->boolean not boolean->c-boolean)) + +(define (c-procedure? expr) + (and (list? expr) + (not (null? expr)) + (eq? 'lambda (car expr)))) + +(define* (ensure-c-procedure expr optional: calling-procedure) + (unless (c-procedure? expr) + (scm-error 'c-eval-error calling-procedure + "Value not a procedure: ~s" + (list procedure #f)))) + +(define (procedure-formals procedure) + (ensure-c-procedure procedure "procedure-formals") + (list-ref procedure 1)) + +(define (procedure-body procedure) + (ensure-c-procedure procedure "procedure-body") + (list-ref procedure 2)) + +(define (procedure-arity procedure) + (length (procedure-formals procedure))) + +(define (literal? expression) + (or (number? expression) + (bytevector? expression))) + + + +;; Internal helper procedures + +(define (mod-set operation) + (lambda (env slot value) + ;; a += b + ;; a = a + b + (c-eval env `(= ,slot (,operation ,slot ,value))))) + +(define (fold-2 proc init lst) + (car+cdr + (fold (lambda (arg env+done) + (let ((env* arg* (proc (car env+done) arg))) + (cons* env* arg* (cdr env+done)))) + init + lst))) + +;; TODO this disregards +;; - floating point convertions +;; - integer truncation +(define ((simple-infix operation) env . operands) + (let ((env done (fold-2 c-eval (cons env '()) operands))) + (values env (apply operation (reverse done))))) + +(define ((binary-operator proc) env i c) + (let ((env* i* (c-eval env i))) + (let ((env** c* (c-eval env* c))) + (values env** (proc i* c*))))) + + + + +;; The order of evaluation for a number of these is undefined, meaning +;; that any side effects without sequence points is undefined. +;; However, for many of these I do the sensible thing and evaluate them +;; from left to right, instead of ignoring all side effects. + +;; TODO double check these with a C standard + + +;; Operators have their own namespace. They are called without funcall +;; in the pseudo-lisp which C is compiled to, and they expand more like +;; lisp macros, since its up to each operator what to do with its operands. +;; This to allow assignment and short circuting. +(define primitives + `((and . ,(lambda (env . operands) + (let loop ((env env) (operands operands)) + (if (null? operands) + (values env C-TRUE) + (let* ((env* result (c-eval env (car operands)))) + (if (c-boolean->boolean result) + (loop env* (cdr operands)) + (values env* result))))))) + (or . ,(lambda (env . operands) + (let loop ((env env) (operands operands)) + (if (null? operands) + (values env C-FALSE) + (let* ((env* result (c-eval env (car operands)))) + (if (false? result) + (values env* result) + (loop env* (cdr operands)))))))) + (= . ,(lambda (env slot value) + ;; TOOD if slot isn't a variable, but a field (or array index) + ;; then it needs to be resolved... + (let ((env* result (c-eval env value))) + (values (env-set! env* slot result) + result)))) + (and_eq ,(mod-set 'bitand)) ; &= + (or_eq ,(mod-set 'bitor)) ; |= + (xor_eq ,(mod-set 'xor)) ; ^= + (+= ,(mod-set '+)) + (-= ,(mod-set '-)) + (*= ,(mod-set '*)) + (/= ,(mod-set '/)) + (<<= ,(mod-set '<<)) + (>>= ,(mod-set '>>)) + (%= ,(mod-set '%)) + (+ . ,(simple-infix +)) + (* . ,(simple-infix *)) + (/ . ,(simple-infix /)) + (- . ,(lambda (env op . operands) + (if (null? operands) + (let ((env* value (c-eval env op))) + (values env* (- value))) + (apply (simple-infix -) + env op operands)))) + (bitor . ,(simple-infix bitwise-ior)) + (bitand . ,(simple-infix bitwise-and)) + (xor . ,(simple-infix bitwise-xor)) + (not_eq . ,(lambda (env a b) (c-eval env `(not (== ,a ,b))))) ; != + (<< . ,(binary-operator bitwise-arithmetic-shift-left)) + (>> . ,(binary-operator bitwise-arithmetic-shift-right)) + (< . ,(binary-operator (compose boolean->c-boolean <))) + (> . ,(binary-operator (compose boolean->c-boolean >))) + ;; this assumes that = handles pointers + (== . ,(binary-operator (compose boolean->c-boolean =))) + (<= . ,(binary-operator (compose boolean->c-boolean <=))) + (>= . ,(binary-operator (compose boolean->c-boolean >=))) + (% . ,(binary-operator modulo)) + + (not . ,(lambda (env value) + (let ((env* result (c-eval env value))) + (values env* (c-not result))))) + (compl . ,(lambda (env value) + (let ((env* result (c-eval env value))) + (values env* (bitwise-not result))))) + + ;; ++C + (pre-increment . ,(lambda (env slot) (c-eval env `(+= ,slot 1)))) + (pre-decrement . ,(lambda (env slot) (c-eval env `(-= ,slot 1)))) + ;; TODO these (C++, C--) need to handle if slot isn't a direct variabl + (post-increment . ,(lambda (env slot) + (let ((value (env-ref env slot))) + (values (env-set! env slot (1+ value)) + value)))) + (pre-decrement . ,(lambda (env slot) + (let ((value (env-ref env slot))) + (values (env-set! env slot (1+ value)) + value)))) + + (ternary . ,(lambda (env test true-clause false-clause) + (let ((env* value (c-eval env test))) + (c-eval env* + (if (c-boolean->boolean value) + true-clause false-clause))))) + + ;; TODO remaining operations + (as-type . ,(lambda (env target-type value) + (format (current-error-port) "cast<~s>(~s)~%" target-type value) + (values env value))) + + (object-slot . ,(lambda (env object slot) + (scm-error 'not-implemented "object-slot" + "Object slots are not implemented, when accessing ~s.~s" + (list object slot) #f))) + (dereference-slot ,(lambda (env ptr slot) + (scm-error 'not-implemented "dereference-slot" + "Object slots are not implemented, when accessing ~s->~s" + (list object slot) #f))) + (dereference . ,(lambda (env ptr) + (scm-error 'not-implemented "dereference" + "Poiner dereferencing is not implemented: *~s" + (list ptr) #f))) + (pointer . ,(lambda (env value) + (scm-error 'not-implemented "pointer" + "Pointer of is not implemented: &~s" + (list value) #f))))) + +;; TODO |,| + + +(define (c-eval environment expression) + (match expression + (('lambda formals body) (values environment `(lambda ,formals ,body))) + ;; hack since sizeof really should be a operator + (('funcall 'sizeof arg) + ;; TODO + (format (current-error-port) "sizeof ~s~%" arg) + (values environment 1)) + + (('funcall procedure-name ('#{,}# args ...)) + (let ((procedure (env-ref environment procedure-name))) + (ensure-c-procedure procedure "c-eval") + (unless (= (length args) (procedure-arity procedure)) + (scm-error 'c-eval-error "c-eval" + "Procedure arity mismatch for ~s, expected ~s, got ~s" + (list procedure-name + (procedure-arity procedure) + (length args)) + #f)) + (let ((env args* (fold-2 c-eval (cons environment '()) args ))) + (let ((inner-environment + (fold (lambda (name value env) (env-set! env name value)) + (push-frame! env) + (procedure-formals procedure) args*))) + (let ((resulting-environment + result-value + (c-eval inner-environment (procedure-body procedure)))) + (values (pop-frame! resulting-environment) + result-value)))))) + + (('funcall procedure arg) + (c-eval environment `(funcall ,procedure (#{,}# ,arg)))) + + ((operator operands ...) + (apply (or (assoc-ref primitives operator) + (scm-error 'c-eval-error "c-eval" + "Applying non-existant primitive operator: ~s, operands: ~s" + (list operator operands) #f)) + environment operands)) + + ;; "f()" gets compiled to simply f + ;; meaning that we instead use the environment to determine + ;; if something is a variable or procedure + (expr + (if (literal? expr) + (values environment expr) + (let ((value (env-ref environment expr))) + (if (c-procedure? value) + (c-eval environment `(funcall ,value (#{,}#))) + (values environment value))))))) diff --git a/module/c/eval/environment.scm b/module/c/eval/environment.scm new file mode 100644 index 00000000..12eefaf7 --- /dev/null +++ b/module/c/eval/environment.scm @@ -0,0 +1,34 @@ +(define-module (c eval environment) + :use-module (srfi srfi-1) + :export (make-environment + env-set! env-ref push-frame! pop-frame!)) + +(define (make-frame) + (make-hash-table)) + +(define (make-environment) + (list (make-frame))) + +;; Returns an updated environment, linear update +(define (env-set! env key value) + ;; get handle to differentiate #f + ;; (even though #f should never be stored since it's not a C value) + (cond ((find (lambda (frame) (hashq-get-handle frame key)) env) + => (lambda (frame) (hashq-set! frame key value))) + (else (hashq-set! (car env) key value))) + env) + +(define (env-ref env key) + (cond ((null? env) + (scm-error 'misc-error "env-ref" + "~s unbound" + (list key) + #f)) + ((hashq-get-handle (car env) key) => cdr) + (else (env-ref (cdr env) key)))) + +(define (push-frame! environment) + (cons (make-frame) environment)) + +(define (pop-frame! environment) + (cdr environment)) diff --git a/module/c/lex.scm b/module/c/lex.scm index 34e52d88..0bde5c9e 100644 --- a/module/c/lex.scm +++ b/module/c/lex.scm @@ -43,8 +43,23 @@ (define-peg-pattern integer all (and (or base-8 base-16 base-10) (? integer-suffix))) +(define-peg-pattern float-suffix all + (* (or "f" "F" "l" "L"))) + +(define-peg-pattern exponent all + (and (ignore (or "e" "E")) (? (or "+" "-")) base-10)) + +;; Helper patterns for creating named groups in float +(define-peg-pattern float-integer all base-10) +(define-peg-pattern float-decimal all base-10) + +(define-peg-pattern float all + (or (and float-integer exponent (? float-suffix)) + (and (? float-integer) (ignore ".") float-decimal (? exponent) (? float-suffix)) + (and float-integer (ignore ".") (? exponent) (? float-suffix)))) + (define-peg-pattern number body - (or integer)) + (or float integer)) (define-peg-pattern group all (and (ignore "(") expr (ignore ")"))) @@ -65,11 +80,16 @@ (define-peg-pattern char all (and (ignore "'") (or escaped-char peg-any) (ignore "'"))) +(define-peg-pattern quot none "\"") + +(define-peg-pattern string all + (and quot (* (or escaped-char (and (not-followed-by "\"") peg-any))) quot)) (define-peg-pattern* operator all `(or ,@(map symbol->string symbol-binary-operators) ,@(map (lambda (op) `(and ,(symbol->string op) ws)) - wordy-binary-operators))) + wordy-binary-operators) + "?" ":")) ;; whitespace (define-peg-pattern ws none @@ -89,17 +109,23 @@ base-10-digit)))) (define-peg-pattern prefix-operator all - (or "!" "~" "*" "&" "++" "--" "+" "-")) + ;; It's important that ++ and -- are BEFORE + and - + ;; otherwise the first + is found, leaving the second +, which fails + ;; to lex since it's an invalid token + ;; TODO sizeof can be written as a prefix operator + ;; (without parenthesis) if the operand is an expression. + (or "*" "&" "++" "--" + "!" "~" "+" "-")) + ;;; Note that stacked pre or postfix operators without parenthesis ;;; dosen't work. So `*&C' is invalid, while `*(&C)' is valid. (define-peg-pattern prefix all - (and prefix-operator sp (or variable group funcall #; postfix - ))) + (and prefix-operator sp (or variable group funcall literal))) (define-peg-pattern postfix-operator all - (or "++" "--")) + (or "++" "--" "*")) (define-peg-pattern postfix all ;; literals can't be in-place incremented and decremented @@ -111,15 +137,25 @@ ;; first case is "same" as expr, but in different order to prevent ;; infinite self reference. Pre and postfix not here, solved by having ;; them before infix in expr - (and (or funcall postfix prefix group char number variable) + (and (or funcall postfix prefix group literal variable) sp operator sp expr)) (define-peg-pattern funcall all (and variable sp group)) +(define-peg-pattern literal body + (or char string number)) + ;;; main parser (define-peg-pattern expr body - (+ (and sp (or infix postfix prefix funcall group char number variable) + (+ (and sp (or + ;; float must be BEFORE infix, otherwise 3.2 is parsed as (infix 3 (operator ".") 2) + ;; that however breaks the infix logic, meaning that floating point numbers can't be + ;; used in basic arithmetic. + ;; TODO remove all implicit order of operations handling in the lexer, and move it to + ;; the parser. This should also fix the case of typecasts being applied incorrectly. + float + infix postfix prefix funcall group literal variable) sp))) diff --git a/module/c/operators.scm b/module/c/operators.scm index ab1b3e7c..910dc8a9 100644 --- a/module/c/operators.scm +++ b/module/c/operators.scm @@ -9,8 +9,9 @@ `(+ - * / & ,(symbol #\|) ^ << >> % < > =)) ;; apparently part of C +;; https://en.cppreference.com/w/cpp/language/operator_alternative (define wordy-binary-operators - '(bitand and_eq and bitor or_eq or xor_eq xor)) + '(bitand and_eq and bitor or_eq or xor_eq xor not_eq)) (define symbol-binary-operators (append (map (lambda (x) (symbol-append x '=)) simple-operators) diff --git a/module/c/parse.scm b/module/c/parse.scm index 8030da77..7d11ea17 100644 --- a/module/c/parse.scm +++ b/module/c/parse.scm @@ -1,11 +1,14 @@ (define-module (c parse) :use-module (hnh util) :use-module (srfi srfi-1) + :use-module (srfi srfi-71) :use-module (ice-9 match) + :use-module ((rnrs io ports) + :select (string->bytevector make-transcoder utf-8-codec)) + :use-module (rnrs bytevectors) :export (parse-lexeme-tree)) -;;; Rename this -(define (perms set) +(define (permutations set) (concatenate (map (lambda (key) (map (lambda (o) (cons key o)) @@ -21,23 +24,124 @@ (define valid-sequences (delete 'dummy (lset-union eq? '(dummy) - (map symbol-concat (perms '(() U L))) - (map symbol-concat (perms '(() U LL)))))) + (map symbol-concat (permutations '(() U L))) + (map symbol-concat (permutations '(() U LL)))))) ;; => (LLU ULL LL LU UL L U) (aif (memv (string->symbol (string-upcase str)) valid-sequences) (case (car it) - [(LLU ULL) '(unsigned long-long)] + [(LLU ULL) '(unsigned long long)] [(LU UL) '(unsigned long)] - [(LL) '(long-long)] + [(LL) '(long long)] [(L) '(long)] [(U) '(unsigned)]) (scm-error 'c-parse-error "parse-integer-suffix" "Invalid integer suffix ~s" (list str) #f))) +(define (parse-float-suffix str) + (case (string->symbol str) + ((f F) '(float)) + ((l L) '(long double)))) + +(define (group-body->type vars) + (concatenate + (map + (match-lambda (('variable var) (list (parse-lexeme-tree `(variable ,var)))) + (('postfix ('variable var) + ('postfix-operator "*")) + (list (parse-lexeme-tree `(variable ,var)) + '*)) + (else (scm-error 'c-parse-error "parse-lexeme-tree" + "Invalid token ~s in typecast form: ~s" + (list else vars) #f))) + vars))) + +;; Takes a list of strings and integers, and merges it all into a single +;; bytevector representing a c string +(define* (string-fragments->c-string + fragments optional: (transcoder (make-transcoder (utf-8-codec)))) + + (define fragments-fixed + (map (lambda (frag) + (if (string? frag) + (string->bytevector frag transcoder) + frag)) + fragments)) + + (define bv-length + (fold (lambda (item sum) (+ sum (if (bytevector? item) + (bytevector-length item) + 1))) + 0 fragments-fixed)) + + (define bv (make-bytevector (1+ bv-length))) + ;; trailing null byte + (bytevector-u8-set! bv bv-length 0) + (fold (lambda (item idx) + (cond ((bytevector? item) + (bytevector-copy! item 0 + bv idx + (bytevector-length item)) + (+ idx (bytevector-length item))) + (else (bytevector-u8-set! bv idx item) + (+ idx 1)))) + 0 + fragments-fixed) + bv) + +(define (parse-float-form float-form) + (let ((float-string + (fold (lambda (arg str) + (string-append + str + (match arg + (('float-integer ('base-10 n)) n) + (('float-decimal ('base-10 n)) (string-append "." n)) + (('exponent "+" ('base-10 n)) (string-append "e" n)) + (('exponent ('base-10 n)) (string-append "e" n)) + (('exponent "-" ('base-10 n)) (string-append "e-" n))))) + "" float-form))) + ;; exact->inexact is a no-op if we already have an inexact number, but + ;; ensures we get an inexact number when we have an exact number (which we + ;; can get from the "1." case). Returning an inexact number here is important + ;; to avoid arithmetic suprises later. + (exact->inexact + (or (string->number float-string) + (scm-error 'c-parse-error "parse-lexeme-tree" + "Couldn't parse expression as float: ~s" + (list `(float ,@args)) #f))))) + + +(define (resolve-escaped-char form) + (match form + (('base-8-char n) (string->number n 8)) + (('base-16-char n) (string->number n 16)) + (c (char->integer + (case (string-ref c 0) + ((#\a) #\alarm) + ((#\b) #\backspace) + ((#\e) #\esc) ;; non-standard + ((#\f) #\page) + ((#\n) #\newline) + ((#\r) #\return) + ((#\t) #\tab) + ((#\v) #\vtab) + ((#\\) #\\) + ;; These are valid in both strings and chars + ((#\') #\') + ((#\") #\")))))) + +;; Takes a list of strings and escaped-char form +;; and returns a list of strings and integers +(define (resolve-string-fragment fragment) + (match fragment + (('escaped-char c) + (resolve-escaped-char c)) + (fargment fragment))) + (define (parse-lexeme-tree tree) (match tree ['() '()] @@ -50,55 +154,67 @@ [('integer n ('integer-suffix suffix)) `(as-type ,(parse-integer-suffix suffix) - ,(parse-lexeme-tree n)) - ] + ,(parse-lexeme-tree n))] + [('integer n) (parse-lexeme-tree n)] + + [('float args ... ('float-suffix suffix)) + `(as-type ,(parse-float-suffix suffix) + ;; parse rest of float as if it lacked a suffix + ,(parse-lexeme-tree `(float ,@args)))] + + [('float args ...) (parse-float-form args)] + ;; Character literals, stored as raw integers ;; so mathematical operations keep working on them. - [('char ('escaped-char ('base-8-char n))) - (-> n (string->number 8) #; integer->char)] - [('char ('escaped-char ('base-16-char n))) - (-> n (string->number 16) #; integer->char)] - [('char ('escaped-char c)) - (char->integer - (case (string-ref c 0) - ((#\a) #\alarm) - ((#\b) #\backspace) - ((#\e) #\esc) - ((#\f) #\page) - ((#\n) #\newline) - ((#\r) #\return) - ((#\t) #\tab) - ((#\v) #\vtab) - ((#\\) #\\) - ((#\') #\')))] + [('char ('escaped-char c)) (resolve-escaped-char c)] + [('char c) (char->integer (string-ref c 0))] [('variable var) (string->symbol var)] + + ;; normalize some binary operators to their wordy equivalent + ;; (which also happens to match better with scheme) + [('operator "&&") 'and] + [('operator "&=") 'and_eq] + [('operator "&") 'bitand] + [('operator "|") 'bitor] + [('operator "!=") 'not_eq] + [('operator "||") 'or] + [('operator "|=") 'or_eq] + [('operator "^") 'xor] + [('operator "^=") 'xor_eq] + ;; Change these names to something scheme can handle better + [('operator ".") 'object-slot] + [('operator "->") 'dereference-slot] [('operator op) (string->symbol op)] + [('prefix-operator op) (case (string->symbol op) + ((!) 'not) + ((~) 'compl) ((*) 'dereference) ((&) 'pointer) ((++) 'pre-increment) ((--) 'pre-decrement) - (else => identity))] + ((-) '-) + (else (scm-error 'c-parse-error "parse-lexeme-tree" + "Unknown prefix operator ~s" + (list op) #f)))] [('postfix-operator op) (case (string->symbol op) [(++) 'post-increment] [(--) 'post-decrement] - [else => identity])] + [else (scm-error 'c-parse-error "parse-lexeme-tree" + "Unknown postfix operator ~s" + (list op) #f)])] ;; Parenthesis grouping - [('group args) + [('group args ...) (parse-lexeme-tree args)] - ;; Atomic item. Used by flatten-infix - [('atom body) - (parse-lexeme-tree body)] - [('prefix op arg) `(,(parse-lexeme-tree op) ,(parse-lexeme-tree arg))] @@ -107,81 +223,204 @@ `(,(parse-lexeme-tree op) ,(parse-lexeme-tree arg))] + + + + + ;; resolved-operator and ternary are the return "types" + ;; of resolve-order-of-operations + [(('resolved-operator op) args ...) + `(,op ,@(map parse-lexeme-tree args))] + + [('ternary a b c) + `(ternary ,(parse-lexeme-tree a) + ,(parse-lexeme-tree b) + ,(parse-lexeme-tree c))] + + + + + ;; Is it OK for literal strings to be "stored" inline? + ;; Or must they be a pointer? + ['string #vu8(0)] + [('string str ...) + (-> (map resolve-string-fragment str) + string-fragments->c-string)] + + ;; implicit concatenation of string literals + [(('string str ...) ...) + (-> (map resolve-string-fragment (concatenate str)) + string-fragments->c-string)] + [('infix args ...) - (resolve-order-of-operations - (flatten-infix (cons 'infix args)))] + (let ((r (resolve-order-of-operations + (flatten-infix (cons 'infix args))))) + (parse-lexeme-tree r))] + [('funcall function ('group arguments)) `(funcall ,(parse-lexeme-tree function) ,(parse-lexeme-tree arguments))] - [bare (scm-error 'c-parse-error - "parse-lexeme-tree" - "Naked literal in lex-tree: ~s" - (list bare) - #f)])) + [(('variable "struct") ('variable value) ..1) + `(struct-type ,@(map string->symbol value)) + ] + + ;; A list of variables. Most likely a type signature + ;; [(('variable value) ..1) + ;; ] + + ;; A typecast with only variables must (?) be a typecast? + [(('group groups ..1) ... value) + (fold-right (lambda (type done) `(as-type ,type ,done)) + (parse-lexeme-tree value) + (map group-body->type groups))] + + ;; Type name resolution? + ;; https://en.wikipedia.org/wiki/C_data_types + ;; base types with spaces: + ;; ======================= + ;; [[un]signed] char + ;; [[un]signed] short [int] + ;; [[un]signed] int + ;; [un]signed [int] + ;; [[un]signed] long [int] + ;; [[un]signed] long long [int] + ;; float + ;; [long] double + + ;; https://en.wikipedia.org/wiki/Type_qualifier + ;; qualifiers + ;; const + ;; volatile + ;; restrict + ;; _Atomic + + + ;; Storage specifiers + ;; auto + ;; register + ;; static + ;; extern + + ;; struct <typename> + ;; enum <typename> + ;; union <typename> + + ;; https://en.wikipedia.org/wiki/C_syntax + ;; int (*ptr_to_array)[100] + + + [(? symbol? bare) + (scm-error 'c-parse-error + "parse-lexeme-tree" + "Naked literal in lex-tree: ~s" + (list bare) + #f)] + + [form + (scm-error 'c-parse-error + "parse-lexeme-tree" + "Unknown form in lex-tree: ~s" + (list form) #f) + ])) ;; https://en.wikipedia.org/wiki/Operators_in_C_and_C%2B%2B +;; https://en.cppreference.com/w/c/language/operator_precedence (define order-of-operations (reverse - (concatenate - ;; This is only for binary operations - `((-> ,(symbol #\.)) - (* / %) - (+ -) - (<< >>) - (< <= > >=) - (== !=) - (&) - (^) - (,(symbol #\|)) - (&&) - (,(symbol #\| #\|)) - (= += -= *= /= %= <<= >>= &= ^= ,(symbol #\| #\=)) - (,(symbol #\,)) - )))) - -(define (mark-other form) - (if (list? form) (cons '*other* form) form)) + ;; This is only for binary operations + `((-> ,(symbol #\.)) + ;; All unary procedures go here, incnluding typecasts, and sizeof + (* / %) + (+ -) + (<< >>) + (< <= > >=) + (== != not_eq) + (& bitand) + (^ xorg) + (,(symbol #\|) bitor) + (&& and) + (,(symbol #\| #\|) or) + (? :) + (= += -= *= /= %= <<= >>= &= ^= ,(symbol #\| #\=) + and_eq or_eq xor_eq) + (,(symbol #\,)) + ))) + +;; a.b->c.d (. (-> (. a b) c) d) +;; 2 * 3 / 4 * 5 => (* (/ (* 2 3) 4) 5) +;; eller => (* 2 (/ 3 4) 5) (define* (resolve-order-of-operations tree optional: (order order-of-operations)) (if (null? order) - (car tree) + (scm-error 'c-parse-error + "resolve-order-of-operations" + "Out of operations to resolve when resolving expression ~s" + (list tree) #f) (match tree - [('*other* body ...) body] - [(form) (resolve-order-of-operations form order)] - [(forms ...) - (match (split-by forms (car order)) - [(group) (resolve-order-of-operations group (cdr order))] - [groups - (cons (car order) - (map (lambda (form) (resolve-order-of-operations - form order-of-operations)) - groups))])] - [a a]))) + [('fixed-infix form) form] + [('fixed-infix forms ...) + (match (split-by-one-of forms (car order)) + [(group) + (resolve-order-of-operations (cons 'fixed-infix group) + (cdr order))] + [(a ('? b ...) (': c ...)) + `(ternary ,(resolve-order-of-operations (cons 'fixed-infix a) (cdr order)) + ,(resolve-order-of-operations (cons 'fixed-infix b) (cdr order)) + ,(resolve-order-of-operations (cons 'fixed-infix c) (cdr order)))] + [(first rest ...) + ;; TODO this is only valid for the associative operators (+, ...) + ;; but not some other (<, ...) + (if (apply eq? (map car rest)) + (let ((op (caar rest))) + `((resolved-operator ,op) + ,@(map (lambda (x) (resolve-order-of-operations (cons 'fixed-infix x) + (cdr order))) + (cons first (map cdr rest))))) + (fold (lambda (item done) + (let ((operator args (car+cdr item))) + `((resolved-operator ,operator) + ,done ,(resolve-order-of-operations + (cons 'fixed-infix args) + (cdr order))))) + (resolve-order-of-operations (cons 'fixed-infix first) + (cdr order)) + rest))])]))) + +;; 1 * 2 / 3 * 4 +;; ⇒ ((1) (* 2) (/ 3) (* 4)) +;; (1) +;; (* (1) 2) +;; (/ (* (1) 2) 3) +;; (* (/ (* (1) 2) 3) 4) ;; Flatens a tree of infix triples. Stops when it should. ;; (parenthesis, function calls, ...) (define (flatten-infix form) - (match form - [('infix left op ('infix right ...)) - (cons* (parse-lexeme-tree left) - (parse-lexeme-tree op) - (flatten-infix (cons 'infix right)))] - - [('infix left op right) - (list (mark-other (parse-lexeme-tree left)) - (parse-lexeme-tree op) - (mark-other (parse-lexeme-tree right)))] - - [other (scm-error 'c-parse-error - "flatten-infix" - "Not an infix tree ~a" - (list other) - #f)])) + (cons 'fixed-infix + (let loop ((form form)) + (match form + [('infix left op ('infix right ...)) + (cons* left + (parse-lexeme-tree op) + (loop (cons 'infix right)))] + + [('infix left op right) + (list left + (parse-lexeme-tree op) + right)] + + [('infix form) form] + + [other (scm-error 'c-parse-error + "flatten-infix" + "Not an infix tree ~a" + (list other) + #f)])))) diff --git a/module/hnh/util.scm b/module/hnh/util.scm index d2c0dd5f..9a45704b 100644 --- a/module/hnh/util.scm +++ b/module/hnh/util.scm @@ -34,6 +34,8 @@ group-by split-by + split-by-one-of + span-upto cross-product @@ -341,6 +343,29 @@ (cdr rem))]))) +(define (split-by-one-of lst items) + (cond ((null? items) + (scm-error 'wrong-type-arg "split-by-one-of" + "Must have at least one item to split by, when splitting ~s" + (cons items '()) #f)) + ((not (list? items)) + (scm-error 'wrong-type-arg "split-by-one-of" + "Items must be list of list of symbols, got ~s" + (list items) #f)) + (else + (call-with-values + (lambda () + (car+cdr + (let loop ((token 'sentinel-token) (lst lst)) + (let ((head tail (break (lambda (item) (memv item items)) + lst))) + (let ((group (cons token head))) + (if (null? tail) + (list group) + (cons group (loop (car tail) (cdr tail))))))))) + ;; Remove the sentinel token + (lambda (first rest) (cons (cdr first) rest)))))) + ;; Simar to span from srfi-1, but never takes more than ;; @var{count} items. Can however still take less. diff --git a/module/hnh/util/path.scm b/module/hnh/util/path.scm index ea081e85..0c8af48a 100644 --- a/module/hnh/util/path.scm +++ b/module/hnh/util/path.scm @@ -3,6 +3,7 @@ :use-module (srfi srfi-71) :use-module (hnh util) :export (path-append + path-absolute? path-join path-split file-hidden? @@ -12,6 +13,8 @@ (define // file-name-separator-string) (define /? file-name-separator?) +(define path-absolute? absolute-file-name?) + (define (path-append . strings) (fold (lambda (s done) (string-append diff --git a/module/srfi/srfi-64/util.scm b/module/srfi/srfi-64/util.scm new file mode 100644 index 00000000..a371227f --- /dev/null +++ b/module/srfi/srfi-64/util.scm @@ -0,0 +1,11 @@ +(define-module (srfi srfi-64 util) + :use-module (ice-9 curried-definitions) + :use-module ((srfi srfi-1) :select (every)) + :use-module (srfi srfi-64) + :export (test-match-group)) + +;; Specifier for name of group +(define ((test-match-group name . names) runner) + (every string=? + (reverse (cons name names)) + (test-runner-group-stack runner))) |