diff options
-rw-r--r-- | doc/ref/guile/util-path.texi | 5 | ||||
-rw-r--r-- | doc/ref/guile/util.texi | 21 | ||||
-rw-r--r-- | module/c/cpp.scm | 95 | ||||
-rw-r--r-- | module/c/eval.scm | 265 | ||||
-rw-r--r-- | module/c/eval/environment.scm | 34 | ||||
-rw-r--r-- | module/c/lex.scm | 52 | ||||
-rw-r--r-- | module/c/operators.scm | 3 | ||||
-rw-r--r-- | module/c/parse.scm | 411 | ||||
-rw-r--r-- | module/hnh/util.scm | 25 | ||||
-rw-r--r-- | module/hnh/util/path.scm | 3 | ||||
-rw-r--r-- | module/srfi/srfi-64/util.scm | 11 | ||||
-rwxr-xr-x | tests/run-tests.scm | 74 | ||||
-rw-r--r-- | tests/test/c-parse.scm | 69 | ||||
-rw-r--r-- | tests/test/cpp.scm | 603 | ||||
-rw-r--r-- | tests/test/util.scm | 19 |
15 files changed, 1519 insertions, 171 deletions
diff --git a/doc/ref/guile/util-path.texi b/doc/ref/guile/util-path.texi index 322c50ec..2a53ba91 100644 --- a/doc/ref/guile/util-path.texi +++ b/doc/ref/guile/util-path.texi @@ -3,7 +3,10 @@ Provided by the module @code{(hnh util path)}. -See also @code{absolute-file-name?} from Guile. + +@defun path-absolute? string +Alias of @code{absolute-file-name?} from Guile. +@end defun @defun path-append strings ... Joins all strings into a path, squeezing duplicated delimiters, but diff --git a/doc/ref/guile/util.texi b/doc/ref/guile/util.texi index 32df5fce..222b59c5 100644 --- a/doc/ref/guile/util.texi +++ b/doc/ref/guile/util.texi @@ -198,6 +198,27 @@ Split a list into sub-lists on @var{element} @end lisp @end defun +@defun split-by-one-of lst items +Like split-by, but takes a list of delimiters. +Returns a list where the first element is everything before the first +delimiter, and the remaining elements is the splitting delimiter +consed with everything until the next delimiter. + +@lisp +(split-by-one-of '() '(+))) +⇒ (()) + +(split-by-one-of '(1 + 2) '(/)) +⇒ ((1 + 2)) + +(split-by-one-of '(1 + 2 - 3) '(+ -)) +⇒ ((1) (+ 2) (- 3)) + +(split-by-one-of '(1 + 2 * 3 + 4) '(*)) +⇒ ((1 + 2) (* 3 + 4)) +@end lisp +@end defun + @defun span-upto count predicate list Simar to span from srfi-1, but never takes more than diff --git a/module/c/cpp.scm b/module/c/cpp.scm index a2935352..aed496f2 100644 --- a/module/c/cpp.scm +++ b/module/c/cpp.scm @@ -5,47 +5,37 @@ :use-module (ice-9 match) :use-module (ice-9 regex) :use-module ((rnrs io ports) :select (call-with-port)) + :use-module ((rnrs bytevectors) :select (bytevector?)) :use-module (ice-9 format) :use-module ((hnh util io) :select (read-lines)) :use-module (hnh util graph) :use-module (c lex) :use-module (c parse) :use-module (c operators) - :export (do-funcall replace-symbols include#) + :export (replace-symbols include#) ) ;; input "#define F(x, y) x + y" -;; 1 full define | F(x, y) +;; 1 full define | F(x,y) ;; 2 macro name | F -;; 3 macro args | (x, y) -;; 4 macro body | x + y -(define define-re (make-regexp "^#define ((\\w+)(\\([^)]*\\))?) (.*)")) +;; 3 macro args | (x,y) +;; 5 macro body | x + y or #f +(define define-re (make-regexp "^#define ((\\w+)([(][^)]*[)])?)( (.*))?")) (define (tokenize-define-line header-line) (aif (regexp-exec define-re header-line) (cons (match:substring it 1) - (match:substring it 4)) + (let ((body (match:substring it 5))) + (if (or (eqv? body #f) + (string-null? body)) + "1" body))) (scm-error 'c-parse-error "tokenize-define-line" "Line dosen't match: ~s" (list header-line) #f))) -(define (do-funcall function arguments) - (if (list? arguments) - (apply function arguments) - (function arguments))) - -(define symb-map - `((,(symbol #\|) . logior) - (funcall . (@ (c cpp) do-funcall)) - (&& . and) - (& . logand) - (== . =) - (!= . (negate =)) - )) - (define (replace-symbols tree dict) (if (not (list? tree)) (or (assoc-ref dict tree) tree) @@ -55,12 +45,27 @@ ;; Direct values. Lisp also has quoted symbols in this group. (define (immediate? x) (or (number? x) - (char? x) - (string? x))) + (bytevector? x))) +;; TODO replace this with something sensible +;; like a correct list extracted from (c eval) +;; and not thinging that types are variables ;; built in symbols. Should never be marked as dependencies (define (primitive? x) - (memv x (cons 'funcall binary-operators))) + (memv x `( + ;; language primitives + sizeof + + ;; special forms introduced by parser + funcall ternary struct-type as-type + + ;; unary operatons which aren't also binary operators + ++ -- ! ~ + not compl dereference pointer + pre-increment pre-decrement + post-increment post-decrement + ,@binary-operators + ))) @@ -77,7 +82,6 @@ [arg (list arg)])) (define right (f (cdr pair))) - (define alt-right (replace-symbols right symb-map)) (define dependencies (lset-difference eq? @@ -91,12 +95,12 @@ dependencies (match left [('funcall name ('#{,}# args ...)) - (cons name `(lambda ,args ,alt-right))] + (cons name `(lambda ,args ,right))] [('funcall name arg) - (cons name `(lambda (,arg) ,alt-right))] + (cons name `(lambda (,arg) ,right))] - [name (cons name alt-right)]))) + [name (cons name right)]))) (define (parse-cpp-file lines) @@ -104,7 +108,9 @@ (catch #t (lambda () (parse-cpp-define line)) (lambda (err caller fmt args data) - (format #t "~a ~?~%" fmt args) + (format #t "~a in ~a: ~?~%" + err caller fmt args) + (format #t "~s~%" line) #f))) lines)) @@ -114,29 +120,32 @@ (define (tokenize-header-file header-file) (map tokenize-define-line (call-with-port - (open-input-pipe - (string-append "cpp -dM " header-file)) + (open-pipe* OPEN_READ "cpp" "-dM" header-file) read-lines))) -(define-macro (include# header-file . args) - - (define define-form (if (null? args) 'define (car args))) - - (define lines (remove (compose private-c-symbol? car) - (tokenize-header-file header-file))) +(define (load-cpp-file header-file) + (define lines (tokenize-header-file header-file)) (define forms (parse-cpp-file lines)) - (define graph* - (fold (lambda (node graph) - (add-node graph (cdr node) (car node))) - (make-graph car) - (filter identity forms))) + (fold (lambda (node graph) + (add-node graph (cdr node) (car node))) + (make-graph car) + (filter identity forms))) +(define (include% header-file) + (define graph* (load-cpp-file header-file)) ;; Hack for termios since this symbol isn't defined. ;; (including in the above removed private c symbols) - (define graph (add-node graph* (cons '_POSIX_VDISABLE #f) '())) + (define graph (add-node graph* (cons '_POSIX_VDISABLE 0) '())) + ;; TODO expand bodies + ;; (remove (compose private-c-symbol? car)) + (resolve-dependency-graph graph)) + +(define-macro (include# header-file . args) + + (define define-form (if (null? args) 'define (car args))) `(begin ,@(map (lambda (pair) `(,define-form ,(car pair) ,(cdr pair))) - (resolve-dependency-graph graph)))) + (include% header-file)))) diff --git a/module/c/eval.scm b/module/c/eval.scm new file mode 100644 index 00000000..67d0075d --- /dev/null +++ b/module/c/eval.scm @@ -0,0 +1,265 @@ +(define-module (c eval) + :use-module (hnh util) + :use-module (srfi srfi-1) + :use-module (srfi srfi-71) + :use-module (srfi srfi-88) + :use-module (ice-9 match) + :use-module (ice-9 curried-definitions) + :use-module ((rnrs bytevectors) + :select (bytevector?)) + :use-module ((rnrs arithmetic bitwise) + :select (bitwise-not + bitwise-and + bitwise-ior + bitwise-xor + bitwise-arithmetic-shift-left + bitwise-arithmetic-shift-right)) + :use-module (c eval environment) + :export (c-procedure? + procedure-formals + procedure-body + procedure-arity + + c-eval + )) + +(define C-TRUE 1) +(define C-FALSE 0) + +(define (boolean->c-boolean bool) + (if bool C-TRUE C-FALSE)) + +(define (c-boolean->boolean bool) + (not (zero? bool))) + +(define (c-not b) + (-> b c-boolean->boolean not boolean->c-boolean)) + +(define (c-procedure? expr) + (and (list? expr) + (not (null? expr)) + (eq? 'lambda (car expr)))) + +(define* (ensure-c-procedure expr optional: calling-procedure) + (unless (c-procedure? expr) + (scm-error 'c-eval-error calling-procedure + "Value not a procedure: ~s" + (list procedure #f)))) + +(define (procedure-formals procedure) + (ensure-c-procedure procedure "procedure-formals") + (list-ref procedure 1)) + +(define (procedure-body procedure) + (ensure-c-procedure procedure "procedure-body") + (list-ref procedure 2)) + +(define (procedure-arity procedure) + (length (procedure-formals procedure))) + +(define (literal? expression) + (or (number? expression) + (bytevector? expression))) + + + +;; Internal helper procedures + +(define (mod-set operation) + (lambda (env slot value) + ;; a += b + ;; a = a + b + (c-eval env `(= ,slot (,operation ,slot ,value))))) + +(define (fold-2 proc init lst) + (car+cdr + (fold (lambda (arg env+done) + (let ((env* arg* (proc (car env+done) arg))) + (cons* env* arg* (cdr env+done)))) + init + lst))) + +;; TODO this disregards +;; - floating point convertions +;; - integer truncation +(define ((simple-infix operation) env . operands) + (let ((env done (fold-2 c-eval (cons env '()) operands))) + (values env (apply operation (reverse done))))) + +(define ((binary-operator proc) env i c) + (let ((env* i* (c-eval env i))) + (let ((env** c* (c-eval env* c))) + (values env** (proc i* c*))))) + + + + +;; The order of evaluation for a number of these is undefined, meaning +;; that any side effects without sequence points is undefined. +;; However, for many of these I do the sensible thing and evaluate them +;; from left to right, instead of ignoring all side effects. + +;; TODO double check these with a C standard + + +;; Operators have their own namespace. They are called without funcall +;; in the pseudo-lisp which C is compiled to, and they expand more like +;; lisp macros, since its up to each operator what to do with its operands. +;; This to allow assignment and short circuting. +(define primitives + `((and . ,(lambda (env . operands) + (let loop ((env env) (operands operands)) + (if (null? operands) + (values env C-TRUE) + (let* ((env* result (c-eval env (car operands)))) + (if (c-boolean->boolean result) + (loop env* (cdr operands)) + (values env* result))))))) + (or . ,(lambda (env . operands) + (let loop ((env env) (operands operands)) + (if (null? operands) + (values env C-FALSE) + (let* ((env* result (c-eval env (car operands)))) + (if (false? result) + (values env* result) + (loop env* (cdr operands)))))))) + (= . ,(lambda (env slot value) + ;; TOOD if slot isn't a variable, but a field (or array index) + ;; then it needs to be resolved... + (let ((env* result (c-eval env value))) + (values (env-set! env* slot result) + result)))) + (and_eq ,(mod-set 'bitand)) ; &= + (or_eq ,(mod-set 'bitor)) ; |= + (xor_eq ,(mod-set 'xor)) ; ^= + (+= ,(mod-set '+)) + (-= ,(mod-set '-)) + (*= ,(mod-set '*)) + (/= ,(mod-set '/)) + (<<= ,(mod-set '<<)) + (>>= ,(mod-set '>>)) + (%= ,(mod-set '%)) + (+ . ,(simple-infix +)) + (* . ,(simple-infix *)) + (/ . ,(simple-infix /)) + (- . ,(lambda (env op . operands) + (if (null? operands) + (let ((env* value (c-eval env op))) + (values env* (- value))) + (apply (simple-infix -) + env op operands)))) + (bitor . ,(simple-infix bitwise-ior)) + (bitand . ,(simple-infix bitwise-and)) + (xor . ,(simple-infix bitwise-xor)) + (not_eq . ,(lambda (env a b) (c-eval env `(not (== ,a ,b))))) ; != + (<< . ,(binary-operator bitwise-arithmetic-shift-left)) + (>> . ,(binary-operator bitwise-arithmetic-shift-right)) + (< . ,(binary-operator (compose boolean->c-boolean <))) + (> . ,(binary-operator (compose boolean->c-boolean >))) + ;; this assumes that = handles pointers + (== . ,(binary-operator (compose boolean->c-boolean =))) + (<= . ,(binary-operator (compose boolean->c-boolean <=))) + (>= . ,(binary-operator (compose boolean->c-boolean >=))) + (% . ,(binary-operator modulo)) + + (not . ,(lambda (env value) + (let ((env* result (c-eval env value))) + (values env* (c-not result))))) + (compl . ,(lambda (env value) + (let ((env* result (c-eval env value))) + (values env* (bitwise-not result))))) + + ;; ++C + (pre-increment . ,(lambda (env slot) (c-eval env `(+= ,slot 1)))) + (pre-decrement . ,(lambda (env slot) (c-eval env `(-= ,slot 1)))) + ;; TODO these (C++, C--) need to handle if slot isn't a direct variabl + (post-increment . ,(lambda (env slot) + (let ((value (env-ref env slot))) + (values (env-set! env slot (1+ value)) + value)))) + (pre-decrement . ,(lambda (env slot) + (let ((value (env-ref env slot))) + (values (env-set! env slot (1+ value)) + value)))) + + (ternary . ,(lambda (env test true-clause false-clause) + (let ((env* value (c-eval env test))) + (c-eval env* + (if (c-boolean->boolean value) + true-clause false-clause))))) + + ;; TODO remaining operations + (as-type . ,(lambda (env target-type value) + (format (current-error-port) "cast<~s>(~s)~%" target-type value) + (values env value))) + + (object-slot . ,(lambda (env object slot) + (scm-error 'not-implemented "object-slot" + "Object slots are not implemented, when accessing ~s.~s" + (list object slot) #f))) + (dereference-slot ,(lambda (env ptr slot) + (scm-error 'not-implemented "dereference-slot" + "Object slots are not implemented, when accessing ~s->~s" + (list object slot) #f))) + (dereference . ,(lambda (env ptr) + (scm-error 'not-implemented "dereference" + "Poiner dereferencing is not implemented: *~s" + (list ptr) #f))) + (pointer . ,(lambda (env value) + (scm-error 'not-implemented "pointer" + "Pointer of is not implemented: &~s" + (list value) #f))))) + +;; TODO |,| + + +(define (c-eval environment expression) + (match expression + (('lambda formals body) (values environment `(lambda ,formals ,body))) + ;; hack since sizeof really should be a operator + (('funcall 'sizeof arg) + ;; TODO + (format (current-error-port) "sizeof ~s~%" arg) + (values environment 1)) + + (('funcall procedure-name ('#{,}# args ...)) + (let ((procedure (env-ref environment procedure-name))) + (ensure-c-procedure procedure "c-eval") + (unless (= (length args) (procedure-arity procedure)) + (scm-error 'c-eval-error "c-eval" + "Procedure arity mismatch for ~s, expected ~s, got ~s" + (list procedure-name + (procedure-arity procedure) + (length args)) + #f)) + (let ((env args* (fold-2 c-eval (cons environment '()) args ))) + (let ((inner-environment + (fold (lambda (name value env) (env-set! env name value)) + (push-frame! env) + (procedure-formals procedure) args*))) + (let ((resulting-environment + result-value + (c-eval inner-environment (procedure-body procedure)))) + (values (pop-frame! resulting-environment) + result-value)))))) + + (('funcall procedure arg) + (c-eval environment `(funcall ,procedure (#{,}# ,arg)))) + + ((operator operands ...) + (apply (or (assoc-ref primitives operator) + (scm-error 'c-eval-error "c-eval" + "Applying non-existant primitive operator: ~s, operands: ~s" + (list operator operands) #f)) + environment operands)) + + ;; "f()" gets compiled to simply f + ;; meaning that we instead use the environment to determine + ;; if something is a variable or procedure + (expr + (if (literal? expr) + (values environment expr) + (let ((value (env-ref environment expr))) + (if (c-procedure? value) + (c-eval environment `(funcall ,value (#{,}#))) + (values environment value))))))) diff --git a/module/c/eval/environment.scm b/module/c/eval/environment.scm new file mode 100644 index 00000000..12eefaf7 --- /dev/null +++ b/module/c/eval/environment.scm @@ -0,0 +1,34 @@ +(define-module (c eval environment) + :use-module (srfi srfi-1) + :export (make-environment + env-set! env-ref push-frame! pop-frame!)) + +(define (make-frame) + (make-hash-table)) + +(define (make-environment) + (list (make-frame))) + +;; Returns an updated environment, linear update +(define (env-set! env key value) + ;; get handle to differentiate #f + ;; (even though #f should never be stored since it's not a C value) + (cond ((find (lambda (frame) (hashq-get-handle frame key)) env) + => (lambda (frame) (hashq-set! frame key value))) + (else (hashq-set! (car env) key value))) + env) + +(define (env-ref env key) + (cond ((null? env) + (scm-error 'misc-error "env-ref" + "~s unbound" + (list key) + #f)) + ((hashq-get-handle (car env) key) => cdr) + (else (env-ref (cdr env) key)))) + +(define (push-frame! environment) + (cons (make-frame) environment)) + +(define (pop-frame! environment) + (cdr environment)) diff --git a/module/c/lex.scm b/module/c/lex.scm index 34e52d88..0bde5c9e 100644 --- a/module/c/lex.scm +++ b/module/c/lex.scm @@ -43,8 +43,23 @@ (define-peg-pattern integer all (and (or base-8 base-16 base-10) (? integer-suffix))) +(define-peg-pattern float-suffix all + (* (or "f" "F" "l" "L"))) + +(define-peg-pattern exponent all + (and (ignore (or "e" "E")) (? (or "+" "-")) base-10)) + +;; Helper patterns for creating named groups in float +(define-peg-pattern float-integer all base-10) +(define-peg-pattern float-decimal all base-10) + +(define-peg-pattern float all + (or (and float-integer exponent (? float-suffix)) + (and (? float-integer) (ignore ".") float-decimal (? exponent) (? float-suffix)) + (and float-integer (ignore ".") (? exponent) (? float-suffix)))) + (define-peg-pattern number body - (or integer)) + (or float integer)) (define-peg-pattern group all (and (ignore "(") expr (ignore ")"))) @@ -65,11 +80,16 @@ (define-peg-pattern char all (and (ignore "'") (or escaped-char peg-any) (ignore "'"))) +(define-peg-pattern quot none "\"") + +(define-peg-pattern string all + (and quot (* (or escaped-char (and (not-followed-by "\"") peg-any))) quot)) (define-peg-pattern* operator all `(or ,@(map symbol->string symbol-binary-operators) ,@(map (lambda (op) `(and ,(symbol->string op) ws)) - wordy-binary-operators))) + wordy-binary-operators) + "?" ":")) ;; whitespace (define-peg-pattern ws none @@ -89,17 +109,23 @@ base-10-digit)))) (define-peg-pattern prefix-operator all - (or "!" "~" "*" "&" "++" "--" "+" "-")) + ;; It's important that ++ and -- are BEFORE + and - + ;; otherwise the first + is found, leaving the second +, which fails + ;; to lex since it's an invalid token + ;; TODO sizeof can be written as a prefix operator + ;; (without parenthesis) if the operand is an expression. + (or "*" "&" "++" "--" + "!" "~" "+" "-")) + ;;; Note that stacked pre or postfix operators without parenthesis ;;; dosen't work. So `*&C' is invalid, while `*(&C)' is valid. (define-peg-pattern prefix all - (and prefix-operator sp (or variable group funcall #; postfix - ))) + (and prefix-operator sp (or variable group funcall literal))) (define-peg-pattern postfix-operator all - (or "++" "--")) + (or "++" "--" "*")) (define-peg-pattern postfix all ;; literals can't be in-place incremented and decremented @@ -111,15 +137,25 @@ ;; first case is "same" as expr, but in different order to prevent ;; infinite self reference. Pre and postfix not here, solved by having ;; them before infix in expr - (and (or funcall postfix prefix group char number variable) + (and (or funcall postfix prefix group literal variable) sp operator sp expr)) (define-peg-pattern funcall all (and variable sp group)) +(define-peg-pattern literal body + (or char string number)) + ;;; main parser (define-peg-pattern expr body - (+ (and sp (or infix postfix prefix funcall group char number variable) + (+ (and sp (or + ;; float must be BEFORE infix, otherwise 3.2 is parsed as (infix 3 (operator ".") 2) + ;; that however breaks the infix logic, meaning that floating point numbers can't be + ;; used in basic arithmetic. + ;; TODO remove all implicit order of operations handling in the lexer, and move it to + ;; the parser. This should also fix the case of typecasts being applied incorrectly. + float + infix postfix prefix funcall group literal variable) sp))) diff --git a/module/c/operators.scm b/module/c/operators.scm index ab1b3e7c..910dc8a9 100644 --- a/module/c/operators.scm +++ b/module/c/operators.scm @@ -9,8 +9,9 @@ `(+ - * / & ,(symbol #\|) ^ << >> % < > =)) ;; apparently part of C +;; https://en.cppreference.com/w/cpp/language/operator_alternative (define wordy-binary-operators - '(bitand and_eq and bitor or_eq or xor_eq xor)) + '(bitand and_eq and bitor or_eq or xor_eq xor not_eq)) (define symbol-binary-operators (append (map (lambda (x) (symbol-append x '=)) simple-operators) diff --git a/module/c/parse.scm b/module/c/parse.scm index 8030da77..7d11ea17 100644 --- a/module/c/parse.scm +++ b/module/c/parse.scm @@ -1,11 +1,14 @@ (define-module (c parse) :use-module (hnh util) :use-module (srfi srfi-1) + :use-module (srfi srfi-71) :use-module (ice-9 match) + :use-module ((rnrs io ports) + :select (string->bytevector make-transcoder utf-8-codec)) + :use-module (rnrs bytevectors) :export (parse-lexeme-tree)) -;;; Rename this -(define (perms set) +(define (permutations set) (concatenate (map (lambda (key) (map (lambda (o) (cons key o)) @@ -21,23 +24,124 @@ (define valid-sequences (delete 'dummy (lset-union eq? '(dummy) - (map symbol-concat (perms '(() U L))) - (map symbol-concat (perms '(() U LL)))))) + (map symbol-concat (permutations '(() U L))) + (map symbol-concat (permutations '(() U LL)))))) ;; => (LLU ULL LL LU UL L U) (aif (memv (string->symbol (string-upcase str)) valid-sequences) (case (car it) - [(LLU ULL) '(unsigned long-long)] + [(LLU ULL) '(unsigned long long)] [(LU UL) '(unsigned long)] - [(LL) '(long-long)] + [(LL) '(long long)] [(L) '(long)] [(U) '(unsigned)]) (scm-error 'c-parse-error "parse-integer-suffix" "Invalid integer suffix ~s" (list str) #f))) +(define (parse-float-suffix str) + (case (string->symbol str) + ((f F) '(float)) + ((l L) '(long double)))) + +(define (group-body->type vars) + (concatenate + (map + (match-lambda (('variable var) (list (parse-lexeme-tree `(variable ,var)))) + (('postfix ('variable var) + ('postfix-operator "*")) + (list (parse-lexeme-tree `(variable ,var)) + '*)) + (else (scm-error 'c-parse-error "parse-lexeme-tree" + "Invalid token ~s in typecast form: ~s" + (list else vars) #f))) + vars))) + +;; Takes a list of strings and integers, and merges it all into a single +;; bytevector representing a c string +(define* (string-fragments->c-string + fragments optional: (transcoder (make-transcoder (utf-8-codec)))) + + (define fragments-fixed + (map (lambda (frag) + (if (string? frag) + (string->bytevector frag transcoder) + frag)) + fragments)) + + (define bv-length + (fold (lambda (item sum) (+ sum (if (bytevector? item) + (bytevector-length item) + 1))) + 0 fragments-fixed)) + + (define bv (make-bytevector (1+ bv-length))) + ;; trailing null byte + (bytevector-u8-set! bv bv-length 0) + (fold (lambda (item idx) + (cond ((bytevector? item) + (bytevector-copy! item 0 + bv idx + (bytevector-length item)) + (+ idx (bytevector-length item))) + (else (bytevector-u8-set! bv idx item) + (+ idx 1)))) + 0 + fragments-fixed) + bv) + +(define (parse-float-form float-form) + (let ((float-string + (fold (lambda (arg str) + (string-append + str + (match arg + (('float-integer ('base-10 n)) n) + (('float-decimal ('base-10 n)) (string-append "." n)) + (('exponent "+" ('base-10 n)) (string-append "e" n)) + (('exponent ('base-10 n)) (string-append "e" n)) + (('exponent "-" ('base-10 n)) (string-append "e-" n))))) + "" float-form))) + ;; exact->inexact is a no-op if we already have an inexact number, but + ;; ensures we get an inexact number when we have an exact number (which we + ;; can get from the "1." case). Returning an inexact number here is important + ;; to avoid arithmetic suprises later. + (exact->inexact + (or (string->number float-string) + (scm-error 'c-parse-error "parse-lexeme-tree" + "Couldn't parse expression as float: ~s" + (list `(float ,@args)) #f))))) + + +(define (resolve-escaped-char form) + (match form + (('base-8-char n) (string->number n 8)) + (('base-16-char n) (string->number n 16)) + (c (char->integer + (case (string-ref c 0) + ((#\a) #\alarm) + ((#\b) #\backspace) + ((#\e) #\esc) ;; non-standard + ((#\f) #\page) + ((#\n) #\newline) + ((#\r) #\return) + ((#\t) #\tab) + ((#\v) #\vtab) + ((#\\) #\\) + ;; These are valid in both strings and chars + ((#\') #\') + ((#\") #\")))))) + +;; Takes a list of strings and escaped-char form +;; and returns a list of strings and integers +(define (resolve-string-fragment fragment) + (match fragment + (('escaped-char c) + (resolve-escaped-char c)) + (fargment fragment))) + (define (parse-lexeme-tree tree) (match tree ['() '()] @@ -50,55 +154,67 @@ [('integer n ('integer-suffix suffix)) `(as-type ,(parse-integer-suffix suffix) - ,(parse-lexeme-tree n)) - ] + ,(parse-lexeme-tree n))] + [('integer n) (parse-lexeme-tree n)] + + [('float args ... ('float-suffix suffix)) + `(as-type ,(parse-float-suffix suffix) + ;; parse rest of float as if it lacked a suffix + ,(parse-lexeme-tree `(float ,@args)))] + + [('float args ...) (parse-float-form args)] + ;; Character literals, stored as raw integers ;; so mathematical operations keep working on them. - [('char ('escaped-char ('base-8-char n))) - (-> n (string->number 8) #; integer->char)] - [('char ('escaped-char ('base-16-char n))) - (-> n (string->number 16) #; integer->char)] - [('char ('escaped-char c)) - (char->integer - (case (string-ref c 0) - ((#\a) #\alarm) - ((#\b) #\backspace) - ((#\e) #\esc) - ((#\f) #\page) - ((#\n) #\newline) - ((#\r) #\return) - ((#\t) #\tab) - ((#\v) #\vtab) - ((#\\) #\\) - ((#\') #\')))] + [('char ('escaped-char c)) (resolve-escaped-char c)] + [('char c) (char->integer (string-ref c 0))] [('variable var) (string->symbol var)] + + ;; normalize some binary operators to their wordy equivalent + ;; (which also happens to match better with scheme) + [('operator "&&") 'and] + [('operator "&=") 'and_eq] + [('operator "&") 'bitand] + [('operator "|") 'bitor] + [('operator "!=") 'not_eq] + [('operator "||") 'or] + [('operator "|=") 'or_eq] + [('operator "^") 'xor] + [('operator "^=") 'xor_eq] + ;; Change these names to something scheme can handle better + [('operator ".") 'object-slot] + [('operator "->") 'dereference-slot] [('operator op) (string->symbol op)] + [('prefix-operator op) (case (string->symbol op) + ((!) 'not) + ((~) 'compl) ((*) 'dereference) ((&) 'pointer) ((++) 'pre-increment) ((--) 'pre-decrement) - (else => identity))] + ((-) '-) + (else (scm-error 'c-parse-error "parse-lexeme-tree" + "Unknown prefix operator ~s" + (list op) #f)))] [('postfix-operator op) (case (string->symbol op) [(++) 'post-increment] [(--) 'post-decrement] - [else => identity])] + [else (scm-error 'c-parse-error "parse-lexeme-tree" + "Unknown postfix operator ~s" + (list op) #f)])] ;; Parenthesis grouping - [('group args) + [('group args ...) (parse-lexeme-tree args)] - ;; Atomic item. Used by flatten-infix - [('atom body) - (parse-lexeme-tree body)] - [('prefix op arg) `(,(parse-lexeme-tree op) ,(parse-lexeme-tree arg))] @@ -107,81 +223,204 @@ `(,(parse-lexeme-tree op) ,(parse-lexeme-tree arg))] + + + + + ;; resolved-operator and ternary are the return "types" + ;; of resolve-order-of-operations + [(('resolved-operator op) args ...) + `(,op ,@(map parse-lexeme-tree args))] + + [('ternary a b c) + `(ternary ,(parse-lexeme-tree a) + ,(parse-lexeme-tree b) + ,(parse-lexeme-tree c))] + + + + + ;; Is it OK for literal strings to be "stored" inline? + ;; Or must they be a pointer? + ['string #vu8(0)] + [('string str ...) + (-> (map resolve-string-fragment str) + string-fragments->c-string)] + + ;; implicit concatenation of string literals + [(('string str ...) ...) + (-> (map resolve-string-fragment (concatenate str)) + string-fragments->c-string)] + [('infix args ...) - (resolve-order-of-operations - (flatten-infix (cons 'infix args)))] + (let ((r (resolve-order-of-operations + (flatten-infix (cons 'infix args))))) + (parse-lexeme-tree r))] + [('funcall function ('group arguments)) `(funcall ,(parse-lexeme-tree function) ,(parse-lexeme-tree arguments))] - [bare (scm-error 'c-parse-error - "parse-lexeme-tree" - "Naked literal in lex-tree: ~s" - (list bare) - #f)])) + [(('variable "struct") ('variable value) ..1) + `(struct-type ,@(map string->symbol value)) + ] + + ;; A list of variables. Most likely a type signature + ;; [(('variable value) ..1) + ;; ] + + ;; A typecast with only variables must (?) be a typecast? + [(('group groups ..1) ... value) + (fold-right (lambda (type done) `(as-type ,type ,done)) + (parse-lexeme-tree value) + (map group-body->type groups))] + + ;; Type name resolution? + ;; https://en.wikipedia.org/wiki/C_data_types + ;; base types with spaces: + ;; ======================= + ;; [[un]signed] char + ;; [[un]signed] short [int] + ;; [[un]signed] int + ;; [un]signed [int] + ;; [[un]signed] long [int] + ;; [[un]signed] long long [int] + ;; float + ;; [long] double + + ;; https://en.wikipedia.org/wiki/Type_qualifier + ;; qualifiers + ;; const + ;; volatile + ;; restrict + ;; _Atomic + + + ;; Storage specifiers + ;; auto + ;; register + ;; static + ;; extern + + ;; struct <typename> + ;; enum <typename> + ;; union <typename> + + ;; https://en.wikipedia.org/wiki/C_syntax + ;; int (*ptr_to_array)[100] + + + [(? symbol? bare) + (scm-error 'c-parse-error + "parse-lexeme-tree" + "Naked literal in lex-tree: ~s" + (list bare) + #f)] + + [form + (scm-error 'c-parse-error + "parse-lexeme-tree" + "Unknown form in lex-tree: ~s" + (list form) #f) + ])) ;; https://en.wikipedia.org/wiki/Operators_in_C_and_C%2B%2B +;; https://en.cppreference.com/w/c/language/operator_precedence (define order-of-operations (reverse - (concatenate - ;; This is only for binary operations - `((-> ,(symbol #\.)) - (* / %) - (+ -) - (<< >>) - (< <= > >=) - (== !=) - (&) - (^) - (,(symbol #\|)) - (&&) - (,(symbol #\| #\|)) - (= += -= *= /= %= <<= >>= &= ^= ,(symbol #\| #\=)) - (,(symbol #\,)) - )))) - -(define (mark-other form) - (if (list? form) (cons '*other* form) form)) + ;; This is only for binary operations + `((-> ,(symbol #\.)) + ;; All unary procedures go here, incnluding typecasts, and sizeof + (* / %) + (+ -) + (<< >>) + (< <= > >=) + (== != not_eq) + (& bitand) + (^ xorg) + (,(symbol #\|) bitor) + (&& and) + (,(symbol #\| #\|) or) + (? :) + (= += -= *= /= %= <<= >>= &= ^= ,(symbol #\| #\=) + and_eq or_eq xor_eq) + (,(symbol #\,)) + ))) + +;; a.b->c.d (. (-> (. a b) c) d) +;; 2 * 3 / 4 * 5 => (* (/ (* 2 3) 4) 5) +;; eller => (* 2 (/ 3 4) 5) (define* (resolve-order-of-operations tree optional: (order order-of-operations)) (if (null? order) - (car tree) + (scm-error 'c-parse-error + "resolve-order-of-operations" + "Out of operations to resolve when resolving expression ~s" + (list tree) #f) (match tree - [('*other* body ...) body] - [(form) (resolve-order-of-operations form order)] - [(forms ...) - (match (split-by forms (car order)) - [(group) (resolve-order-of-operations group (cdr order))] - [groups - (cons (car order) - (map (lambda (form) (resolve-order-of-operations - form order-of-operations)) - groups))])] - [a a]))) + [('fixed-infix form) form] + [('fixed-infix forms ...) + (match (split-by-one-of forms (car order)) + [(group) + (resolve-order-of-operations (cons 'fixed-infix group) + (cdr order))] + [(a ('? b ...) (': c ...)) + `(ternary ,(resolve-order-of-operations (cons 'fixed-infix a) (cdr order)) + ,(resolve-order-of-operations (cons 'fixed-infix b) (cdr order)) + ,(resolve-order-of-operations (cons 'fixed-infix c) (cdr order)))] + [(first rest ...) + ;; TODO this is only valid for the associative operators (+, ...) + ;; but not some other (<, ...) + (if (apply eq? (map car rest)) + (let ((op (caar rest))) + `((resolved-operator ,op) + ,@(map (lambda (x) (resolve-order-of-operations (cons 'fixed-infix x) + (cdr order))) + (cons first (map cdr rest))))) + (fold (lambda (item done) + (let ((operator args (car+cdr item))) + `((resolved-operator ,operator) + ,done ,(resolve-order-of-operations + (cons 'fixed-infix args) + (cdr order))))) + (resolve-order-of-operations (cons 'fixed-infix first) + (cdr order)) + rest))])]))) + +;; 1 * 2 / 3 * 4 +;; ⇒ ((1) (* 2) (/ 3) (* 4)) +;; (1) +;; (* (1) 2) +;; (/ (* (1) 2) 3) +;; (* (/ (* (1) 2) 3) 4) ;; Flatens a tree of infix triples. Stops when it should. ;; (parenthesis, function calls, ...) (define (flatten-infix form) - (match form - [('infix left op ('infix right ...)) - (cons* (parse-lexeme-tree left) - (parse-lexeme-tree op) - (flatten-infix (cons 'infix right)))] - - [('infix left op right) - (list (mark-other (parse-lexeme-tree left)) - (parse-lexeme-tree op) - (mark-other (parse-lexeme-tree right)))] - - [other (scm-error 'c-parse-error - "flatten-infix" - "Not an infix tree ~a" - (list other) - #f)])) + (cons 'fixed-infix + (let loop ((form form)) + (match form + [('infix left op ('infix right ...)) + (cons* left + (parse-lexeme-tree op) + (loop (cons 'infix right)))] + + [('infix left op right) + (list left + (parse-lexeme-tree op) + right)] + + [('infix form) form] + + [other (scm-error 'c-parse-error + "flatten-infix" + "Not an infix tree ~a" + (list other) + #f)])))) diff --git a/module/hnh/util.scm b/module/hnh/util.scm index d2c0dd5f..9a45704b 100644 --- a/module/hnh/util.scm +++ b/module/hnh/util.scm @@ -34,6 +34,8 @@ group-by split-by + split-by-one-of + span-upto cross-product @@ -341,6 +343,29 @@ (cdr rem))]))) +(define (split-by-one-of lst items) + (cond ((null? items) + (scm-error 'wrong-type-arg "split-by-one-of" + "Must have at least one item to split by, when splitting ~s" + (cons items '()) #f)) + ((not (list? items)) + (scm-error 'wrong-type-arg "split-by-one-of" + "Items must be list of list of symbols, got ~s" + (list items) #f)) + (else + (call-with-values + (lambda () + (car+cdr + (let loop ((token 'sentinel-token) (lst lst)) + (let ((head tail (break (lambda (item) (memv item items)) + lst))) + (let ((group (cons token head))) + (if (null? tail) + (list group) + (cons group (loop (car tail) (cdr tail))))))))) + ;; Remove the sentinel token + (lambda (first rest) (cons (cdr first) rest)))))) + ;; Simar to span from srfi-1, but never takes more than ;; @var{count} items. Can however still take less. diff --git a/module/hnh/util/path.scm b/module/hnh/util/path.scm index ea081e85..0c8af48a 100644 --- a/module/hnh/util/path.scm +++ b/module/hnh/util/path.scm @@ -3,6 +3,7 @@ :use-module (srfi srfi-71) :use-module (hnh util) :export (path-append + path-absolute? path-join path-split file-hidden? @@ -12,6 +13,8 @@ (define // file-name-separator-string) (define /? file-name-separator?) +(define path-absolute? absolute-file-name?) + (define (path-append . strings) (fold (lambda (s done) (string-append diff --git a/module/srfi/srfi-64/util.scm b/module/srfi/srfi-64/util.scm new file mode 100644 index 00000000..a371227f --- /dev/null +++ b/module/srfi/srfi-64/util.scm @@ -0,0 +1,11 @@ +(define-module (srfi srfi-64 util) + :use-module (ice-9 curried-definitions) + :use-module ((srfi srfi-1) :select (every)) + :use-module (srfi srfi-64) + :export (test-match-group)) + +;; Specifier for name of group +(define ((test-match-group name . names) runner) + (every string=? + (reverse (cons name names)) + (test-runner-group-stack runner))) diff --git a/tests/run-tests.scm b/tests/run-tests.scm index 5270636e..3955a6a2 100755 --- a/tests/run-tests.scm +++ b/tests/run-tests.scm @@ -27,6 +27,7 @@ fi (ice-9 pretty-print) (ice-9 getopt-long) (ice-9 match) + (ice-9 regex) (system vm coverage) ((all-modules) :select (fs-find)) ) @@ -54,6 +55,24 @@ fi (define (make-indent depth) (make-string (* 2 depth) #\space)) +(define (string-replace-head s1 s2) + (string-replace s1 s2 + 0 (string-length s2))) + +(define (pp form indent prefix-1) + (let ((prefix (make-string (+ (string-length indent) + (string-length prefix-1)) + #\space))) + (display + (string-replace-head + (with-output-to-string + (lambda () (pretty-print + form + per-line-prefix: prefix + width: (- 79 (string-length indent))))) + (string-append indent prefix-1))))) + + (define (construct-test-runner) (define runner (test-runner-null)) (define depth 0) @@ -75,7 +94,10 @@ fi (cond ((test-runner-test-name runner) (negate string-null?) => identity) ((test-result-ref runner 'expected-value) - => (lambda (p) (with-output-to-string (lambda () (display (bold "[SOURCE]: ")) (truncated-print p width: 60)))))))) + => (lambda (p) (with-output-to-string + (lambda () + (display (bold "[SOURCE]: ")) + (truncated-print p width: 60)))))))) (when (eq? 'fail (test-result-kind)) (cond ((test-result-ref runner 'actual-error) => (lambda (err) @@ -94,12 +116,12 @@ fi (unknown-actual (gensym))) (let ((expected (test-result-ref runner 'expected-value unknown-expected)) (actual (test-result-ref runner 'actual-value unknown-actual))) - (if (eq? expected unknown-expected) - (format #t "~aAssertion failed, received ~s~%" - (make-indent (1+ depth)) actual) - (format #t "~aExpected: ~s~%~aReceived: ~s~%" - (make-indent (1+ depth)) expected - (make-indent (1+ depth)) actual)))))) + (let ((indent (make-indent (1+ depth)))) + (if (eq? expected unknown-expected) + (format #t "~aAssertion failed~%" indent) + (begin + (pp expected indent "Expected: ") + (pp actual indent "Received: ")))))))) (format #t "~aNear ~a:~a~%" (make-indent (1+ depth)) (test-result-ref runner 'source-file) @@ -203,9 +225,6 @@ fi ;; (format #t "Running on:~%~y~%" files) -(awhen (option-ref options 'only #f) - (set! files (list (path-append "test" it)))) - ((@ (hnh util exceptions) warnings-are-errors) #t) @@ -240,9 +259,38 @@ fi (test-begin "suite") -(awhen (option-ref options 'skip #f) - (format #t "Skipping ~s~%" it) - (test-skip it)) + +(define onlies + (let %loop ((args (command-line)) (onlies '())) + (define* (loop args key: only) + (if only + (%loop args (cons only onlies)) + (%loop args onlies))) + (if (null? args) + onlies + (cond ((string-match "^--skip(=.*)?$" (car args)) + => (lambda (m) + (cond ((match:substring m 1) + => (lambda (s) + (format #t "Skipping ~s~%" s) + (test-skip s) + (loop (cdr args)))) + (else (format #t "Skipping ~s~%" (cadr args)) + (test-skip (cadr args)) + (loop (cddr args)))))) + ((string-match "^--only(=.*)?$" (car args)) + => (lambda (m) + (cond ((match:substring m 1) + => (lambda (s) + (loop (cdr args) only: s))) + (else (loop (cddr args) only: (cadr args)))))) + (else (loop (cdr args))))))) + +(unless (null? onlies) + (set! files + (map (lambda (x) (path-append "test" x)) + ;; reverse only until I have built a dependency graph for tests + (reverse onlies)))) (finalizer (lambda () (for-each (lambda (f) (catch/print-trace (lambda () (test-group f (load f))))) files))) diff --git a/tests/test/c-parse.scm b/tests/test/c-parse.scm new file mode 100644 index 00000000..c16958de --- /dev/null +++ b/tests/test/c-parse.scm @@ -0,0 +1,69 @@ +;;; Commentary +;; Test implementation details of C parser +;; TODO Should be ran before (test cpp) +;;; Code + +(define-module (test cpp) + :use-module (srfi srfi-64) + :use-module (srfi srfi-88) + :use-module ((c lex) :select (lex)) + :use-module (c parse)) + +(define flatten-infix (@@ (c parse) flatten-infix)) +(define resolve-order-of-operations (@@ (c parse) resolve-order-of-operations)) + +(test-group "Flatten infix" + (test-equal "Simple binary operator" + '(fixed-infix (integer (base-10 "1")) + + + (integer (base-10 "2"))) + (flatten-infix (lex "1 + 2"))) + + (test-equal "Simple binary operator, with compound structure in on branch" + '(fixed-infix (integer (base-10 "1")) + + + (funcall (variable "f") + (group (integer (base-10 "2"))))) + (flatten-infix (lex "1 + f(2)")))) + +(test-group "Order of operations" + (test-equal "Basic binary operator" + '((resolved-operator +) + (integer (base-10 "1")) + (integer (base-10 "2"))) + (resolve-order-of-operations (flatten-infix (lex "1 + 2")))) + + (test-equal "Multiple operators, with non-left-associative application" + '((resolved-operator +) + (integer (base-10 "1")) + ((resolved-operator *) + (integer (base-10 "2")) + (integer (base-10 "3")))) + (resolve-order-of-operations (flatten-infix (lex "1 + 2 * 3")))) + + (test-equal "Multiple of the same operation gets clumed together" + '((resolved-operator +) + (integer (base-10 "1")) + (integer (base-10 "2")) + (integer (base-10 "3"))) + (resolve-order-of-operations (flatten-infix (lex "1 + 2 + 3")))) + + (test-equal "Simple Ternary" + '(ternary + (integer (base-10 "1")) + (integer (base-10 "2")) + (integer (base-10 "3"))) + (resolve-order-of-operations (flatten-infix (lex "1 ? 2 : 3")))) + + (test-equal "ternary with further infix operators" + '(ternary ((resolved-operator +) + (integer (base-10 "1")) + (integer (base-10 "2"))) + ((resolved-operator %) + (integer (base-10 "3")) + (integer (base-10 "4"))) + ((resolved-operator *) + (integer (base-10 "5")) + (integer (base-10 "6")))) + (resolve-order-of-operations (flatten-infix (lex "1 + 2? 3 % 4 : 5 * 6"))))) + diff --git a/tests/test/cpp.scm b/tests/test/cpp.scm index 9c720fde..1294bc96 100644 --- a/tests/test/cpp.scm +++ b/tests/test/cpp.scm @@ -3,37 +3,602 @@ ;;; Code: (define-module (test cpp) + :use-module (srfi srfi-1) :use-module (srfi srfi-64) :use-module (srfi srfi-88) :use-module ((c lex) :select (lex)) - :use-module ((c parse) :select (parse-lexeme-tree))) + :use-module ((c parse) :select (parse-lexeme-tree)) + :use-module ((c eval) :select (c-eval)) + :use-module ((c eval environment) :select (make-environment env-set!)) + :use-module ((rnrs arithmetic bitwise) + :select (bitwise-xor))) + +;; Note that the lexer's output isn't stable. +;; The tests here are more to see where the lexer succeeds but the parser fails. +;; So changing the lexer test cases isn't a problem +;; but don't change the parser test cases + +;; __asm__ always has strings as arguments +(test-skip "__asm__") + +;; Lexer produces garbage when attempted. Fixing this would also fix cast +;; operations. +(test-skip "Float in infix expression") +;; order of operation is wrong, leading to an incorrect result +(test-skip "Cast with operation") + +;; not implemented +(test-skip "Token concatenation") + +;; A string follewed by a macro (which expands to a string) +;; should be concatenated. This is however not yet implemented +(test-skip "Implicit concatenation of string and macro") (define run (compose parse-lexeme-tree lex)) -(test-equal - '(+ (post-increment (dereference C)) 3) - (run "(*C)++ + 3")) +(define (alist->environment alist) + (fold (lambda (pair env) + (apply env-set! env pair)) + (make-environment) + alist)) + +(define (exec form . base-bindings) + (call-with-values + (lambda () (c-eval (alist->environment base-bindings) + (run form))) + (lambda (env value) value))) + +(define-syntax let-group + (syntax-rules () + ((let ((form name) rest ...) body ...) + (test-group name + (let ((form name) + rest ...) + body ...))))) + +(let-group + ((form "(*C)++ + 3")) + (test-equal '(infix (postfix (group (prefix (prefix-operator "*") + (variable "C"))) + (postfix-operator "++")) + (operator "+") + (integer (base-10 "3"))) + (lex form)) + (test-equal '(+ (post-increment (dereference C)) 3) + (run form))) + +(let-group + ((form "*C++ + 3")) + (test-equal '(infix (postfix (prefix (prefix-operator "*") + (variable "C")) + (postfix-operator "++")) + (operator "+") + (integer (base-10 "3"))) + (lex form)) + (test-equal '(+ (post-increment (dereference C)) 3) + (run form))) + +(let-group + ((form "*C++")) + (test-equal '(postfix (prefix (prefix-operator "*") + (variable "C")) + (postfix-operator "++")) + (lex form)) + (test-equal '(post-increment (dereference C)) + (run form))) + +(let-group + ((form "C++ + C++")) + (test-equal '(infix (postfix (variable "C") + (postfix-operator "++")) + (operator "+") + (postfix (variable "C") + (postfix-operator "++"))) + (lex form)) + (test-equal '(+ (post-increment C) (post-increment C)) + (run form))) + +(let-group + ((form "++C + ++C")) + (test-equal '(infix (prefix (prefix-operator "++") + (variable "C")) + (operator "+") + (prefix (prefix-operator "++") + (variable "C"))) + (lex form)) + (test-equal '(+ (pre-increment C) (pre-increment C)) + (run form))) + +(let-group + ((form "2 + 2 * 2")) + (test-equal '(infix (integer (base-10 "2")) + (operator "+") + (infix (integer (base-10 "2")) + (operator "*") + (integer (base-10 "2")))) + (lex form)) + (test-equal '(+ 2 (* 2 2)) (run form)) + (test-equal 6 (exec form))) + +(let-group + ((form "2 * 2 + 2")) + (test-equal '(infix (integer (base-10 "2")) + (operator "*") + (infix (integer (base-10 "2")) + (operator "+") + (integer (base-10 "2")))) + (lex form)) + (test-equal '(+ (* 2 2) 2) (run form)) + (test-equal 6 (exec form))) + +(let-group + ((form "2+2+2")) + (test-equal '(infix (integer (base-10 "2")) + (operator "+") + (infix (integer (base-10 "2")) + (operator "+") + (integer (base-10 "2")))) (lex form)) + (test-equal '(+ 2 2 2) (run form)) + (test-equal 6 (exec form))) + +(test-group "Unary minus" + (test-group "Without space" + (let ((form "-1")) + (test-equal '(prefix (prefix-operator "-") + (integer (base-10 "1"))) + (lex form)) + (test-equal '(- 1) (run form)) + (test-equal -1 (exec form)))) + + (test-group "With space" + (let ((form "- 1")) + (test-equal '(prefix (prefix-operator "-") + (integer (base-10 "1"))) + (lex form)) + (test-equal '(- 1) (run form)) + (test-equal -1 (exec form)))) + + (test-group "Before variable" + (let ((form "-x")) + (test-equal '(prefix (prefix-operator "-") + (variable "x")) + (lex form)) + (test-equal '(- x) (run form)) + (test-equal -5 (exec form '(x 5))))) + + (test-group "Before infix" + (let ((form "-x+3")) + (test-equal '(infix (prefix (prefix-operator "-") + (variable "x")) + (operator "+") + (integer (base-10 "3"))) + (lex form)) + (test-equal '(+ (- x) 3) (run form)) + (test-equal -2 (exec form '(x 5))))) + + (test-group "Inside infix expression" + (let ((form "x+-3")) + (test-equal '(infix (variable "x") + (operator "+") + (prefix (prefix-operator "-") + (integer (base-10 "3")))) + (lex form)) + (test-equal '(+ x (- 3)) (run form)) + (test-equal 2 (exec form '(x 5))))) + ) + + + + +;; Hand picked forms from output of `cpp -dM /usr/include/termios.h` on +;; FreeBSD 13.1-RELEASE releng/13.1-n250148-fc952ac2212 GENERIC amd64 +;; 2022-06-28 + +(let ((form "00000200")) + (test-equal '(integer (base-8 "0000200")) (lex form)) + (test-equal 128 (run form))) + +(let ((form "0")) + (test-equal '(integer (base-10 "0")) (lex form)) + (test-equal 0 (run form))) + +(let ((form "1000000U")) + (test-equal '(integer (base-10 "1000000") (integer-suffix "U")) (lex form)) + (test-equal '(as-type (unsigned) 1000000) (run form)) + (test-equal 1000000 (exec form))) + + +(let ((form "0x10c")) + (test-equal '(integer (base-16 "10c")) (lex form)) + (test-equal 268 (run form))) + +;; Lexer keeps original case, handled later by parser +(let ((form "0X10C")) + (test-equal '(integer (base-16 "10C")) (lex form)) + (test-equal 268 (run form))) + +(let ((form "a != b")) + (test-equal '(infix (variable "a") + (operator "!=") + (variable "b")) + (lex form)) + (test-equal '(not_eq a b) (run form)) + (test-equal 1 (exec form '(a 1) '(b 2))) + (test-equal 0 (exec form '(a 1) '(b 1))) + ) + +(let ((form "((c) == (val) && (val) != _POSIX_VDISABLE)")) + ;; (test-equal '() (lex form)) + (test-equal '(and (== c val) + (not_eq val _POSIX_VDISABLE)) + (run form)) + (test-equal 0 (exec form '(c 1) '(val 2) '(_POSIX_VDISABLE 3))) + ) + +(let ((form "CTRL('O')")) + (test-equal '(funcall (variable "CTRL") (group (char "O"))) (lex form)) + (test-equal '(funcall CTRL 79) (run form)) + (test-equal (bitwise-xor #x40 (char->integer #\O)) + (exec form + ;; Definition copied from our parsers output of + ;; preprocessing output as defined above + '(CTRL (lambda (x) + (ternary (and (>= x 97) (<= x 122)) + (+ (- x 97) 1) + (bitand (+ (- x 65) 1) 127))))))) + +(let ((form "CREPRINT")) + (test-equal '(variable "CREPRINT") (lex form)) + (test-equal 'CREPRINT (run form))) + +(let ((form "(CCTS_OFLOW | CRTS_IFLOW)")) + (test-equal '(group (infix (variable "CCTS_OFLOW") + (operator "|") + (variable "CRTS_IFLOW"))) + (lex form)) + (test-equal '(bitor CCTS_OFLOW CRTS_IFLOW) (run form))) + +;; ((x) >= 'a' && (x) <= 'z' +;; ? ((x) - 'a' + 1) +;; : (((x) - 'a' + 1) & 0x7f)) +(let ((form "((x) >= 'a' && (x) <= 'z' ? ((x) - 'a' + 1) : (((x) - 'a' + 1) & 0x7f))")) + ;; (test-equal '() (lex form)) + (test-equal '(ternary + (and (>= x #x61) + (<= x #x7A)) + (+ (- x #x61) 1) + (bitand (+ (- x #x61) 1) 127)) + (run form))) + +(let ((form "((x) & ~(IOCPARM_MASK << 16))")) + ;; (test-equal '() (lex form)) + (test-equal '(bitand x (compl (<< IOCPARM_MASK 16))) (run form))) + +(let ((form "(((x) >> 8) & 0xff)")) + ;; (test-equal '() (lex form)) + (test-equal '(bitand (>> x 8) 255) (run form))) + +(let ((form "(((x) >> 16) & IOCPARM_MASK)")) + ;; (test-equal '() (lex form)) + (test-equal '(bitand (>> x 16) IOCPARM_MASK) (run form))) + +(let ((form "((1 << IOCPARM_SHIFT) - 1)")) + ;; (test-equal '() (lex form)) + (test-equal '(- (<< 1 IOCPARM_SHIFT) 1) (run form))) + +(let ((form "_IO('t', 120)")) + (test-equal '(funcall + (variable "_IO") + (group (infix (char "t") + (operator ",") + (integer (base-10 "120"))))) + (lex form)) + (test-equal '(funcall _IO (#{,}# 116 120)) (run form))) + +;; note the lone type +(let ((form "_IOW('t', 98, int)")) + ;; (test-equal '() (lex form)) + (test-equal '(funcall _IOW (#{,}# 116 98 int)) + (run form))) + +;; note the multi-word type +(let ((form "_IOR('t', 19, struct termios)")) + ;; (test-equal '() (lex form)) + (test-equal '(funcall _IOR (#{,}# 116 19 (struct-type termios))) (run form))) + + +;; TODO concatenation rules +;; #define __CONCAT(x,y) __CONCAT1(x,y) +;; #define __CONCAT1(x,y) x ## y +;; #define __CONSTANT_CFSTRINGS__ 1 +;; #define __COPYRIGHT(s) __IDSTRING(__CONCAT(__copyright_,__LINE__),s) + +(test-group "Token concatenation" + (let ((form "x ## y")) + (test-equal '() (lex form)) + (test-equal '0 (run form)))) + +(test-group "Floating point numbers" + + (test-group "Diffent forms" + (test-group "No decimal point, exponent, no suffix" + (let ((form "10e10")) + (test-equal '(float (float-integer (base-10 "10")) + (exponent (base-10 "10"))) + (lex form)) + (test-equal 10e10 (run form)))) + + (test-group "No decimal point, negative exponent" + (let ((form "10e-10")) + (test-equal '(float (float-integer (base-10 "10")) + (exponent "-" (base-10 "10"))) + (lex form)) + (test-equal 10e-10 (run form)))) + + (test-group "No decimal point, exponent and suffix" + (let ((form "10e10L")) + (test-equal '(float (float-integer (base-10 "10")) + (exponent (base-10 "10")) + (float-suffix "L")) + (lex form)) + (test-equal '(as-type (long double) 10e10) + (run form)))) + + (test-group "Leading period, no exponent or suffix" + (let ((form ".1")) + (test-equal '(float (float-decimal (base-10 "1"))) (lex form)) + (test-equal 0.1 (run form)))) + + (test-group "Trailing period, no exponent or suffix" + (let ((form "1.")) + (test-equal '(float (float-integer (base-10 "1"))) (lex form)) + (test-equal 1.0 (run form))))) + + + (test-group "Negative float" + (let ((form "-1.0")) + (test-equal '(prefix (prefix-operator "-") + (float (float-integer (base-10 "1")) + (float-decimal (base-10 "0")))) + (lex form)) + (test-equal '(- 1.0) (run form)))) + + + + (test-group "Real world examples" + (let ((form "4.9406564584124654e-324")) + (test-equal '(float (float-integer (base-10 "4")) + (float-decimal (base-10 "9406564584124654")) + (exponent "-" (base-10 "324"))) + (lex form)) + (test-equal 4.9406564584124654e-324 (run form))) + + (let ((form "1.7976931348623157e+308")) + (test-equal '(float (float-integer (base-10 "1")) + (float-decimal (base-10 "7976931348623157")) + (exponent "+" (base-10 "308"))) + (lex form)) + (test-equal 1.7976931348623157e+308 (run form)))) + + (test-group "Float in infix expression" + (test-group "Simple case" + (let ((form "1. + .1")) + (test-equal '(infix (float (float-integer (base-10 "1"))) + (operator "+") + (float (float-decimal (base-10 "1")))) + (lex form)) + (test-equal '(+ 1.0 0.1) (run form)))) + ;; (test-group "Complicated case") + )) + +(test-group "Typecasts" + + (let ((form "(unsigned) 5")) + (test-equal '((group (variable "unsigned")) + (integer (base-10 "5"))) + (lex form)) + (test-equal '(as-type (unsigned) 5) + (run form))) + + (let ((form "(unsigned integer) 5")) + (test-equal '((group (variable "unsigned") + (variable "integer")) + (integer (base-10 "5"))) + (lex form)) + (test-equal '(as-type (unsigned integer) 5) (run form))) + + (test-group "Pointer with space" + (let ((form "(int *) 5")) + (test-equal '((group (postfix (variable "int") + (postfix-operator "*"))) + (integer (base-10 "5"))) + (lex form)) + (test-equal '(as-type (int *) 5) + (run form)))) + + (test-group "Pointer without space" + (let ((form "(int*) 5")) + (test-equal '((group (postfix (variable "int") + (postfix-operator "*"))) + (integer (base-10 "5"))) + (lex form)) + (test-equal '(as-type (int *) 5) + (run form)))) + + (test-group "Multi word type pointer" + (let ((form "(unsigned int*) 5")) + (test-equal '((group (variable "unsigned") + (postfix (variable "int") + (postfix-operator "*"))) + (integer (base-10 "5"))) + (lex form)) + (test-equal '(as-type (unsigned int *) 5) + (run form)))) + + (test-group "Double cast" + (let ((form "(int)(unsigned) 5")) + (test-equal '((group (variable "int")) + (group (variable "unsigned")) + (integer (base-10 "5"))) (lex form)) + (test-equal '(as-type (int) (as-type (unsigned) 5)) + (run form)))) + + (test-group "Cast with operation" + (let ((form "(int) 5 + 7")) + (test-equal '((group (variable "int")) + (infix (integer (base-10 "5")) + (operator "+") + (integer (base-10 "7")))) + (lex form)) + + (test-equal '(+ (as-type (int) 5) 7) + (run form)))) + + + + (test-group "Tripple cast, with value inside paranthesis" + (let ((form "(type)(__uintptr_t)(const void *)(var)")) + (test-equal '((group (variable "type")) + (group (variable "__uintptr_t")) + (group (variable "const") + (postfix (variable "void") + (postfix-operator "*"))) + (group (variable "var"))) + (lex form)) + (test-equal '(as-type (type) + (as-type (__uintptr_t) + (as-type (const void *) + var))) + (run form)))) + + (test-group "Same as above, but whole thing inside parenthesis" + (let ((form "((type)(__uintptr_t)(const void *)(var))")) + (test-equal '(group (group (variable "type")) + (group (variable "__uintptr_t")) + (group (variable "const") + (postfix (variable "void") + (postfix-operator "*"))) + (group (variable "var"))) + (lex form)) + (test-equal '(as-type (type) + (as-type (__uintptr_t) + (as-type (const void *) + var))) + (run form)))) + + (let ((form "((type)(__uintptr_t)(const volatile void *)(var))")) + ;; (test-equal '() (lex form)) + (test-equal '(as-type (type) + (as-type (__uintptr_t) + (as-type (const volatile void *) + var))) + (run form))) + + (let ((form "((unsigned long) ((inout) | (((len) & IOCPARM_MASK) << 16) | ((group) << 8) | (num)))")) + (test-equal '(group (group (variable "unsigned") (variable "long")) + (group (infix (group (variable "inout")) + (operator "|") + (infix (group (infix (group (infix (group (variable "len")) + (operator "&") + (variable "IOCPARM_MASK"))) + (operator "<<") + (integer (base-10 "16")))) + (operator "|") + (infix (group (infix (group (variable "group")) + (operator "<<") + (integer (base-10 "8")))) + (operator "|") + (group (variable "num"))))))) + (lex form)) + (test-equal '(as-type (unsigned long) + (bitor inout + (<< (bitand len IOCPARM_MASK) 16) + (<< group 8) + num)) + (run form)))) + +(test-group "Characters" + (let ((form "'c'")) + (test-equal '(char "c") (lex form)) + (test-equal #x63 (run form))) + + (let ((form "'\\n'")) + (test-equal '(char (escaped-char "n")) (lex form)) + (test-equal (char->integer #\newline) (run form)))) + +(test-group "Strings" + (test-group "Empty string" + (let ((form "\"\"")) + (test-equal 'string (lex form)) + (test-equal #vu8(0) (run form)))) + + (test-group "Simple string" + (let ((form "\"li\"")) + (test-equal '(string "li") (lex form)) + (test-equal #vu8(#x6C #x69 0) (run form)))) + + (test-group "Implicit concatenation of strings" + (let ((form "\"a\" \"b\"")) + (test-equal '((string "a") + (string "b")) + (lex form)) + (test-equal #vu8(#x61 #x62 0) + (run form)))) -(test-equal - '(+ (post-increment (dereference C)) 3) - (run "*C++ + 3")) + (test-group "Implicit concatenation of string and macro" + (let ((form "\"a\" MACRO")) + (test-equal '((string "a") (variable "MACRO")) (lex form)) + (test-equal '() (run form)))) -(test-equal - '(post-increment (dereference C)) - (run "*C++")) + (test-group "String with only escape" + (let ((form (string #\" #\\ #\" #\"))) + (test-equal `(string (escaped-char "\"")) (lex form)) + (test-equal #vu8(34 0) (run form)))) -(test-equal - '(+ (post-increment C) (post-increment C)) - (run "C++ + C++")) + (test-group "String with escape at start" + (let ((form (string #\" #\\ #\" #\a #\"))) + (test-equal `(string (escaped-char "\"") "a") (lex form)) + (test-equal #vu8(34 #x61 0) (run form)))) -(test-equal - '(+ (pre-increment C) (pre-increment C)) - (run "++C + ++C")) + (test-group "String with escape at end" + (let ((form (string #\" #\a #\\ #\" #\"))) + (test-equal `(string "a" (escaped-char "\"")) (lex form)) + (test-equal #vu8(#x61 34 0) (run form)))) -(test-equal '(+ 2 (* 2 2)) (run "2 + 2 * 2")) + (test-group "String with escape in middle" + (let ((form (string #\" #\a #\\ #\" #\b #\"))) + (test-equal `(string "a" (escaped-char "\"") "b") (lex form)) + (test-equal #vu8(#x61 34 #x62 0) (run form)))) -(test-equal '(+ (* 2 2) 2) (run "2 * 2 + 2")) + ;; \e is semi non-standard + (test-group "String with bakslash-e esacpe" + (let ((form "\"\\e\"")) + (test-equal '(string (escaped-char "e")) (lex form)) + (test-equal #vu8(#x1b 0) (run form)))) -(test-equal '(+ 2 2 2) (run "2+2+2")) + (test-group "String with byte secquence escape" + (let ((form "\"\\xf0\\x9f\\x92\\xa9\"")) + (test-equal '(string (escaped-char (base-16-char "f0")) + (escaped-char (base-16-char "9f")) + (escaped-char (base-16-char "92")) + (escaped-char (base-16-char "a9"))) + (lex form)) + (test-equal #vu8(#xf0 #x9f #x92 #xa9 0) (run form))))) +(test-group "__asm__" + (let ((form "__asm__(\".globl \" __XSTRING(sym))")) + (test-equal '() (lex form)) + ;; TODO implicit string concatenation + (test-equal '(funcall __asm__ + (string ".globl ") + (funcall __XSTRING sym)) (run form)))) +(let ((form "__attribute__((__aligned__(x)))")) + (test-equal '(funcall (variable "__attribute__") + (group (group (funcall (variable "__aligned__") + (group (variable "x")))))) + (lex form)) + ;; This drops the extra set of parenthesis. Do we care? + (test-equal '(funcall __attribute__ + (funcall __aligned__ x)) + (run form))) diff --git a/tests/test/util.scm b/tests/test/util.scm index 1de96a37..aa37d20c 100644 --- a/tests/test/util.scm +++ b/tests/test/util.scm @@ -187,6 +187,25 @@ (test-error 'wrong-type-arg (find-extreme '())) +;; TODO group-by +;; TODO split-by + +(test-group "Split-by-one-of" + + (test-equal "Empty input" + '(()) (split-by-one-of '() '(+))) + + (test-equal "No matching tokens" + '((1 + 2)) (split-by-one-of '(1 + 2) '(/))) + + (test-equal "Matching tokens" + '((1) (+ 2) (- 3)) + (split-by-one-of '(1 + 2 - 3) '(+ -))) + + (test-equal "Maching tokens, multiple values in each group" + '((1 + 2) (* 3 + 4)) + (split-by-one-of '(1 + 2 * 3 + 4) '(*)))) + (call-with-values (lambda () (span-upto |