aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--doc/ref/guile/util-path.texi5
-rw-r--r--doc/ref/guile/util.texi21
-rw-r--r--module/c/cpp.scm95
-rw-r--r--module/c/eval.scm265
-rw-r--r--module/c/eval/environment.scm34
-rw-r--r--module/c/lex.scm52
-rw-r--r--module/c/operators.scm3
-rw-r--r--module/c/parse.scm411
-rw-r--r--module/hnh/util.scm25
-rw-r--r--module/hnh/util/path.scm3
-rw-r--r--module/srfi/srfi-64/util.scm11
-rwxr-xr-xtests/run-tests.scm74
-rw-r--r--tests/test/c-parse.scm69
-rw-r--r--tests/test/cpp.scm603
-rw-r--r--tests/test/util.scm19
15 files changed, 1519 insertions, 171 deletions
diff --git a/doc/ref/guile/util-path.texi b/doc/ref/guile/util-path.texi
index 322c50ec..2a53ba91 100644
--- a/doc/ref/guile/util-path.texi
+++ b/doc/ref/guile/util-path.texi
@@ -3,7 +3,10 @@
Provided by the module @code{(hnh util path)}.
-See also @code{absolute-file-name?} from Guile.
+
+@defun path-absolute? string
+Alias of @code{absolute-file-name?} from Guile.
+@end defun
@defun path-append strings ...
Joins all strings into a path, squeezing duplicated delimiters, but
diff --git a/doc/ref/guile/util.texi b/doc/ref/guile/util.texi
index 32df5fce..222b59c5 100644
--- a/doc/ref/guile/util.texi
+++ b/doc/ref/guile/util.texi
@@ -198,6 +198,27 @@ Split a list into sub-lists on @var{element}
@end lisp
@end defun
+@defun split-by-one-of lst items
+Like split-by, but takes a list of delimiters.
+Returns a list where the first element is everything before the first
+delimiter, and the remaining elements is the splitting delimiter
+consed with everything until the next delimiter.
+
+@lisp
+(split-by-one-of '() '(+)))
+⇒ (())
+
+(split-by-one-of '(1 + 2) '(/))
+⇒ ((1 + 2))
+
+(split-by-one-of '(1 + 2 - 3) '(+ -))
+⇒ ((1) (+ 2) (- 3))
+
+(split-by-one-of '(1 + 2 * 3 + 4) '(*))
+⇒ ((1 + 2) (* 3 + 4))
+@end lisp
+@end defun
+
@defun span-upto count predicate list
Simar to span from srfi-1, but never takes more than
diff --git a/module/c/cpp.scm b/module/c/cpp.scm
index a2935352..aed496f2 100644
--- a/module/c/cpp.scm
+++ b/module/c/cpp.scm
@@ -5,47 +5,37 @@
:use-module (ice-9 match)
:use-module (ice-9 regex)
:use-module ((rnrs io ports) :select (call-with-port))
+ :use-module ((rnrs bytevectors) :select (bytevector?))
:use-module (ice-9 format)
:use-module ((hnh util io) :select (read-lines))
:use-module (hnh util graph)
:use-module (c lex)
:use-module (c parse)
:use-module (c operators)
- :export (do-funcall replace-symbols include#)
+ :export (replace-symbols include#)
)
;; input "#define F(x, y) x + y"
-;; 1 full define | F(x, y)
+;; 1 full define | F(x,y)
;; 2 macro name | F
-;; 3 macro args | (x, y)
-;; 4 macro body | x + y
-(define define-re (make-regexp "^#define ((\\w+)(\\([^)]*\\))?) (.*)"))
+;; 3 macro args | (x,y)
+;; 5 macro body | x + y or #f
+(define define-re (make-regexp "^#define ((\\w+)([(][^)]*[)])?)( (.*))?"))
(define (tokenize-define-line header-line)
(aif (regexp-exec define-re header-line)
(cons (match:substring it 1)
- (match:substring it 4))
+ (let ((body (match:substring it 5)))
+ (if (or (eqv? body #f)
+ (string-null? body))
+ "1" body)))
(scm-error 'c-parse-error
"tokenize-define-line"
"Line dosen't match: ~s"
(list header-line) #f)))
-(define (do-funcall function arguments)
- (if (list? arguments)
- (apply function arguments)
- (function arguments)))
-
-(define symb-map
- `((,(symbol #\|) . logior)
- (funcall . (@ (c cpp) do-funcall))
- (&& . and)
- (& . logand)
- (== . =)
- (!= . (negate =))
- ))
-
(define (replace-symbols tree dict)
(if (not (list? tree))
(or (assoc-ref dict tree) tree)
@@ -55,12 +45,27 @@
;; Direct values. Lisp also has quoted symbols in this group.
(define (immediate? x)
(or (number? x)
- (char? x)
- (string? x)))
+ (bytevector? x)))
+;; TODO replace this with something sensible
+;; like a correct list extracted from (c eval)
+;; and not thinging that types are variables
;; built in symbols. Should never be marked as dependencies
(define (primitive? x)
- (memv x (cons 'funcall binary-operators)))
+ (memv x `(
+ ;; language primitives
+ sizeof
+
+ ;; special forms introduced by parser
+ funcall ternary struct-type as-type
+
+ ;; unary operatons which aren't also binary operators
+ ++ -- ! ~
+ not compl dereference pointer
+ pre-increment pre-decrement
+ post-increment post-decrement
+ ,@binary-operators
+ )))
@@ -77,7 +82,6 @@
[arg (list arg)]))
(define right (f (cdr pair)))
- (define alt-right (replace-symbols right symb-map))
(define dependencies
(lset-difference
eq?
@@ -91,12 +95,12 @@
dependencies
(match left
[('funcall name ('#{,}# args ...))
- (cons name `(lambda ,args ,alt-right))]
+ (cons name `(lambda ,args ,right))]
[('funcall name arg)
- (cons name `(lambda (,arg) ,alt-right))]
+ (cons name `(lambda (,arg) ,right))]
- [name (cons name alt-right)])))
+ [name (cons name right)])))
(define (parse-cpp-file lines)
@@ -104,7 +108,9 @@
(catch #t
(lambda () (parse-cpp-define line))
(lambda (err caller fmt args data)
- (format #t "~a ~?~%" fmt args)
+ (format #t "~a in ~a: ~?~%"
+ err caller fmt args)
+ (format #t "~s~%" line)
#f)))
lines))
@@ -114,29 +120,32 @@
(define (tokenize-header-file header-file)
(map tokenize-define-line
(call-with-port
- (open-input-pipe
- (string-append "cpp -dM " header-file))
+ (open-pipe* OPEN_READ "cpp" "-dM" header-file)
read-lines)))
-(define-macro (include# header-file . args)
-
- (define define-form (if (null? args) 'define (car args)))
-
- (define lines (remove (compose private-c-symbol? car)
- (tokenize-header-file header-file)))
+(define (load-cpp-file header-file)
+ (define lines (tokenize-header-file header-file))
(define forms (parse-cpp-file lines))
- (define graph*
- (fold (lambda (node graph)
- (add-node graph (cdr node) (car node)))
- (make-graph car)
- (filter identity forms)))
+ (fold (lambda (node graph)
+ (add-node graph (cdr node) (car node)))
+ (make-graph car)
+ (filter identity forms)))
+(define (include% header-file)
+ (define graph* (load-cpp-file header-file))
;; Hack for termios since this symbol isn't defined.
;; (including in the above removed private c symbols)
- (define graph (add-node graph* (cons '_POSIX_VDISABLE #f) '()))
+ (define graph (add-node graph* (cons '_POSIX_VDISABLE 0) '()))
+ ;; TODO expand bodies
+ ;; (remove (compose private-c-symbol? car))
+ (resolve-dependency-graph graph))
+
+(define-macro (include# header-file . args)
+
+ (define define-form (if (null? args) 'define (car args)))
`(begin
,@(map (lambda (pair) `(,define-form ,(car pair) ,(cdr pair)))
- (resolve-dependency-graph graph))))
+ (include% header-file))))
diff --git a/module/c/eval.scm b/module/c/eval.scm
new file mode 100644
index 00000000..67d0075d
--- /dev/null
+++ b/module/c/eval.scm
@@ -0,0 +1,265 @@
+(define-module (c eval)
+ :use-module (hnh util)
+ :use-module (srfi srfi-1)
+ :use-module (srfi srfi-71)
+ :use-module (srfi srfi-88)
+ :use-module (ice-9 match)
+ :use-module (ice-9 curried-definitions)
+ :use-module ((rnrs bytevectors)
+ :select (bytevector?))
+ :use-module ((rnrs arithmetic bitwise)
+ :select (bitwise-not
+ bitwise-and
+ bitwise-ior
+ bitwise-xor
+ bitwise-arithmetic-shift-left
+ bitwise-arithmetic-shift-right))
+ :use-module (c eval environment)
+ :export (c-procedure?
+ procedure-formals
+ procedure-body
+ procedure-arity
+
+ c-eval
+ ))
+
+(define C-TRUE 1)
+(define C-FALSE 0)
+
+(define (boolean->c-boolean bool)
+ (if bool C-TRUE C-FALSE))
+
+(define (c-boolean->boolean bool)
+ (not (zero? bool)))
+
+(define (c-not b)
+ (-> b c-boolean->boolean not boolean->c-boolean))
+
+(define (c-procedure? expr)
+ (and (list? expr)
+ (not (null? expr))
+ (eq? 'lambda (car expr))))
+
+(define* (ensure-c-procedure expr optional: calling-procedure)
+ (unless (c-procedure? expr)
+ (scm-error 'c-eval-error calling-procedure
+ "Value not a procedure: ~s"
+ (list procedure #f))))
+
+(define (procedure-formals procedure)
+ (ensure-c-procedure procedure "procedure-formals")
+ (list-ref procedure 1))
+
+(define (procedure-body procedure)
+ (ensure-c-procedure procedure "procedure-body")
+ (list-ref procedure 2))
+
+(define (procedure-arity procedure)
+ (length (procedure-formals procedure)))
+
+(define (literal? expression)
+ (or (number? expression)
+ (bytevector? expression)))
+
+
+
+;; Internal helper procedures
+
+(define (mod-set operation)
+ (lambda (env slot value)
+ ;; a += b
+ ;; a = a + b
+ (c-eval env `(= ,slot (,operation ,slot ,value)))))
+
+(define (fold-2 proc init lst)
+ (car+cdr
+ (fold (lambda (arg env+done)
+ (let ((env* arg* (proc (car env+done) arg)))
+ (cons* env* arg* (cdr env+done))))
+ init
+ lst)))
+
+;; TODO this disregards
+;; - floating point convertions
+;; - integer truncation
+(define ((simple-infix operation) env . operands)
+ (let ((env done (fold-2 c-eval (cons env '()) operands)))
+ (values env (apply operation (reverse done)))))
+
+(define ((binary-operator proc) env i c)
+ (let ((env* i* (c-eval env i)))
+ (let ((env** c* (c-eval env* c)))
+ (values env** (proc i* c*)))))
+
+
+
+
+;; The order of evaluation for a number of these is undefined, meaning
+;; that any side effects without sequence points is undefined.
+;; However, for many of these I do the sensible thing and evaluate them
+;; from left to right, instead of ignoring all side effects.
+
+;; TODO double check these with a C standard
+
+
+;; Operators have their own namespace. They are called without funcall
+;; in the pseudo-lisp which C is compiled to, and they expand more like
+;; lisp macros, since its up to each operator what to do with its operands.
+;; This to allow assignment and short circuting.
+(define primitives
+ `((and . ,(lambda (env . operands)
+ (let loop ((env env) (operands operands))
+ (if (null? operands)
+ (values env C-TRUE)
+ (let* ((env* result (c-eval env (car operands))))
+ (if (c-boolean->boolean result)
+ (loop env* (cdr operands))
+ (values env* result)))))))
+ (or . ,(lambda (env . operands)
+ (let loop ((env env) (operands operands))
+ (if (null? operands)
+ (values env C-FALSE)
+ (let* ((env* result (c-eval env (car operands))))
+ (if (false? result)
+ (values env* result)
+ (loop env* (cdr operands))))))))
+ (= . ,(lambda (env slot value)
+ ;; TOOD if slot isn't a variable, but a field (or array index)
+ ;; then it needs to be resolved...
+ (let ((env* result (c-eval env value)))
+ (values (env-set! env* slot result)
+ result))))
+ (and_eq ,(mod-set 'bitand)) ; &=
+ (or_eq ,(mod-set 'bitor)) ; |=
+ (xor_eq ,(mod-set 'xor)) ; ^=
+ (+= ,(mod-set '+))
+ (-= ,(mod-set '-))
+ (*= ,(mod-set '*))
+ (/= ,(mod-set '/))
+ (<<= ,(mod-set '<<))
+ (>>= ,(mod-set '>>))
+ (%= ,(mod-set '%))
+ (+ . ,(simple-infix +))
+ (* . ,(simple-infix *))
+ (/ . ,(simple-infix /))
+ (- . ,(lambda (env op . operands)
+ (if (null? operands)
+ (let ((env* value (c-eval env op)))
+ (values env* (- value)))
+ (apply (simple-infix -)
+ env op operands))))
+ (bitor . ,(simple-infix bitwise-ior))
+ (bitand . ,(simple-infix bitwise-and))
+ (xor . ,(simple-infix bitwise-xor))
+ (not_eq . ,(lambda (env a b) (c-eval env `(not (== ,a ,b))))) ; !=
+ (<< . ,(binary-operator bitwise-arithmetic-shift-left))
+ (>> . ,(binary-operator bitwise-arithmetic-shift-right))
+ (< . ,(binary-operator (compose boolean->c-boolean <)))
+ (> . ,(binary-operator (compose boolean->c-boolean >)))
+ ;; this assumes that = handles pointers
+ (== . ,(binary-operator (compose boolean->c-boolean =)))
+ (<= . ,(binary-operator (compose boolean->c-boolean <=)))
+ (>= . ,(binary-operator (compose boolean->c-boolean >=)))
+ (% . ,(binary-operator modulo))
+
+ (not . ,(lambda (env value)
+ (let ((env* result (c-eval env value)))
+ (values env* (c-not result)))))
+ (compl . ,(lambda (env value)
+ (let ((env* result (c-eval env value)))
+ (values env* (bitwise-not result)))))
+
+ ;; ++C
+ (pre-increment . ,(lambda (env slot) (c-eval env `(+= ,slot 1))))
+ (pre-decrement . ,(lambda (env slot) (c-eval env `(-= ,slot 1))))
+ ;; TODO these (C++, C--) need to handle if slot isn't a direct variabl
+ (post-increment . ,(lambda (env slot)
+ (let ((value (env-ref env slot)))
+ (values (env-set! env slot (1+ value))
+ value))))
+ (pre-decrement . ,(lambda (env slot)
+ (let ((value (env-ref env slot)))
+ (values (env-set! env slot (1+ value))
+ value))))
+
+ (ternary . ,(lambda (env test true-clause false-clause)
+ (let ((env* value (c-eval env test)))
+ (c-eval env*
+ (if (c-boolean->boolean value)
+ true-clause false-clause)))))
+
+ ;; TODO remaining operations
+ (as-type . ,(lambda (env target-type value)
+ (format (current-error-port) "cast<~s>(~s)~%" target-type value)
+ (values env value)))
+
+ (object-slot . ,(lambda (env object slot)
+ (scm-error 'not-implemented "object-slot"
+ "Object slots are not implemented, when accessing ~s.~s"
+ (list object slot) #f)))
+ (dereference-slot ,(lambda (env ptr slot)
+ (scm-error 'not-implemented "dereference-slot"
+ "Object slots are not implemented, when accessing ~s->~s"
+ (list object slot) #f)))
+ (dereference . ,(lambda (env ptr)
+ (scm-error 'not-implemented "dereference"
+ "Poiner dereferencing is not implemented: *~s"
+ (list ptr) #f)))
+ (pointer . ,(lambda (env value)
+ (scm-error 'not-implemented "pointer"
+ "Pointer of is not implemented: &~s"
+ (list value) #f)))))
+
+;; TODO |,|
+
+
+(define (c-eval environment expression)
+ (match expression
+ (('lambda formals body) (values environment `(lambda ,formals ,body)))
+ ;; hack since sizeof really should be a operator
+ (('funcall 'sizeof arg)
+ ;; TODO
+ (format (current-error-port) "sizeof ~s~%" arg)
+ (values environment 1))
+
+ (('funcall procedure-name ('#{,}# args ...))
+ (let ((procedure (env-ref environment procedure-name)))
+ (ensure-c-procedure procedure "c-eval")
+ (unless (= (length args) (procedure-arity procedure))
+ (scm-error 'c-eval-error "c-eval"
+ "Procedure arity mismatch for ~s, expected ~s, got ~s"
+ (list procedure-name
+ (procedure-arity procedure)
+ (length args))
+ #f))
+ (let ((env args* (fold-2 c-eval (cons environment '()) args )))
+ (let ((inner-environment
+ (fold (lambda (name value env) (env-set! env name value))
+ (push-frame! env)
+ (procedure-formals procedure) args*)))
+ (let ((resulting-environment
+ result-value
+ (c-eval inner-environment (procedure-body procedure))))
+ (values (pop-frame! resulting-environment)
+ result-value))))))
+
+ (('funcall procedure arg)
+ (c-eval environment `(funcall ,procedure (#{,}# ,arg))))
+
+ ((operator operands ...)
+ (apply (or (assoc-ref primitives operator)
+ (scm-error 'c-eval-error "c-eval"
+ "Applying non-existant primitive operator: ~s, operands: ~s"
+ (list operator operands) #f))
+ environment operands))
+
+ ;; "f()" gets compiled to simply f
+ ;; meaning that we instead use the environment to determine
+ ;; if something is a variable or procedure
+ (expr
+ (if (literal? expr)
+ (values environment expr)
+ (let ((value (env-ref environment expr)))
+ (if (c-procedure? value)
+ (c-eval environment `(funcall ,value (#{,}#)))
+ (values environment value)))))))
diff --git a/module/c/eval/environment.scm b/module/c/eval/environment.scm
new file mode 100644
index 00000000..12eefaf7
--- /dev/null
+++ b/module/c/eval/environment.scm
@@ -0,0 +1,34 @@
+(define-module (c eval environment)
+ :use-module (srfi srfi-1)
+ :export (make-environment
+ env-set! env-ref push-frame! pop-frame!))
+
+(define (make-frame)
+ (make-hash-table))
+
+(define (make-environment)
+ (list (make-frame)))
+
+;; Returns an updated environment, linear update
+(define (env-set! env key value)
+ ;; get handle to differentiate #f
+ ;; (even though #f should never be stored since it's not a C value)
+ (cond ((find (lambda (frame) (hashq-get-handle frame key)) env)
+ => (lambda (frame) (hashq-set! frame key value)))
+ (else (hashq-set! (car env) key value)))
+ env)
+
+(define (env-ref env key)
+ (cond ((null? env)
+ (scm-error 'misc-error "env-ref"
+ "~s unbound"
+ (list key)
+ #f))
+ ((hashq-get-handle (car env) key) => cdr)
+ (else (env-ref (cdr env) key))))
+
+(define (push-frame! environment)
+ (cons (make-frame) environment))
+
+(define (pop-frame! environment)
+ (cdr environment))
diff --git a/module/c/lex.scm b/module/c/lex.scm
index 34e52d88..0bde5c9e 100644
--- a/module/c/lex.scm
+++ b/module/c/lex.scm
@@ -43,8 +43,23 @@
(define-peg-pattern integer all
(and (or base-8 base-16 base-10) (? integer-suffix)))
+(define-peg-pattern float-suffix all
+ (* (or "f" "F" "l" "L")))
+
+(define-peg-pattern exponent all
+ (and (ignore (or "e" "E")) (? (or "+" "-")) base-10))
+
+;; Helper patterns for creating named groups in float
+(define-peg-pattern float-integer all base-10)
+(define-peg-pattern float-decimal all base-10)
+
+(define-peg-pattern float all
+ (or (and float-integer exponent (? float-suffix))
+ (and (? float-integer) (ignore ".") float-decimal (? exponent) (? float-suffix))
+ (and float-integer (ignore ".") (? exponent) (? float-suffix))))
+
(define-peg-pattern number body
- (or integer))
+ (or float integer))
(define-peg-pattern group all
(and (ignore "(") expr (ignore ")")))
@@ -65,11 +80,16 @@
(define-peg-pattern char all
(and (ignore "'") (or escaped-char peg-any) (ignore "'")))
+(define-peg-pattern quot none "\"")
+
+(define-peg-pattern string all
+ (and quot (* (or escaped-char (and (not-followed-by "\"") peg-any))) quot))
(define-peg-pattern* operator all
`(or ,@(map symbol->string symbol-binary-operators)
,@(map (lambda (op) `(and ,(symbol->string op) ws))
- wordy-binary-operators)))
+ wordy-binary-operators)
+ "?" ":"))
;; whitespace
(define-peg-pattern ws none
@@ -89,17 +109,23 @@
base-10-digit))))
(define-peg-pattern prefix-operator all
- (or "!" "~" "*" "&" "++" "--" "+" "-"))
+ ;; It's important that ++ and -- are BEFORE + and -
+ ;; otherwise the first + is found, leaving the second +, which fails
+ ;; to lex since it's an invalid token
+ ;; TODO sizeof can be written as a prefix operator
+ ;; (without parenthesis) if the operand is an expression.
+ (or "*" "&" "++" "--"
+ "!" "~" "+" "-"))
+
;;; Note that stacked pre or postfix operators without parenthesis
;;; dosen't work. So `*&C' is invalid, while `*(&C)' is valid.
(define-peg-pattern prefix all
- (and prefix-operator sp (or variable group funcall #; postfix
- )))
+ (and prefix-operator sp (or variable group funcall literal)))
(define-peg-pattern postfix-operator all
- (or "++" "--"))
+ (or "++" "--" "*"))
(define-peg-pattern postfix all
;; literals can't be in-place incremented and decremented
@@ -111,15 +137,25 @@
;; first case is "same" as expr, but in different order to prevent
;; infinite self reference. Pre and postfix not here, solved by having
;; them before infix in expr
- (and (or funcall postfix prefix group char number variable)
+ (and (or funcall postfix prefix group literal variable)
sp operator sp expr))
(define-peg-pattern funcall all
(and variable sp group))
+(define-peg-pattern literal body
+ (or char string number))
+
;;; main parser
(define-peg-pattern expr body
- (+ (and sp (or infix postfix prefix funcall group char number variable)
+ (+ (and sp (or
+ ;; float must be BEFORE infix, otherwise 3.2 is parsed as (infix 3 (operator ".") 2)
+ ;; that however breaks the infix logic, meaning that floating point numbers can't be
+ ;; used in basic arithmetic.
+ ;; TODO remove all implicit order of operations handling in the lexer, and move it to
+ ;; the parser. This should also fix the case of typecasts being applied incorrectly.
+ float
+ infix postfix prefix funcall group literal variable)
sp)))
diff --git a/module/c/operators.scm b/module/c/operators.scm
index ab1b3e7c..910dc8a9 100644
--- a/module/c/operators.scm
+++ b/module/c/operators.scm
@@ -9,8 +9,9 @@
`(+ - * / & ,(symbol #\|) ^ << >> % < > =))
;; apparently part of C
+;; https://en.cppreference.com/w/cpp/language/operator_alternative
(define wordy-binary-operators
- '(bitand and_eq and bitor or_eq or xor_eq xor))
+ '(bitand and_eq and bitor or_eq or xor_eq xor not_eq))
(define symbol-binary-operators
(append (map (lambda (x) (symbol-append x '=)) simple-operators)
diff --git a/module/c/parse.scm b/module/c/parse.scm
index 8030da77..7d11ea17 100644
--- a/module/c/parse.scm
+++ b/module/c/parse.scm
@@ -1,11 +1,14 @@
(define-module (c parse)
:use-module (hnh util)
:use-module (srfi srfi-1)
+ :use-module (srfi srfi-71)
:use-module (ice-9 match)
+ :use-module ((rnrs io ports)
+ :select (string->bytevector make-transcoder utf-8-codec))
+ :use-module (rnrs bytevectors)
:export (parse-lexeme-tree))
-;;; Rename this
-(define (perms set)
+(define (permutations set)
(concatenate
(map (lambda (key)
(map (lambda (o) (cons key o))
@@ -21,23 +24,124 @@
(define valid-sequences
(delete 'dummy
(lset-union eq? '(dummy)
- (map symbol-concat (perms '(() U L)))
- (map symbol-concat (perms '(() U LL))))))
+ (map symbol-concat (permutations '(() U L)))
+ (map symbol-concat (permutations '(() U LL))))))
;; => (LLU ULL LL LU UL L U)
(aif (memv (string->symbol (string-upcase str))
valid-sequences)
(case (car it)
- [(LLU ULL) '(unsigned long-long)]
+ [(LLU ULL) '(unsigned long long)]
[(LU UL) '(unsigned long)]
- [(LL) '(long-long)]
+ [(LL) '(long long)]
[(L) '(long)]
[(U) '(unsigned)])
(scm-error 'c-parse-error "parse-integer-suffix"
"Invalid integer suffix ~s"
(list str) #f)))
+(define (parse-float-suffix str)
+ (case (string->symbol str)
+ ((f F) '(float))
+ ((l L) '(long double))))
+
+(define (group-body->type vars)
+ (concatenate
+ (map
+ (match-lambda (('variable var) (list (parse-lexeme-tree `(variable ,var))))
+ (('postfix ('variable var)
+ ('postfix-operator "*"))
+ (list (parse-lexeme-tree `(variable ,var))
+ '*))
+ (else (scm-error 'c-parse-error "parse-lexeme-tree"
+ "Invalid token ~s in typecast form: ~s"
+ (list else vars) #f)))
+ vars)))
+
+;; Takes a list of strings and integers, and merges it all into a single
+;; bytevector representing a c string
+(define* (string-fragments->c-string
+ fragments optional: (transcoder (make-transcoder (utf-8-codec))))
+
+ (define fragments-fixed
+ (map (lambda (frag)
+ (if (string? frag)
+ (string->bytevector frag transcoder)
+ frag))
+ fragments))
+
+ (define bv-length
+ (fold (lambda (item sum) (+ sum (if (bytevector? item)
+ (bytevector-length item)
+ 1)))
+ 0 fragments-fixed))
+
+ (define bv (make-bytevector (1+ bv-length)))
+ ;; trailing null byte
+ (bytevector-u8-set! bv bv-length 0)
+ (fold (lambda (item idx)
+ (cond ((bytevector? item)
+ (bytevector-copy! item 0
+ bv idx
+ (bytevector-length item))
+ (+ idx (bytevector-length item)))
+ (else (bytevector-u8-set! bv idx item)
+ (+ idx 1))))
+ 0
+ fragments-fixed)
+ bv)
+
+(define (parse-float-form float-form)
+ (let ((float-string
+ (fold (lambda (arg str)
+ (string-append
+ str
+ (match arg
+ (('float-integer ('base-10 n)) n)
+ (('float-decimal ('base-10 n)) (string-append "." n))
+ (('exponent "+" ('base-10 n)) (string-append "e" n))
+ (('exponent ('base-10 n)) (string-append "e" n))
+ (('exponent "-" ('base-10 n)) (string-append "e-" n)))))
+ "" float-form)))
+ ;; exact->inexact is a no-op if we already have an inexact number, but
+ ;; ensures we get an inexact number when we have an exact number (which we
+ ;; can get from the "1." case). Returning an inexact number here is important
+ ;; to avoid arithmetic suprises later.
+ (exact->inexact
+ (or (string->number float-string)
+ (scm-error 'c-parse-error "parse-lexeme-tree"
+ "Couldn't parse expression as float: ~s"
+ (list `(float ,@args)) #f)))))
+
+
+(define (resolve-escaped-char form)
+ (match form
+ (('base-8-char n) (string->number n 8))
+ (('base-16-char n) (string->number n 16))
+ (c (char->integer
+ (case (string-ref c 0)
+ ((#\a) #\alarm)
+ ((#\b) #\backspace)
+ ((#\e) #\esc) ;; non-standard
+ ((#\f) #\page)
+ ((#\n) #\newline)
+ ((#\r) #\return)
+ ((#\t) #\tab)
+ ((#\v) #\vtab)
+ ((#\\) #\\)
+ ;; These are valid in both strings and chars
+ ((#\') #\')
+ ((#\") #\"))))))
+
+;; Takes a list of strings and escaped-char form
+;; and returns a list of strings and integers
+(define (resolve-string-fragment fragment)
+ (match fragment
+ (('escaped-char c)
+ (resolve-escaped-char c))
+ (fargment fragment)))
+
(define (parse-lexeme-tree tree)
(match tree
['() '()]
@@ -50,55 +154,67 @@
[('integer n ('integer-suffix suffix))
`(as-type
,(parse-integer-suffix suffix)
- ,(parse-lexeme-tree n))
- ]
+ ,(parse-lexeme-tree n))]
+
[('integer n)
(parse-lexeme-tree n)]
+
+ [('float args ... ('float-suffix suffix))
+ `(as-type ,(parse-float-suffix suffix)
+ ;; parse rest of float as if it lacked a suffix
+ ,(parse-lexeme-tree `(float ,@args)))]
+
+ [('float args ...) (parse-float-form args)]
+
;; Character literals, stored as raw integers
;; so mathematical operations keep working on them.
- [('char ('escaped-char ('base-8-char n)))
- (-> n (string->number 8) #; integer->char)]
- [('char ('escaped-char ('base-16-char n)))
- (-> n (string->number 16) #; integer->char)]
- [('char ('escaped-char c))
- (char->integer
- (case (string-ref c 0)
- ((#\a) #\alarm)
- ((#\b) #\backspace)
- ((#\e) #\esc)
- ((#\f) #\page)
- ((#\n) #\newline)
- ((#\r) #\return)
- ((#\t) #\tab)
- ((#\v) #\vtab)
- ((#\\) #\\)
- ((#\') #\')))]
+ [('char ('escaped-char c)) (resolve-escaped-char c)]
+
[('char c) (char->integer (string-ref c 0))]
[('variable var) (string->symbol var)]
+
+ ;; normalize some binary operators to their wordy equivalent
+ ;; (which also happens to match better with scheme)
+ [('operator "&&") 'and]
+ [('operator "&=") 'and_eq]
+ [('operator "&") 'bitand]
+ [('operator "|") 'bitor]
+ [('operator "!=") 'not_eq]
+ [('operator "||") 'or]
+ [('operator "|=") 'or_eq]
+ [('operator "^") 'xor]
+ [('operator "^=") 'xor_eq]
+ ;; Change these names to something scheme can handle better
+ [('operator ".") 'object-slot]
+ [('operator "->") 'dereference-slot]
[('operator op) (string->symbol op)]
+
[('prefix-operator op)
(case (string->symbol op)
+ ((!) 'not)
+ ((~) 'compl)
((*) 'dereference)
((&) 'pointer)
((++) 'pre-increment)
((--) 'pre-decrement)
- (else => identity))]
+ ((-) '-)
+ (else (scm-error 'c-parse-error "parse-lexeme-tree"
+ "Unknown prefix operator ~s"
+ (list op) #f)))]
[('postfix-operator op)
(case (string->symbol op)
[(++) 'post-increment]
[(--) 'post-decrement]
- [else => identity])]
+ [else (scm-error 'c-parse-error "parse-lexeme-tree"
+ "Unknown postfix operator ~s"
+ (list op) #f)])]
;; Parenthesis grouping
- [('group args)
+ [('group args ...)
(parse-lexeme-tree args)]
- ;; Atomic item. Used by flatten-infix
- [('atom body)
- (parse-lexeme-tree body)]
-
[('prefix op arg)
`(,(parse-lexeme-tree op)
,(parse-lexeme-tree arg))]
@@ -107,81 +223,204 @@
`(,(parse-lexeme-tree op)
,(parse-lexeme-tree arg))]
+
+
+
+
+ ;; resolved-operator and ternary are the return "types"
+ ;; of resolve-order-of-operations
+ [(('resolved-operator op) args ...)
+ `(,op ,@(map parse-lexeme-tree args))]
+
+ [('ternary a b c)
+ `(ternary ,(parse-lexeme-tree a)
+ ,(parse-lexeme-tree b)
+ ,(parse-lexeme-tree c))]
+
+
+
+
+ ;; Is it OK for literal strings to be "stored" inline?
+ ;; Or must they be a pointer?
+ ['string #vu8(0)]
+ [('string str ...)
+ (-> (map resolve-string-fragment str)
+ string-fragments->c-string)]
+
+ ;; implicit concatenation of string literals
+ [(('string str ...) ...)
+ (-> (map resolve-string-fragment (concatenate str))
+ string-fragments->c-string)]
+
[('infix args ...)
- (resolve-order-of-operations
- (flatten-infix (cons 'infix args)))]
+ (let ((r (resolve-order-of-operations
+ (flatten-infix (cons 'infix args)))))
+ (parse-lexeme-tree r))]
+
[('funcall function ('group arguments))
`(funcall ,(parse-lexeme-tree function)
,(parse-lexeme-tree arguments))]
- [bare (scm-error 'c-parse-error
- "parse-lexeme-tree"
- "Naked literal in lex-tree: ~s"
- (list bare)
- #f)]))
+ [(('variable "struct") ('variable value) ..1)
+ `(struct-type ,@(map string->symbol value))
+ ]
+
+ ;; A list of variables. Most likely a type signature
+ ;; [(('variable value) ..1)
+ ;; ]
+
+ ;; A typecast with only variables must (?) be a typecast?
+ [(('group groups ..1) ... value)
+ (fold-right (lambda (type done) `(as-type ,type ,done))
+ (parse-lexeme-tree value)
+ (map group-body->type groups))]
+
+ ;; Type name resolution?
+ ;; https://en.wikipedia.org/wiki/C_data_types
+ ;; base types with spaces:
+ ;; =======================
+ ;; [[un]signed] char
+ ;; [[un]signed] short [int]
+ ;; [[un]signed] int
+ ;; [un]signed [int]
+ ;; [[un]signed] long [int]
+ ;; [[un]signed] long long [int]
+ ;; float
+ ;; [long] double
+
+ ;; https://en.wikipedia.org/wiki/Type_qualifier
+ ;; qualifiers
+ ;; const
+ ;; volatile
+ ;; restrict
+ ;; _Atomic
+
+
+ ;; Storage specifiers
+ ;; auto
+ ;; register
+ ;; static
+ ;; extern
+
+ ;; struct <typename>
+ ;; enum <typename>
+ ;; union <typename>
+
+ ;; https://en.wikipedia.org/wiki/C_syntax
+ ;; int (*ptr_to_array)[100]
+
+
+ [(? symbol? bare)
+ (scm-error 'c-parse-error
+ "parse-lexeme-tree"
+ "Naked literal in lex-tree: ~s"
+ (list bare)
+ #f)]
+
+ [form
+ (scm-error 'c-parse-error
+ "parse-lexeme-tree"
+ "Unknown form in lex-tree: ~s"
+ (list form) #f)
+ ]))
;; https://en.wikipedia.org/wiki/Operators_in_C_and_C%2B%2B
+;; https://en.cppreference.com/w/c/language/operator_precedence
(define order-of-operations
(reverse
- (concatenate
- ;; This is only for binary operations
- `((-> ,(symbol #\.))
- (* / %)
- (+ -)
- (<< >>)
- (< <= > >=)
- (== !=)
- (&)
- (^)
- (,(symbol #\|))
- (&&)
- (,(symbol #\| #\|))
- (= += -= *= /= %= <<= >>= &= ^= ,(symbol #\| #\=))
- (,(symbol #\,))
- ))))
-
-(define (mark-other form)
- (if (list? form) (cons '*other* form) form))
+ ;; This is only for binary operations
+ `((-> ,(symbol #\.))
+ ;; All unary procedures go here, incnluding typecasts, and sizeof
+ (* / %)
+ (+ -)
+ (<< >>)
+ (< <= > >=)
+ (== != not_eq)
+ (& bitand)
+ (^ xorg)
+ (,(symbol #\|) bitor)
+ (&& and)
+ (,(symbol #\| #\|) or)
+ (? :)
+ (= += -= *= /= %= <<= >>= &= ^= ,(symbol #\| #\=)
+ and_eq or_eq xor_eq)
+ (,(symbol #\,))
+ )))
+
+;; a.b->c.d (. (-> (. a b) c) d)
+;; 2 * 3 / 4 * 5 => (* (/ (* 2 3) 4) 5)
+;; eller => (* 2 (/ 3 4) 5)
(define* (resolve-order-of-operations
tree optional: (order order-of-operations))
(if (null? order)
- (car tree)
+ (scm-error 'c-parse-error
+ "resolve-order-of-operations"
+ "Out of operations to resolve when resolving expression ~s"
+ (list tree) #f)
(match tree
- [('*other* body ...) body]
- [(form) (resolve-order-of-operations form order)]
- [(forms ...)
- (match (split-by forms (car order))
- [(group) (resolve-order-of-operations group (cdr order))]
- [groups
- (cons (car order)
- (map (lambda (form) (resolve-order-of-operations
- form order-of-operations))
- groups))])]
- [a a])))
+ [('fixed-infix form) form]
+ [('fixed-infix forms ...)
+ (match (split-by-one-of forms (car order))
+ [(group)
+ (resolve-order-of-operations (cons 'fixed-infix group)
+ (cdr order))]
+ [(a ('? b ...) (': c ...))
+ `(ternary ,(resolve-order-of-operations (cons 'fixed-infix a) (cdr order))
+ ,(resolve-order-of-operations (cons 'fixed-infix b) (cdr order))
+ ,(resolve-order-of-operations (cons 'fixed-infix c) (cdr order)))]
+ [(first rest ...)
+ ;; TODO this is only valid for the associative operators (+, ...)
+ ;; but not some other (<, ...)
+ (if (apply eq? (map car rest))
+ (let ((op (caar rest)))
+ `((resolved-operator ,op)
+ ,@(map (lambda (x) (resolve-order-of-operations (cons 'fixed-infix x)
+ (cdr order)))
+ (cons first (map cdr rest)))))
+ (fold (lambda (item done)
+ (let ((operator args (car+cdr item)))
+ `((resolved-operator ,operator)
+ ,done ,(resolve-order-of-operations
+ (cons 'fixed-infix args)
+ (cdr order)))))
+ (resolve-order-of-operations (cons 'fixed-infix first)
+ (cdr order))
+ rest))])])))
+
+;; 1 * 2 / 3 * 4
+;; ⇒ ((1) (* 2) (/ 3) (* 4))
+;; (1)
+;; (* (1) 2)
+;; (/ (* (1) 2) 3)
+;; (* (/ (* (1) 2) 3) 4)
;; Flatens a tree of infix triples. Stops when it should.
;; (parenthesis, function calls, ...)
(define (flatten-infix form)
- (match form
- [('infix left op ('infix right ...))
- (cons* (parse-lexeme-tree left)
- (parse-lexeme-tree op)
- (flatten-infix (cons 'infix right)))]
-
- [('infix left op right)
- (list (mark-other (parse-lexeme-tree left))
- (parse-lexeme-tree op)
- (mark-other (parse-lexeme-tree right)))]
-
- [other (scm-error 'c-parse-error
- "flatten-infix"
- "Not an infix tree ~a"
- (list other)
- #f)]))
+ (cons 'fixed-infix
+ (let loop ((form form))
+ (match form
+ [('infix left op ('infix right ...))
+ (cons* left
+ (parse-lexeme-tree op)
+ (loop (cons 'infix right)))]
+
+ [('infix left op right)
+ (list left
+ (parse-lexeme-tree op)
+ right)]
+
+ [('infix form) form]
+
+ [other (scm-error 'c-parse-error
+ "flatten-infix"
+ "Not an infix tree ~a"
+ (list other)
+ #f)]))))
diff --git a/module/hnh/util.scm b/module/hnh/util.scm
index d2c0dd5f..9a45704b 100644
--- a/module/hnh/util.scm
+++ b/module/hnh/util.scm
@@ -34,6 +34,8 @@
group-by
split-by
+ split-by-one-of
+
span-upto
cross-product
@@ -341,6 +343,29 @@
(cdr rem))])))
+(define (split-by-one-of lst items)
+ (cond ((null? items)
+ (scm-error 'wrong-type-arg "split-by-one-of"
+ "Must have at least one item to split by, when splitting ~s"
+ (cons items '()) #f))
+ ((not (list? items))
+ (scm-error 'wrong-type-arg "split-by-one-of"
+ "Items must be list of list of symbols, got ~s"
+ (list items) #f))
+ (else
+ (call-with-values
+ (lambda ()
+ (car+cdr
+ (let loop ((token 'sentinel-token) (lst lst))
+ (let ((head tail (break (lambda (item) (memv item items))
+ lst)))
+ (let ((group (cons token head)))
+ (if (null? tail)
+ (list group)
+ (cons group (loop (car tail) (cdr tail)))))))))
+ ;; Remove the sentinel token
+ (lambda (first rest) (cons (cdr first) rest))))))
+
;; Simar to span from srfi-1, but never takes more than
;; @var{count} items. Can however still take less.
diff --git a/module/hnh/util/path.scm b/module/hnh/util/path.scm
index ea081e85..0c8af48a 100644
--- a/module/hnh/util/path.scm
+++ b/module/hnh/util/path.scm
@@ -3,6 +3,7 @@
:use-module (srfi srfi-71)
:use-module (hnh util)
:export (path-append
+ path-absolute?
path-join
path-split
file-hidden?
@@ -12,6 +13,8 @@
(define // file-name-separator-string)
(define /? file-name-separator?)
+(define path-absolute? absolute-file-name?)
+
(define (path-append . strings)
(fold (lambda (s done)
(string-append
diff --git a/module/srfi/srfi-64/util.scm b/module/srfi/srfi-64/util.scm
new file mode 100644
index 00000000..a371227f
--- /dev/null
+++ b/module/srfi/srfi-64/util.scm
@@ -0,0 +1,11 @@
+(define-module (srfi srfi-64 util)
+ :use-module (ice-9 curried-definitions)
+ :use-module ((srfi srfi-1) :select (every))
+ :use-module (srfi srfi-64)
+ :export (test-match-group))
+
+;; Specifier for name of group
+(define ((test-match-group name . names) runner)
+ (every string=?
+ (reverse (cons name names))
+ (test-runner-group-stack runner)))
diff --git a/tests/run-tests.scm b/tests/run-tests.scm
index 5270636e..3955a6a2 100755
--- a/tests/run-tests.scm
+++ b/tests/run-tests.scm
@@ -27,6 +27,7 @@ fi
(ice-9 pretty-print)
(ice-9 getopt-long)
(ice-9 match)
+ (ice-9 regex)
(system vm coverage)
((all-modules) :select (fs-find))
)
@@ -54,6 +55,24 @@ fi
(define (make-indent depth)
(make-string (* 2 depth) #\space))
+(define (string-replace-head s1 s2)
+ (string-replace s1 s2
+ 0 (string-length s2)))
+
+(define (pp form indent prefix-1)
+ (let ((prefix (make-string (+ (string-length indent)
+ (string-length prefix-1))
+ #\space)))
+ (display
+ (string-replace-head
+ (with-output-to-string
+ (lambda () (pretty-print
+ form
+ per-line-prefix: prefix
+ width: (- 79 (string-length indent)))))
+ (string-append indent prefix-1)))))
+
+
(define (construct-test-runner)
(define runner (test-runner-null))
(define depth 0)
@@ -75,7 +94,10 @@ fi
(cond ((test-runner-test-name runner)
(negate string-null?) => identity)
((test-result-ref runner 'expected-value)
- => (lambda (p) (with-output-to-string (lambda () (display (bold "[SOURCE]: ")) (truncated-print p width: 60))))))))
+ => (lambda (p) (with-output-to-string
+ (lambda ()
+ (display (bold "[SOURCE]: "))
+ (truncated-print p width: 60))))))))
(when (eq? 'fail (test-result-kind))
(cond ((test-result-ref runner 'actual-error)
=> (lambda (err)
@@ -94,12 +116,12 @@ fi
(unknown-actual (gensym)))
(let ((expected (test-result-ref runner 'expected-value unknown-expected))
(actual (test-result-ref runner 'actual-value unknown-actual)))
- (if (eq? expected unknown-expected)
- (format #t "~aAssertion failed, received ~s~%"
- (make-indent (1+ depth)) actual)
- (format #t "~aExpected: ~s~%~aReceived: ~s~%"
- (make-indent (1+ depth)) expected
- (make-indent (1+ depth)) actual))))))
+ (let ((indent (make-indent (1+ depth))))
+ (if (eq? expected unknown-expected)
+ (format #t "~aAssertion failed~%" indent)
+ (begin
+ (pp expected indent "Expected: ")
+ (pp actual indent "Received: "))))))))
(format #t "~aNear ~a:~a~%"
(make-indent (1+ depth))
(test-result-ref runner 'source-file)
@@ -203,9 +225,6 @@ fi
;; (format #t "Running on:~%~y~%" files)
-(awhen (option-ref options 'only #f)
- (set! files (list (path-append "test" it))))
-
((@ (hnh util exceptions) warnings-are-errors) #t)
@@ -240,9 +259,38 @@ fi
(test-begin "suite")
-(awhen (option-ref options 'skip #f)
- (format #t "Skipping ~s~%" it)
- (test-skip it))
+
+(define onlies
+ (let %loop ((args (command-line)) (onlies '()))
+ (define* (loop args key: only)
+ (if only
+ (%loop args (cons only onlies))
+ (%loop args onlies)))
+ (if (null? args)
+ onlies
+ (cond ((string-match "^--skip(=.*)?$" (car args))
+ => (lambda (m)
+ (cond ((match:substring m 1)
+ => (lambda (s)
+ (format #t "Skipping ~s~%" s)
+ (test-skip s)
+ (loop (cdr args))))
+ (else (format #t "Skipping ~s~%" (cadr args))
+ (test-skip (cadr args))
+ (loop (cddr args))))))
+ ((string-match "^--only(=.*)?$" (car args))
+ => (lambda (m)
+ (cond ((match:substring m 1)
+ => (lambda (s)
+ (loop (cdr args) only: s)))
+ (else (loop (cddr args) only: (cadr args))))))
+ (else (loop (cdr args)))))))
+
+(unless (null? onlies)
+ (set! files
+ (map (lambda (x) (path-append "test" x))
+ ;; reverse only until I have built a dependency graph for tests
+ (reverse onlies))))
(finalizer (lambda () (for-each (lambda (f) (catch/print-trace (lambda () (test-group f (load f)))))
files)))
diff --git a/tests/test/c-parse.scm b/tests/test/c-parse.scm
new file mode 100644
index 00000000..c16958de
--- /dev/null
+++ b/tests/test/c-parse.scm
@@ -0,0 +1,69 @@
+;;; Commentary
+;; Test implementation details of C parser
+;; TODO Should be ran before (test cpp)
+;;; Code
+
+(define-module (test cpp)
+ :use-module (srfi srfi-64)
+ :use-module (srfi srfi-88)
+ :use-module ((c lex) :select (lex))
+ :use-module (c parse))
+
+(define flatten-infix (@@ (c parse) flatten-infix))
+(define resolve-order-of-operations (@@ (c parse) resolve-order-of-operations))
+
+(test-group "Flatten infix"
+ (test-equal "Simple binary operator"
+ '(fixed-infix (integer (base-10 "1"))
+ +
+ (integer (base-10 "2")))
+ (flatten-infix (lex "1 + 2")))
+
+ (test-equal "Simple binary operator, with compound structure in on branch"
+ '(fixed-infix (integer (base-10 "1"))
+ +
+ (funcall (variable "f")
+ (group (integer (base-10 "2")))))
+ (flatten-infix (lex "1 + f(2)"))))
+
+(test-group "Order of operations"
+ (test-equal "Basic binary operator"
+ '((resolved-operator +)
+ (integer (base-10 "1"))
+ (integer (base-10 "2")))
+ (resolve-order-of-operations (flatten-infix (lex "1 + 2"))))
+
+ (test-equal "Multiple operators, with non-left-associative application"
+ '((resolved-operator +)
+ (integer (base-10 "1"))
+ ((resolved-operator *)
+ (integer (base-10 "2"))
+ (integer (base-10 "3"))))
+ (resolve-order-of-operations (flatten-infix (lex "1 + 2 * 3"))))
+
+ (test-equal "Multiple of the same operation gets clumed together"
+ '((resolved-operator +)
+ (integer (base-10 "1"))
+ (integer (base-10 "2"))
+ (integer (base-10 "3")))
+ (resolve-order-of-operations (flatten-infix (lex "1 + 2 + 3"))))
+
+ (test-equal "Simple Ternary"
+ '(ternary
+ (integer (base-10 "1"))
+ (integer (base-10 "2"))
+ (integer (base-10 "3")))
+ (resolve-order-of-operations (flatten-infix (lex "1 ? 2 : 3"))))
+
+ (test-equal "ternary with further infix operators"
+ '(ternary ((resolved-operator +)
+ (integer (base-10 "1"))
+ (integer (base-10 "2")))
+ ((resolved-operator %)
+ (integer (base-10 "3"))
+ (integer (base-10 "4")))
+ ((resolved-operator *)
+ (integer (base-10 "5"))
+ (integer (base-10 "6"))))
+ (resolve-order-of-operations (flatten-infix (lex "1 + 2? 3 % 4 : 5 * 6")))))
+
diff --git a/tests/test/cpp.scm b/tests/test/cpp.scm
index 9c720fde..1294bc96 100644
--- a/tests/test/cpp.scm
+++ b/tests/test/cpp.scm
@@ -3,37 +3,602 @@
;;; Code:
(define-module (test cpp)
+ :use-module (srfi srfi-1)
:use-module (srfi srfi-64)
:use-module (srfi srfi-88)
:use-module ((c lex) :select (lex))
- :use-module ((c parse) :select (parse-lexeme-tree)))
+ :use-module ((c parse) :select (parse-lexeme-tree))
+ :use-module ((c eval) :select (c-eval))
+ :use-module ((c eval environment) :select (make-environment env-set!))
+ :use-module ((rnrs arithmetic bitwise)
+ :select (bitwise-xor)))
+
+;; Note that the lexer's output isn't stable.
+;; The tests here are more to see where the lexer succeeds but the parser fails.
+;; So changing the lexer test cases isn't a problem
+;; but don't change the parser test cases
+
+;; __asm__ always has strings as arguments
+(test-skip "__asm__")
+
+;; Lexer produces garbage when attempted. Fixing this would also fix cast
+;; operations.
+(test-skip "Float in infix expression")
+;; order of operation is wrong, leading to an incorrect result
+(test-skip "Cast with operation")
+
+;; not implemented
+(test-skip "Token concatenation")
+
+;; A string follewed by a macro (which expands to a string)
+;; should be concatenated. This is however not yet implemented
+(test-skip "Implicit concatenation of string and macro")
(define run (compose parse-lexeme-tree lex))
-(test-equal
- '(+ (post-increment (dereference C)) 3)
- (run "(*C)++ + 3"))
+(define (alist->environment alist)
+ (fold (lambda (pair env)
+ (apply env-set! env pair))
+ (make-environment)
+ alist))
+
+(define (exec form . base-bindings)
+ (call-with-values
+ (lambda () (c-eval (alist->environment base-bindings)
+ (run form)))
+ (lambda (env value) value)))
+
+(define-syntax let-group
+ (syntax-rules ()
+ ((let ((form name) rest ...) body ...)
+ (test-group name
+ (let ((form name)
+ rest ...)
+ body ...)))))
+
+(let-group
+ ((form "(*C)++ + 3"))
+ (test-equal '(infix (postfix (group (prefix (prefix-operator "*")
+ (variable "C")))
+ (postfix-operator "++"))
+ (operator "+")
+ (integer (base-10 "3")))
+ (lex form))
+ (test-equal '(+ (post-increment (dereference C)) 3)
+ (run form)))
+
+(let-group
+ ((form "*C++ + 3"))
+ (test-equal '(infix (postfix (prefix (prefix-operator "*")
+ (variable "C"))
+ (postfix-operator "++"))
+ (operator "+")
+ (integer (base-10 "3")))
+ (lex form))
+ (test-equal '(+ (post-increment (dereference C)) 3)
+ (run form)))
+
+(let-group
+ ((form "*C++"))
+ (test-equal '(postfix (prefix (prefix-operator "*")
+ (variable "C"))
+ (postfix-operator "++"))
+ (lex form))
+ (test-equal '(post-increment (dereference C))
+ (run form)))
+
+(let-group
+ ((form "C++ + C++"))
+ (test-equal '(infix (postfix (variable "C")
+ (postfix-operator "++"))
+ (operator "+")
+ (postfix (variable "C")
+ (postfix-operator "++")))
+ (lex form))
+ (test-equal '(+ (post-increment C) (post-increment C))
+ (run form)))
+
+(let-group
+ ((form "++C + ++C"))
+ (test-equal '(infix (prefix (prefix-operator "++")
+ (variable "C"))
+ (operator "+")
+ (prefix (prefix-operator "++")
+ (variable "C")))
+ (lex form))
+ (test-equal '(+ (pre-increment C) (pre-increment C))
+ (run form)))
+
+(let-group
+ ((form "2 + 2 * 2"))
+ (test-equal '(infix (integer (base-10 "2"))
+ (operator "+")
+ (infix (integer (base-10 "2"))
+ (operator "*")
+ (integer (base-10 "2"))))
+ (lex form))
+ (test-equal '(+ 2 (* 2 2)) (run form))
+ (test-equal 6 (exec form)))
+
+(let-group
+ ((form "2 * 2 + 2"))
+ (test-equal '(infix (integer (base-10 "2"))
+ (operator "*")
+ (infix (integer (base-10 "2"))
+ (operator "+")
+ (integer (base-10 "2"))))
+ (lex form))
+ (test-equal '(+ (* 2 2) 2) (run form))
+ (test-equal 6 (exec form)))
+
+(let-group
+ ((form "2+2+2"))
+ (test-equal '(infix (integer (base-10 "2"))
+ (operator "+")
+ (infix (integer (base-10 "2"))
+ (operator "+")
+ (integer (base-10 "2")))) (lex form))
+ (test-equal '(+ 2 2 2) (run form))
+ (test-equal 6 (exec form)))
+
+(test-group "Unary minus"
+ (test-group "Without space"
+ (let ((form "-1"))
+ (test-equal '(prefix (prefix-operator "-")
+ (integer (base-10 "1")))
+ (lex form))
+ (test-equal '(- 1) (run form))
+ (test-equal -1 (exec form))))
+
+ (test-group "With space"
+ (let ((form "- 1"))
+ (test-equal '(prefix (prefix-operator "-")
+ (integer (base-10 "1")))
+ (lex form))
+ (test-equal '(- 1) (run form))
+ (test-equal -1 (exec form))))
+
+ (test-group "Before variable"
+ (let ((form "-x"))
+ (test-equal '(prefix (prefix-operator "-")
+ (variable "x"))
+ (lex form))
+ (test-equal '(- x) (run form))
+ (test-equal -5 (exec form '(x 5)))))
+
+ (test-group "Before infix"
+ (let ((form "-x+3"))
+ (test-equal '(infix (prefix (prefix-operator "-")
+ (variable "x"))
+ (operator "+")
+ (integer (base-10 "3")))
+ (lex form))
+ (test-equal '(+ (- x) 3) (run form))
+ (test-equal -2 (exec form '(x 5)))))
+
+ (test-group "Inside infix expression"
+ (let ((form "x+-3"))
+ (test-equal '(infix (variable "x")
+ (operator "+")
+ (prefix (prefix-operator "-")
+ (integer (base-10 "3"))))
+ (lex form))
+ (test-equal '(+ x (- 3)) (run form))
+ (test-equal 2 (exec form '(x 5)))))
+ )
+
+
+
+
+;; Hand picked forms from output of `cpp -dM /usr/include/termios.h` on
+;; FreeBSD 13.1-RELEASE releng/13.1-n250148-fc952ac2212 GENERIC amd64
+;; 2022-06-28
+
+(let ((form "00000200"))
+ (test-equal '(integer (base-8 "0000200")) (lex form))
+ (test-equal 128 (run form)))
+
+(let ((form "0"))
+ (test-equal '(integer (base-10 "0")) (lex form))
+ (test-equal 0 (run form)))
+
+(let ((form "1000000U"))
+ (test-equal '(integer (base-10 "1000000") (integer-suffix "U")) (lex form))
+ (test-equal '(as-type (unsigned) 1000000) (run form))
+ (test-equal 1000000 (exec form)))
+
+
+(let ((form "0x10c"))
+ (test-equal '(integer (base-16 "10c")) (lex form))
+ (test-equal 268 (run form)))
+
+;; Lexer keeps original case, handled later by parser
+(let ((form "0X10C"))
+ (test-equal '(integer (base-16 "10C")) (lex form))
+ (test-equal 268 (run form)))
+
+(let ((form "a != b"))
+ (test-equal '(infix (variable "a")
+ (operator "!=")
+ (variable "b"))
+ (lex form))
+ (test-equal '(not_eq a b) (run form))
+ (test-equal 1 (exec form '(a 1) '(b 2)))
+ (test-equal 0 (exec form '(a 1) '(b 1)))
+ )
+
+(let ((form "((c) == (val) && (val) != _POSIX_VDISABLE)"))
+ ;; (test-equal '() (lex form))
+ (test-equal '(and (== c val)
+ (not_eq val _POSIX_VDISABLE))
+ (run form))
+ (test-equal 0 (exec form '(c 1) '(val 2) '(_POSIX_VDISABLE 3)))
+ )
+
+(let ((form "CTRL('O')"))
+ (test-equal '(funcall (variable "CTRL") (group (char "O"))) (lex form))
+ (test-equal '(funcall CTRL 79) (run form))
+ (test-equal (bitwise-xor #x40 (char->integer #\O))
+ (exec form
+ ;; Definition copied from our parsers output of
+ ;; preprocessing output as defined above
+ '(CTRL (lambda (x)
+ (ternary (and (>= x 97) (<= x 122))
+ (+ (- x 97) 1)
+ (bitand (+ (- x 65) 1) 127)))))))
+
+(let ((form "CREPRINT"))
+ (test-equal '(variable "CREPRINT") (lex form))
+ (test-equal 'CREPRINT (run form)))
+
+(let ((form "(CCTS_OFLOW | CRTS_IFLOW)"))
+ (test-equal '(group (infix (variable "CCTS_OFLOW")
+ (operator "|")
+ (variable "CRTS_IFLOW")))
+ (lex form))
+ (test-equal '(bitor CCTS_OFLOW CRTS_IFLOW) (run form)))
+
+;; ((x) >= 'a' && (x) <= 'z'
+;; ? ((x) - 'a' + 1)
+;; : (((x) - 'a' + 1) & 0x7f))
+(let ((form "((x) >= 'a' && (x) <= 'z' ? ((x) - 'a' + 1) : (((x) - 'a' + 1) & 0x7f))"))
+ ;; (test-equal '() (lex form))
+ (test-equal '(ternary
+ (and (>= x #x61)
+ (<= x #x7A))
+ (+ (- x #x61) 1)
+ (bitand (+ (- x #x61) 1) 127))
+ (run form)))
+
+(let ((form "((x) & ~(IOCPARM_MASK << 16))"))
+ ;; (test-equal '() (lex form))
+ (test-equal '(bitand x (compl (<< IOCPARM_MASK 16))) (run form)))
+
+(let ((form "(((x) >> 8) & 0xff)"))
+ ;; (test-equal '() (lex form))
+ (test-equal '(bitand (>> x 8) 255) (run form)))
+
+(let ((form "(((x) >> 16) & IOCPARM_MASK)"))
+ ;; (test-equal '() (lex form))
+ (test-equal '(bitand (>> x 16) IOCPARM_MASK) (run form)))
+
+(let ((form "((1 << IOCPARM_SHIFT) - 1)"))
+ ;; (test-equal '() (lex form))
+ (test-equal '(- (<< 1 IOCPARM_SHIFT) 1) (run form)))
+
+(let ((form "_IO('t', 120)"))
+ (test-equal '(funcall
+ (variable "_IO")
+ (group (infix (char "t")
+ (operator ",")
+ (integer (base-10 "120")))))
+ (lex form))
+ (test-equal '(funcall _IO (#{,}# 116 120)) (run form)))
+
+;; note the lone type
+(let ((form "_IOW('t', 98, int)"))
+ ;; (test-equal '() (lex form))
+ (test-equal '(funcall _IOW (#{,}# 116 98 int))
+ (run form)))
+
+;; note the multi-word type
+(let ((form "_IOR('t', 19, struct termios)"))
+ ;; (test-equal '() (lex form))
+ (test-equal '(funcall _IOR (#{,}# 116 19 (struct-type termios))) (run form)))
+
+
+;; TODO concatenation rules
+;; #define __CONCAT(x,y) __CONCAT1(x,y)
+;; #define __CONCAT1(x,y) x ## y
+;; #define __CONSTANT_CFSTRINGS__ 1
+;; #define __COPYRIGHT(s) __IDSTRING(__CONCAT(__copyright_,__LINE__),s)
+
+(test-group "Token concatenation"
+ (let ((form "x ## y"))
+ (test-equal '() (lex form))
+ (test-equal '0 (run form))))
+
+(test-group "Floating point numbers"
+
+ (test-group "Diffent forms"
+ (test-group "No decimal point, exponent, no suffix"
+ (let ((form "10e10"))
+ (test-equal '(float (float-integer (base-10 "10"))
+ (exponent (base-10 "10")))
+ (lex form))
+ (test-equal 10e10 (run form))))
+
+ (test-group "No decimal point, negative exponent"
+ (let ((form "10e-10"))
+ (test-equal '(float (float-integer (base-10 "10"))
+ (exponent "-" (base-10 "10")))
+ (lex form))
+ (test-equal 10e-10 (run form))))
+
+ (test-group "No decimal point, exponent and suffix"
+ (let ((form "10e10L"))
+ (test-equal '(float (float-integer (base-10 "10"))
+ (exponent (base-10 "10"))
+ (float-suffix "L"))
+ (lex form))
+ (test-equal '(as-type (long double) 10e10)
+ (run form))))
+
+ (test-group "Leading period, no exponent or suffix"
+ (let ((form ".1"))
+ (test-equal '(float (float-decimal (base-10 "1"))) (lex form))
+ (test-equal 0.1 (run form))))
+
+ (test-group "Trailing period, no exponent or suffix"
+ (let ((form "1."))
+ (test-equal '(float (float-integer (base-10 "1"))) (lex form))
+ (test-equal 1.0 (run form)))))
+
+
+ (test-group "Negative float"
+ (let ((form "-1.0"))
+ (test-equal '(prefix (prefix-operator "-")
+ (float (float-integer (base-10 "1"))
+ (float-decimal (base-10 "0"))))
+ (lex form))
+ (test-equal '(- 1.0) (run form))))
+
+
+
+ (test-group "Real world examples"
+ (let ((form "4.9406564584124654e-324"))
+ (test-equal '(float (float-integer (base-10 "4"))
+ (float-decimal (base-10 "9406564584124654"))
+ (exponent "-" (base-10 "324")))
+ (lex form))
+ (test-equal 4.9406564584124654e-324 (run form)))
+
+ (let ((form "1.7976931348623157e+308"))
+ (test-equal '(float (float-integer (base-10 "1"))
+ (float-decimal (base-10 "7976931348623157"))
+ (exponent "+" (base-10 "308")))
+ (lex form))
+ (test-equal 1.7976931348623157e+308 (run form))))
+
+ (test-group "Float in infix expression"
+ (test-group "Simple case"
+ (let ((form "1. + .1"))
+ (test-equal '(infix (float (float-integer (base-10 "1")))
+ (operator "+")
+ (float (float-decimal (base-10 "1"))))
+ (lex form))
+ (test-equal '(+ 1.0 0.1) (run form))))
+ ;; (test-group "Complicated case")
+ ))
+
+(test-group "Typecasts"
+
+ (let ((form "(unsigned) 5"))
+ (test-equal '((group (variable "unsigned"))
+ (integer (base-10 "5")))
+ (lex form))
+ (test-equal '(as-type (unsigned) 5)
+ (run form)))
+
+ (let ((form "(unsigned integer) 5"))
+ (test-equal '((group (variable "unsigned")
+ (variable "integer"))
+ (integer (base-10 "5")))
+ (lex form))
+ (test-equal '(as-type (unsigned integer) 5) (run form)))
+
+ (test-group "Pointer with space"
+ (let ((form "(int *) 5"))
+ (test-equal '((group (postfix (variable "int")
+ (postfix-operator "*")))
+ (integer (base-10 "5")))
+ (lex form))
+ (test-equal '(as-type (int *) 5)
+ (run form))))
+
+ (test-group "Pointer without space"
+ (let ((form "(int*) 5"))
+ (test-equal '((group (postfix (variable "int")
+ (postfix-operator "*")))
+ (integer (base-10 "5")))
+ (lex form))
+ (test-equal '(as-type (int *) 5)
+ (run form))))
+
+ (test-group "Multi word type pointer"
+ (let ((form "(unsigned int*) 5"))
+ (test-equal '((group (variable "unsigned")
+ (postfix (variable "int")
+ (postfix-operator "*")))
+ (integer (base-10 "5")))
+ (lex form))
+ (test-equal '(as-type (unsigned int *) 5)
+ (run form))))
+
+ (test-group "Double cast"
+ (let ((form "(int)(unsigned) 5"))
+ (test-equal '((group (variable "int"))
+ (group (variable "unsigned"))
+ (integer (base-10 "5"))) (lex form))
+ (test-equal '(as-type (int) (as-type (unsigned) 5))
+ (run form))))
+
+ (test-group "Cast with operation"
+ (let ((form "(int) 5 + 7"))
+ (test-equal '((group (variable "int"))
+ (infix (integer (base-10 "5"))
+ (operator "+")
+ (integer (base-10 "7"))))
+ (lex form))
+
+ (test-equal '(+ (as-type (int) 5) 7)
+ (run form))))
+
+
+
+ (test-group "Tripple cast, with value inside paranthesis"
+ (let ((form "(type)(__uintptr_t)(const void *)(var)"))
+ (test-equal '((group (variable "type"))
+ (group (variable "__uintptr_t"))
+ (group (variable "const")
+ (postfix (variable "void")
+ (postfix-operator "*")))
+ (group (variable "var")))
+ (lex form))
+ (test-equal '(as-type (type)
+ (as-type (__uintptr_t)
+ (as-type (const void *)
+ var)))
+ (run form))))
+
+ (test-group "Same as above, but whole thing inside parenthesis"
+ (let ((form "((type)(__uintptr_t)(const void *)(var))"))
+ (test-equal '(group (group (variable "type"))
+ (group (variable "__uintptr_t"))
+ (group (variable "const")
+ (postfix (variable "void")
+ (postfix-operator "*")))
+ (group (variable "var")))
+ (lex form))
+ (test-equal '(as-type (type)
+ (as-type (__uintptr_t)
+ (as-type (const void *)
+ var)))
+ (run form))))
+
+ (let ((form "((type)(__uintptr_t)(const volatile void *)(var))"))
+ ;; (test-equal '() (lex form))
+ (test-equal '(as-type (type)
+ (as-type (__uintptr_t)
+ (as-type (const volatile void *)
+ var)))
+ (run form)))
+
+ (let ((form "((unsigned long) ((inout) | (((len) & IOCPARM_MASK) << 16) | ((group) << 8) | (num)))"))
+ (test-equal '(group (group (variable "unsigned") (variable "long"))
+ (group (infix (group (variable "inout"))
+ (operator "|")
+ (infix (group (infix (group (infix (group (variable "len"))
+ (operator "&")
+ (variable "IOCPARM_MASK")))
+ (operator "<<")
+ (integer (base-10 "16"))))
+ (operator "|")
+ (infix (group (infix (group (variable "group"))
+ (operator "<<")
+ (integer (base-10 "8"))))
+ (operator "|")
+ (group (variable "num")))))))
+ (lex form))
+ (test-equal '(as-type (unsigned long)
+ (bitor inout
+ (<< (bitand len IOCPARM_MASK) 16)
+ (<< group 8)
+ num))
+ (run form))))
+
+(test-group "Characters"
+ (let ((form "'c'"))
+ (test-equal '(char "c") (lex form))
+ (test-equal #x63 (run form)))
+
+ (let ((form "'\\n'"))
+ (test-equal '(char (escaped-char "n")) (lex form))
+ (test-equal (char->integer #\newline) (run form))))
+
+(test-group "Strings"
+ (test-group "Empty string"
+ (let ((form "\"\""))
+ (test-equal 'string (lex form))
+ (test-equal #vu8(0) (run form))))
+
+ (test-group "Simple string"
+ (let ((form "\"li\""))
+ (test-equal '(string "li") (lex form))
+ (test-equal #vu8(#x6C #x69 0) (run form))))
+
+ (test-group "Implicit concatenation of strings"
+ (let ((form "\"a\" \"b\""))
+ (test-equal '((string "a")
+ (string "b"))
+ (lex form))
+ (test-equal #vu8(#x61 #x62 0)
+ (run form))))
-(test-equal
- '(+ (post-increment (dereference C)) 3)
- (run "*C++ + 3"))
+ (test-group "Implicit concatenation of string and macro"
+ (let ((form "\"a\" MACRO"))
+ (test-equal '((string "a") (variable "MACRO")) (lex form))
+ (test-equal '() (run form))))
-(test-equal
- '(post-increment (dereference C))
- (run "*C++"))
+ (test-group "String with only escape"
+ (let ((form (string #\" #\\ #\" #\")))
+ (test-equal `(string (escaped-char "\"")) (lex form))
+ (test-equal #vu8(34 0) (run form))))
-(test-equal
- '(+ (post-increment C) (post-increment C))
- (run "C++ + C++"))
+ (test-group "String with escape at start"
+ (let ((form (string #\" #\\ #\" #\a #\")))
+ (test-equal `(string (escaped-char "\"") "a") (lex form))
+ (test-equal #vu8(34 #x61 0) (run form))))
-(test-equal
- '(+ (pre-increment C) (pre-increment C))
- (run "++C + ++C"))
+ (test-group "String with escape at end"
+ (let ((form (string #\" #\a #\\ #\" #\")))
+ (test-equal `(string "a" (escaped-char "\"")) (lex form))
+ (test-equal #vu8(#x61 34 0) (run form))))
-(test-equal '(+ 2 (* 2 2)) (run "2 + 2 * 2"))
+ (test-group "String with escape in middle"
+ (let ((form (string #\" #\a #\\ #\" #\b #\")))
+ (test-equal `(string "a" (escaped-char "\"") "b") (lex form))
+ (test-equal #vu8(#x61 34 #x62 0) (run form))))
-(test-equal '(+ (* 2 2) 2) (run "2 * 2 + 2"))
+ ;; \e is semi non-standard
+ (test-group "String with bakslash-e esacpe"
+ (let ((form "\"\\e\""))
+ (test-equal '(string (escaped-char "e")) (lex form))
+ (test-equal #vu8(#x1b 0) (run form))))
-(test-equal '(+ 2 2 2) (run "2+2+2"))
+ (test-group "String with byte secquence escape"
+ (let ((form "\"\\xf0\\x9f\\x92\\xa9\""))
+ (test-equal '(string (escaped-char (base-16-char "f0"))
+ (escaped-char (base-16-char "9f"))
+ (escaped-char (base-16-char "92"))
+ (escaped-char (base-16-char "a9")))
+ (lex form))
+ (test-equal #vu8(#xf0 #x9f #x92 #xa9 0) (run form)))))
+(test-group "__asm__"
+ (let ((form "__asm__(\".globl \" __XSTRING(sym))"))
+ (test-equal '() (lex form))
+ ;; TODO implicit string concatenation
+ (test-equal '(funcall __asm__
+ (string ".globl ")
+ (funcall __XSTRING sym)) (run form))))
+(let ((form "__attribute__((__aligned__(x)))"))
+ (test-equal '(funcall (variable "__attribute__")
+ (group (group (funcall (variable "__aligned__")
+ (group (variable "x"))))))
+ (lex form))
+ ;; This drops the extra set of parenthesis. Do we care?
+ (test-equal '(funcall __attribute__
+ (funcall __aligned__ x))
+ (run form)))
diff --git a/tests/test/util.scm b/tests/test/util.scm
index 1de96a37..aa37d20c 100644
--- a/tests/test/util.scm
+++ b/tests/test/util.scm
@@ -187,6 +187,25 @@
(test-error 'wrong-type-arg (find-extreme '()))
+;; TODO group-by
+;; TODO split-by
+
+(test-group "Split-by-one-of"
+
+ (test-equal "Empty input"
+ '(()) (split-by-one-of '() '(+)))
+
+ (test-equal "No matching tokens"
+ '((1 + 2)) (split-by-one-of '(1 + 2) '(/)))
+
+ (test-equal "Matching tokens"
+ '((1) (+ 2) (- 3))
+ (split-by-one-of '(1 + 2 - 3) '(+ -)))
+
+ (test-equal "Maching tokens, multiple values in each group"
+ '((1 + 2) (* 3 + 4))
+ (split-by-one-of '(1 + 2 * 3 + 4) '(*))))
+
(call-with-values
(lambda ()
(span-upto