aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHugo Hörnquist <hugo@lysator.liu.se>2022-06-30 07:07:59 +0200
committerHugo Hörnquist <hugo@lysator.liu.se>2022-07-07 21:17:22 +0200
commit85bb810edc083b0bcc08f76e22d51d1555dc828d (patch)
tree7f686666507e0c97546c98ae9c1da408abddf16f
parentC parser minor cleanup. (diff)
downloadcalp-85bb810edc083b0bcc08f76e22d51d1555dc828d.tar.gz
calp-85bb810edc083b0bcc08f76e22d51d1555dc828d.tar.xz
Add basic c evaluator.
-rw-r--r--module/c/cpp.scm2
-rw-r--r--module/c/eval.scm265
-rw-r--r--module/c/eval/environment.scm34
-rw-r--r--tests/test/cpp.scm66
4 files changed, 352 insertions, 15 deletions
diff --git a/module/c/cpp.scm b/module/c/cpp.scm
index 861b8ee2..aed496f2 100644
--- a/module/c/cpp.scm
+++ b/module/c/cpp.scm
@@ -137,7 +137,7 @@
(define graph* (load-cpp-file header-file))
;; Hack for termios since this symbol isn't defined.
;; (including in the above removed private c symbols)
- (define graph (add-node graph* (cons '_POSIX_VDISABLE #f) '()))
+ (define graph (add-node graph* (cons '_POSIX_VDISABLE 0) '()))
;; TODO expand bodies
;; (remove (compose private-c-symbol? car))
(resolve-dependency-graph graph))
diff --git a/module/c/eval.scm b/module/c/eval.scm
new file mode 100644
index 00000000..67d0075d
--- /dev/null
+++ b/module/c/eval.scm
@@ -0,0 +1,265 @@
+(define-module (c eval)
+ :use-module (hnh util)
+ :use-module (srfi srfi-1)
+ :use-module (srfi srfi-71)
+ :use-module (srfi srfi-88)
+ :use-module (ice-9 match)
+ :use-module (ice-9 curried-definitions)
+ :use-module ((rnrs bytevectors)
+ :select (bytevector?))
+ :use-module ((rnrs arithmetic bitwise)
+ :select (bitwise-not
+ bitwise-and
+ bitwise-ior
+ bitwise-xor
+ bitwise-arithmetic-shift-left
+ bitwise-arithmetic-shift-right))
+ :use-module (c eval environment)
+ :export (c-procedure?
+ procedure-formals
+ procedure-body
+ procedure-arity
+
+ c-eval
+ ))
+
+(define C-TRUE 1)
+(define C-FALSE 0)
+
+(define (boolean->c-boolean bool)
+ (if bool C-TRUE C-FALSE))
+
+(define (c-boolean->boolean bool)
+ (not (zero? bool)))
+
+(define (c-not b)
+ (-> b c-boolean->boolean not boolean->c-boolean))
+
+(define (c-procedure? expr)
+ (and (list? expr)
+ (not (null? expr))
+ (eq? 'lambda (car expr))))
+
+(define* (ensure-c-procedure expr optional: calling-procedure)
+ (unless (c-procedure? expr)
+ (scm-error 'c-eval-error calling-procedure
+ "Value not a procedure: ~s"
+ (list procedure #f))))
+
+(define (procedure-formals procedure)
+ (ensure-c-procedure procedure "procedure-formals")
+ (list-ref procedure 1))
+
+(define (procedure-body procedure)
+ (ensure-c-procedure procedure "procedure-body")
+ (list-ref procedure 2))
+
+(define (procedure-arity procedure)
+ (length (procedure-formals procedure)))
+
+(define (literal? expression)
+ (or (number? expression)
+ (bytevector? expression)))
+
+
+
+;; Internal helper procedures
+
+(define (mod-set operation)
+ (lambda (env slot value)
+ ;; a += b
+ ;; a = a + b
+ (c-eval env `(= ,slot (,operation ,slot ,value)))))
+
+(define (fold-2 proc init lst)
+ (car+cdr
+ (fold (lambda (arg env+done)
+ (let ((env* arg* (proc (car env+done) arg)))
+ (cons* env* arg* (cdr env+done))))
+ init
+ lst)))
+
+;; TODO this disregards
+;; - floating point convertions
+;; - integer truncation
+(define ((simple-infix operation) env . operands)
+ (let ((env done (fold-2 c-eval (cons env '()) operands)))
+ (values env (apply operation (reverse done)))))
+
+(define ((binary-operator proc) env i c)
+ (let ((env* i* (c-eval env i)))
+ (let ((env** c* (c-eval env* c)))
+ (values env** (proc i* c*)))))
+
+
+
+
+;; The order of evaluation for a number of these is undefined, meaning
+;; that any side effects without sequence points is undefined.
+;; However, for many of these I do the sensible thing and evaluate them
+;; from left to right, instead of ignoring all side effects.
+
+;; TODO double check these with a C standard
+
+
+;; Operators have their own namespace. They are called without funcall
+;; in the pseudo-lisp which C is compiled to, and they expand more like
+;; lisp macros, since its up to each operator what to do with its operands.
+;; This to allow assignment and short circuting.
+(define primitives
+ `((and . ,(lambda (env . operands)
+ (let loop ((env env) (operands operands))
+ (if (null? operands)
+ (values env C-TRUE)
+ (let* ((env* result (c-eval env (car operands))))
+ (if (c-boolean->boolean result)
+ (loop env* (cdr operands))
+ (values env* result)))))))
+ (or . ,(lambda (env . operands)
+ (let loop ((env env) (operands operands))
+ (if (null? operands)
+ (values env C-FALSE)
+ (let* ((env* result (c-eval env (car operands))))
+ (if (false? result)
+ (values env* result)
+ (loop env* (cdr operands))))))))
+ (= . ,(lambda (env slot value)
+ ;; TOOD if slot isn't a variable, but a field (or array index)
+ ;; then it needs to be resolved...
+ (let ((env* result (c-eval env value)))
+ (values (env-set! env* slot result)
+ result))))
+ (and_eq ,(mod-set 'bitand)) ; &=
+ (or_eq ,(mod-set 'bitor)) ; |=
+ (xor_eq ,(mod-set 'xor)) ; ^=
+ (+= ,(mod-set '+))
+ (-= ,(mod-set '-))
+ (*= ,(mod-set '*))
+ (/= ,(mod-set '/))
+ (<<= ,(mod-set '<<))
+ (>>= ,(mod-set '>>))
+ (%= ,(mod-set '%))
+ (+ . ,(simple-infix +))
+ (* . ,(simple-infix *))
+ (/ . ,(simple-infix /))
+ (- . ,(lambda (env op . operands)
+ (if (null? operands)
+ (let ((env* value (c-eval env op)))
+ (values env* (- value)))
+ (apply (simple-infix -)
+ env op operands))))
+ (bitor . ,(simple-infix bitwise-ior))
+ (bitand . ,(simple-infix bitwise-and))
+ (xor . ,(simple-infix bitwise-xor))
+ (not_eq . ,(lambda (env a b) (c-eval env `(not (== ,a ,b))))) ; !=
+ (<< . ,(binary-operator bitwise-arithmetic-shift-left))
+ (>> . ,(binary-operator bitwise-arithmetic-shift-right))
+ (< . ,(binary-operator (compose boolean->c-boolean <)))
+ (> . ,(binary-operator (compose boolean->c-boolean >)))
+ ;; this assumes that = handles pointers
+ (== . ,(binary-operator (compose boolean->c-boolean =)))
+ (<= . ,(binary-operator (compose boolean->c-boolean <=)))
+ (>= . ,(binary-operator (compose boolean->c-boolean >=)))
+ (% . ,(binary-operator modulo))
+
+ (not . ,(lambda (env value)
+ (let ((env* result (c-eval env value)))
+ (values env* (c-not result)))))
+ (compl . ,(lambda (env value)
+ (let ((env* result (c-eval env value)))
+ (values env* (bitwise-not result)))))
+
+ ;; ++C
+ (pre-increment . ,(lambda (env slot) (c-eval env `(+= ,slot 1))))
+ (pre-decrement . ,(lambda (env slot) (c-eval env `(-= ,slot 1))))
+ ;; TODO these (C++, C--) need to handle if slot isn't a direct variabl
+ (post-increment . ,(lambda (env slot)
+ (let ((value (env-ref env slot)))
+ (values (env-set! env slot (1+ value))
+ value))))
+ (pre-decrement . ,(lambda (env slot)
+ (let ((value (env-ref env slot)))
+ (values (env-set! env slot (1+ value))
+ value))))
+
+ (ternary . ,(lambda (env test true-clause false-clause)
+ (let ((env* value (c-eval env test)))
+ (c-eval env*
+ (if (c-boolean->boolean value)
+ true-clause false-clause)))))
+
+ ;; TODO remaining operations
+ (as-type . ,(lambda (env target-type value)
+ (format (current-error-port) "cast<~s>(~s)~%" target-type value)
+ (values env value)))
+
+ (object-slot . ,(lambda (env object slot)
+ (scm-error 'not-implemented "object-slot"
+ "Object slots are not implemented, when accessing ~s.~s"
+ (list object slot) #f)))
+ (dereference-slot ,(lambda (env ptr slot)
+ (scm-error 'not-implemented "dereference-slot"
+ "Object slots are not implemented, when accessing ~s->~s"
+ (list object slot) #f)))
+ (dereference . ,(lambda (env ptr)
+ (scm-error 'not-implemented "dereference"
+ "Poiner dereferencing is not implemented: *~s"
+ (list ptr) #f)))
+ (pointer . ,(lambda (env value)
+ (scm-error 'not-implemented "pointer"
+ "Pointer of is not implemented: &~s"
+ (list value) #f)))))
+
+;; TODO |,|
+
+
+(define (c-eval environment expression)
+ (match expression
+ (('lambda formals body) (values environment `(lambda ,formals ,body)))
+ ;; hack since sizeof really should be a operator
+ (('funcall 'sizeof arg)
+ ;; TODO
+ (format (current-error-port) "sizeof ~s~%" arg)
+ (values environment 1))
+
+ (('funcall procedure-name ('#{,}# args ...))
+ (let ((procedure (env-ref environment procedure-name)))
+ (ensure-c-procedure procedure "c-eval")
+ (unless (= (length args) (procedure-arity procedure))
+ (scm-error 'c-eval-error "c-eval"
+ "Procedure arity mismatch for ~s, expected ~s, got ~s"
+ (list procedure-name
+ (procedure-arity procedure)
+ (length args))
+ #f))
+ (let ((env args* (fold-2 c-eval (cons environment '()) args )))
+ (let ((inner-environment
+ (fold (lambda (name value env) (env-set! env name value))
+ (push-frame! env)
+ (procedure-formals procedure) args*)))
+ (let ((resulting-environment
+ result-value
+ (c-eval inner-environment (procedure-body procedure))))
+ (values (pop-frame! resulting-environment)
+ result-value))))))
+
+ (('funcall procedure arg)
+ (c-eval environment `(funcall ,procedure (#{,}# ,arg))))
+
+ ((operator operands ...)
+ (apply (or (assoc-ref primitives operator)
+ (scm-error 'c-eval-error "c-eval"
+ "Applying non-existant primitive operator: ~s, operands: ~s"
+ (list operator operands) #f))
+ environment operands))
+
+ ;; "f()" gets compiled to simply f
+ ;; meaning that we instead use the environment to determine
+ ;; if something is a variable or procedure
+ (expr
+ (if (literal? expr)
+ (values environment expr)
+ (let ((value (env-ref environment expr)))
+ (if (c-procedure? value)
+ (c-eval environment `(funcall ,value (#{,}#)))
+ (values environment value)))))))
diff --git a/module/c/eval/environment.scm b/module/c/eval/environment.scm
new file mode 100644
index 00000000..12eefaf7
--- /dev/null
+++ b/module/c/eval/environment.scm
@@ -0,0 +1,34 @@
+(define-module (c eval environment)
+ :use-module (srfi srfi-1)
+ :export (make-environment
+ env-set! env-ref push-frame! pop-frame!))
+
+(define (make-frame)
+ (make-hash-table))
+
+(define (make-environment)
+ (list (make-frame)))
+
+;; Returns an updated environment, linear update
+(define (env-set! env key value)
+ ;; get handle to differentiate #f
+ ;; (even though #f should never be stored since it's not a C value)
+ (cond ((find (lambda (frame) (hashq-get-handle frame key)) env)
+ => (lambda (frame) (hashq-set! frame key value)))
+ (else (hashq-set! (car env) key value)))
+ env)
+
+(define (env-ref env key)
+ (cond ((null? env)
+ (scm-error 'misc-error "env-ref"
+ "~s unbound"
+ (list key)
+ #f))
+ ((hashq-get-handle (car env) key) => cdr)
+ (else (env-ref (cdr env) key))))
+
+(define (push-frame! environment)
+ (cons (make-frame) environment))
+
+(define (pop-frame! environment)
+ (cdr environment))
diff --git a/tests/test/cpp.scm b/tests/test/cpp.scm
index fa767fe3..1294bc96 100644
--- a/tests/test/cpp.scm
+++ b/tests/test/cpp.scm
@@ -3,10 +3,15 @@
;;; Code:
(define-module (test cpp)
+ :use-module (srfi srfi-1)
:use-module (srfi srfi-64)
:use-module (srfi srfi-88)
:use-module ((c lex) :select (lex))
- :use-module ((c parse) :select (parse-lexeme-tree)))
+ :use-module ((c parse) :select (parse-lexeme-tree))
+ :use-module ((c eval) :select (c-eval))
+ :use-module ((c eval environment) :select (make-environment env-set!))
+ :use-module ((rnrs arithmetic bitwise)
+ :select (bitwise-xor)))
;; Note that the lexer's output isn't stable.
;; The tests here are more to see where the lexer succeeds but the parser fails.
@@ -31,6 +36,18 @@
(define run (compose parse-lexeme-tree lex))
+(define (alist->environment alist)
+ (fold (lambda (pair env)
+ (apply env-set! env pair))
+ (make-environment)
+ alist))
+
+(define (exec form . base-bindings)
+ (call-with-values
+ (lambda () (c-eval (alist->environment base-bindings)
+ (run form)))
+ (lambda (env value) value)))
+
(define-syntax let-group
(syntax-rules ()
((let ((form name) rest ...) body ...)
@@ -100,7 +117,8 @@
(operator "*")
(integer (base-10 "2"))))
(lex form))
- (test-equal '(+ 2 (* 2 2)) (run form)))
+ (test-equal '(+ 2 (* 2 2)) (run form))
+ (test-equal 6 (exec form)))
(let-group
((form "2 * 2 + 2"))
@@ -110,7 +128,8 @@
(operator "+")
(integer (base-10 "2"))))
(lex form))
- (test-equal '(+ (* 2 2) 2) (run form)))
+ (test-equal '(+ (* 2 2) 2) (run form))
+ (test-equal 6 (exec form)))
(let-group
((form "2+2+2"))
@@ -119,7 +138,8 @@
(infix (integer (base-10 "2"))
(operator "+")
(integer (base-10 "2")))) (lex form))
- (test-equal '(+ 2 2 2) (run form)))
+ (test-equal '(+ 2 2 2) (run form))
+ (test-equal 6 (exec form)))
(test-group "Unary minus"
(test-group "Without space"
@@ -127,21 +147,24 @@
(test-equal '(prefix (prefix-operator "-")
(integer (base-10 "1")))
(lex form))
- (test-equal '(- 1) (run form))))
+ (test-equal '(- 1) (run form))
+ (test-equal -1 (exec form))))
(test-group "With space"
(let ((form "- 1"))
(test-equal '(prefix (prefix-operator "-")
(integer (base-10 "1")))
(lex form))
- (test-equal '(- 1) (run form))))
+ (test-equal '(- 1) (run form))
+ (test-equal -1 (exec form))))
(test-group "Before variable"
(let ((form "-x"))
(test-equal '(prefix (prefix-operator "-")
(variable "x"))
(lex form))
- (test-equal '(- x) (run form))))
+ (test-equal '(- x) (run form))
+ (test-equal -5 (exec form '(x 5)))))
(test-group "Before infix"
(let ((form "-x+3"))
@@ -150,8 +173,8 @@
(operator "+")
(integer (base-10 "3")))
(lex form))
- (test-equal '(+ (- x) 3)
- (run form))))
+ (test-equal '(+ (- x) 3) (run form))
+ (test-equal -2 (exec form '(x 5)))))
(test-group "Inside infix expression"
(let ((form "x+-3"))
@@ -160,7 +183,8 @@
(prefix (prefix-operator "-")
(integer (base-10 "3"))))
(lex form))
- (test-equal '(+ x (- 3)) (run form))))
+ (test-equal '(+ x (- 3)) (run form))
+ (test-equal 2 (exec form '(x 5)))))
)
@@ -180,7 +204,8 @@
(let ((form "1000000U"))
(test-equal '(integer (base-10 "1000000") (integer-suffix "U")) (lex form))
- (test-equal '(as-type (unsigned) 1000000) (run form)))
+ (test-equal '(as-type (unsigned) 1000000) (run form))
+ (test-equal 1000000 (exec form)))
(let ((form "0x10c"))
@@ -197,17 +222,30 @@
(operator "!=")
(variable "b"))
(lex form))
- (test-equal '(not_eq a b) (run form)))
+ (test-equal '(not_eq a b) (run form))
+ (test-equal 1 (exec form '(a 1) '(b 2)))
+ (test-equal 0 (exec form '(a 1) '(b 1)))
+ )
(let ((form "((c) == (val) && (val) != _POSIX_VDISABLE)"))
;; (test-equal '() (lex form))
(test-equal '(and (== c val)
(not_eq val _POSIX_VDISABLE))
- (run form)))
+ (run form))
+ (test-equal 0 (exec form '(c 1) '(val 2) '(_POSIX_VDISABLE 3)))
+ )
(let ((form "CTRL('O')"))
(test-equal '(funcall (variable "CTRL") (group (char "O"))) (lex form))
- (test-equal '(funcall CTRL 79) (run form)))
+ (test-equal '(funcall CTRL 79) (run form))
+ (test-equal (bitwise-xor #x40 (char->integer #\O))
+ (exec form
+ ;; Definition copied from our parsers output of
+ ;; preprocessing output as defined above
+ '(CTRL (lambda (x)
+ (ternary (and (>= x 97) (<= x 122))
+ (+ (- x 97) 1)
+ (bitand (+ (- x 65) 1) 127)))))))
(let ((form "CREPRINT"))
(test-equal '(variable "CREPRINT") (lex form))