diff options
Diffstat (limited to '')
30 files changed, 3791 insertions, 172 deletions
@@ -14,7 +14,153 @@ GUILE_VERSION=$(shell $(GUILE) -c '(display (version))') GUILE_SITE_DIR=$(shell $(GUILE) -c "(display (%site-dir))") GUILE_CCACHE_DIR=$(shell $(GUILE) -c "(display (%site-ccache-dir))") -SCM_FILES = $(shell find module/ -type f -name \*.scm) +not_scm_files = \ + c/operators \ + c/preprocessor \ + c/cpp \ + c/parse \ + c/lex \ + c/eval \ + c/eval/environment \ + zzz_sentinel + + +scm_files = \ + c/cpp-environment \ + c/cpp-environment/object-like-macro \ + c/cpp-environment/internal-macro \ + c/cpp-environment/function-like-macro \ + c/cpp \ + c/eval2 \ + c/lex2 \ + c/line-fold \ + c/preprocessor2 \ + c/trigraph \ + hnh/util \ + hnh/util/object \ + hnh/util/path + +# scm_files = \ + base64 \ + c/compiler \ + c/cpp-environment \ + c/cpp-environment/function-like-macro \ + c/cpp-environment/internal-macro \ + c/cpp-environment/object-like-macro \ + c/eval2 \ + c/lex2 \ + c/line-fold \ + c/preprocessor2 \ + c/trigraph \ + calp \ + calp/benchmark/parse \ + calp/config-base \ + calp/entry-points/benchmark \ + calp/entry-points/convert \ + calp/entry-points/html \ + calp/entry-points/ical \ + calp/entry-points/import \ + calp/entry-points/server \ + calp/entry-points/terminal \ + calp/entry-points/text \ + calp/entry-points/tidsrapport \ + calp/entry-points/update-zoneinfo \ + calp/html/caltable \ + calp/html/components \ + calp/html/config \ + calp/html/filter \ + calp/html/util \ + calp/html/vcomponent \ + calp/html/view/calendar \ + calp/html/view/calendar/month \ + calp/html/view/calendar/shared \ + calp/html/view/calendar/week \ + calp/html/view/search \ + calp/html/view/small-calendar \ + calp/main \ + calp/repl \ + calp/server/routes \ + calp/server/server \ + calp/terminal \ + calp/translation \ + calp/util/config \ + calp/util/exceptions \ + calp/util/hooks \ + calp/util/time \ + crypto \ + datetime \ + datetime/instance \ + datetime/timespec \ + datetime/zic \ + glob \ + hnh/util \ + hnh/util/env \ + hnh/util/exceptions \ + hnh/util/graph \ + hnh/util/io \ + hnh/util/language \ + hnh/util/lens \ + hnh/util/object \ + hnh/util/options \ + hnh/util/path \ + hnh/util/tree \ + hnh/util/uuid \ + srfi/srfi-41/util \ + srfi/srfi-64/test-error \ + srfi/srfi-64/util \ + sxml/html \ + sxml/namespace \ + sxml/transformations \ + text/flow \ + text/markup \ + text/numbers \ + text/numbers/en \ + text/numbers/sv \ + text/util \ + vcomponent \ + vcomponent/base \ + vcomponent/config \ + vcomponent/control \ + vcomponent/datetime \ + vcomponent/datetime/output \ + vcomponent/duration \ + vcomponent/formats/common/types \ + vcomponent/formats/ical/output \ + vcomponent/formats/ical/parse \ + vcomponent/formats/ical/types \ + vcomponent/formats/vdir/parse \ + vcomponent/formats/vdir/save-delete \ + vcomponent/formats/xcal/output \ + vcomponent/formats/xcal/parse \ + vcomponent/formats/xcal/types \ + vcomponent/geo \ + vcomponent/recurrence \ + vcomponent/recurrence/display \ + vcomponent/recurrence/display/common \ + vcomponent/recurrence/display/en \ + vcomponent/recurrence/display/sv \ + vcomponent/recurrence/generate \ + vcomponent/recurrence/internal \ + vcomponent/recurrence/parse \ + vcomponent/util/control \ + vcomponent/util/describe \ + vcomponent/util/group \ + vcomponent/util/instance \ + vcomponent/util/instance/methods \ + vcomponent/util/parse-cal-path \ + vcomponent/util/search \ + vulgar \ + vulgar/color \ + vulgar/components \ + vulgar/info \ + vulgar/termios \ + web/http/make-routes \ + web/query \ + web/uri-query \ + xdg/basedir + +SCM_FILES = $(scm_files:%=module/%.scm) + GO_FILES = $(SCM_FILES:module/%.scm=obj-$(GUILE_VERSION)/%.go) GUILE_ENV = GUILE_LOAD_PATH=$(PWD)/module \ diff --git a/doc/ref/guile/util-path.texi b/doc/ref/guile/util-path.texi index 322c50ec..2a53ba91 100644 --- a/doc/ref/guile/util-path.texi +++ b/doc/ref/guile/util-path.texi @@ -3,7 +3,10 @@ Provided by the module @code{(hnh util path)}. -See also @code{absolute-file-name?} from Guile. + +@defun path-absolute? string +Alias of @code{absolute-file-name?} from Guile. +@end defun @defun path-append strings ... Joins all strings into a path, squeezing duplicated delimiters, but diff --git a/doc/ref/guile/util.texi b/doc/ref/guile/util.texi index 32df5fce..222b59c5 100644 --- a/doc/ref/guile/util.texi +++ b/doc/ref/guile/util.texi @@ -198,6 +198,27 @@ Split a list into sub-lists on @var{element} @end lisp @end defun +@defun split-by-one-of lst items +Like split-by, but takes a list of delimiters. +Returns a list where the first element is everything before the first +delimiter, and the remaining elements is the splitting delimiter +consed with everything until the next delimiter. + +@lisp +(split-by-one-of '() '(+))) +⇒ (()) + +(split-by-one-of '(1 + 2) '(/)) +⇒ ((1 + 2)) + +(split-by-one-of '(1 + 2 - 3) '(+ -)) +⇒ ((1) (+ 2) (- 3)) + +(split-by-one-of '(1 + 2 * 3 + 4) '(*)) +⇒ ((1 + 2) (* 3 + 4)) +@end lisp +@end defun + @defun span-upto count predicate list Simar to span from srfi-1, but never takes more than diff --git a/module/c/compiler.scm b/module/c/compiler.scm new file mode 100644 index 00000000..801c3752 --- /dev/null +++ b/module/c/compiler.scm @@ -0,0 +1,66 @@ +(define-module (c compiler) + :use-module ((c lex2) :select (lex)) + :use-module ((c trigraph) :select (replace-trigraphs)) + :use-module ((c line-fold) :select (fold-lines)) + :use-module (c cpp-environment object-like-macro) + :use-module ((c cpp-environment) + :select (make-environment + extend-environment + enter-file)) + :use-module (hnh util) + ;; TODO importort + ;; handle-preprocessing-tokens + ;; load-and-tokenize-file + :export (run-compiler)) + +" +#define __STDC__ 1 +#define __STDC_HOSTED__ 1 +#define __STDC_VERSION__ 201112L +" + +(define now (localtime (current-time))) +(define default-macros + (list + ;; 6.10.8 + (object-like-macro + identifier: "__STDC__" + body: '(preprocessing-token (pp-number "1"))) + (object-like-macro + identifier: "__STDC_HOSTED__" + body: '(preprocessing-token (pp-number "1"))) + (object-like-macro + identifier: "__STDC_VERSION__" + body: '(preprocessing-token (pp-number "201112L"))) + (object-like-macro + identifier: "__DATE__" + ;; TODO format should always be in + ;; english, and not tranlated + body: `(preprocessing-token (string-literal ,(strftime "%b %_d %Y" now)))) + (object-like-macro + identifier: "__TIME__" + body: `(preprocessing-token + (string-literal + ,(strftime "%H:%M:%S" now)))))) + +(define environment + (-> (make-environment) + (extend-environment default-macros))) + + + +;;; 5.1.11.2 Translation phases + + + +(define (run-compiler path) + (define environment (enter-file (make-environment) path)) + (-> (load-and-tokenize-file path) + (handle-preprocessing-tokens environment)) +;;; 5. (something with character sets) +;;; 6. concatenation of string literals +;;; 7. Whitespace tokens are discarded, each preprocessing token is converted into a token + ;; 6.4 paragraph 2 + ;; Each preprocessing toket thas is converted to a token shall have the lexcal form of a keyword, an identifier, a constant, a string literal, or a puncturtor +;;; 8. All external objects and functions are resolved + ) diff --git a/module/c/cpp-environment.scm b/module/c/cpp-environment.scm new file mode 100644 index 00000000..d6c86f7a --- /dev/null +++ b/module/c/cpp-environment.scm @@ -0,0 +1,144 @@ +(define-module (c cpp-environment) + :use-module (srfi srfi-1) + :use-module (srfi srfi-88) + :use-module (ice-9 hash-table) + :use-module (hnh util object) + :use-module (hnh util type) + :use-module (hnh util lens) + :use-module ((c cpp-environment function-like-macro) :prefix #{fun:}#) + :use-module ((c cpp-environment object-like-macro) :prefix #{obj:}#) + :use-module ((c cpp-environment internal-macro) :prefix #{int:}#) + :export ( + + macro-identifier + macro-body + macro-identifier-list + macro-variadic? + macro? + + enter-active-if + enter-inactive-if + leave-if + + enter-file + leave-file + bump-line + current-line + current-file + + function-macro? + object-macro? + internal-macro? + + cpp-environment + cpp-if-status cpp-variables + + make-environment in-environment? + remove-identifier! add-identifier! + get-identifier + extend-environment + + )) + +(define (macro-identifier x) + (define identifier + (cond ((obj:object-like-macro? x) obj:identifier) + ((fun:function-like-macro? x) fun:identifier) + ((int:internal-macro? x) int:identifier) + (else (scm-error 'wrong-type-arg "macro-identifier" + "Not a macro: ~s" + (list x) #f)))) + (identifier x)) + + +(define (macro-body macro) + (define body-proc + (cond ((obj:object-like-macro? macro) obj:body) + ((fun:function-like-macro? macro) fun:body) + ((int:internal-macro? macro) int:body) + (else (scm-error 'wrong-type-arg "macro-body" + "Not a macro: ~s" + (list macro) #f)))) + (body-proc macro)) + +(define macro-identifier-list fun:identifier-list) +(define macro-variadic? fun:variadic?) + +(define function-macro? fun:function-like-macro?) +(define object-macro? obj:object-like-macro?) +(define internal-macro? int:internal-macro?) + +(define (macro? x) + (or (obj:object-like-macro? x) + (fun:function-like-macro? x) + (int:internal-macro? x))) + +(define-type (cpp-environment) + (cpp-if-status type: (list-of (memv '(outside active-if inactive-if))) + default: '(outside)) + (cpp-variables type: hash-table? default: (make-hash-table)) + (cpp-file-stack type: (and ((negate null?)) + (list-of (pair-of string? exact-integer?))) + default: '(("*outside*" . 1)))) + + + +(define (enter-active-if environment) + (modify environment cpp-if-status xcons 'active-if)) + +(define (enter-inactive-if environment) + (modify environment cpp-if-status xcons 'inactive-if)) + +(define (leave-if environment) + (modify environment cpp-if-status cdr)) + + + +(define (enter-file environment filename) + (modify environment cpp-file-stack xcons (cons filename 1))) + +(define (leave-file environment) + (modify environment cpp-file-stack cdr)) + +(define current-line (compose-lenses cpp-file-stack car* cdr*)) + +(define current-file (compose-lenses cpp-file-stack car* car*)) + +(define* (bump-line environment optional: (count 1)) + (modify environment current-line + count)) + + + + +(define (make-environment) (cpp-environment)) + +(define (in-envirnoment? environment key) + (hash-get-handle (cpp-variables environment) key)) + +(define (remove-identifier! environment key) + (hash-remove! (cpp-variables environment) key) + environment) + +(define (add-identifier! environment key value) + (unless (string? key) + (scm-error 'wrong-type-arg "add-identifier!" + "Key must be a string, got: ~s" + (list key) #f)) + (unless (macro? value) + (scm-error 'wrong-type-arg "add-identifier!" + "Value must be a macro, got: ~s" + (list value) #f)) + (hash-set! (cpp-variables environment) key value) + environment) + +(define (get-identifier environment key) + (hash-ref (cpp-variables environment) key)) + +(define (clone-hash-table ht) + (alist->hash-table (hash-map->list cons ht))) + +(define (extend-environment environment macros) + (let ((env (modify environment cpp-variables clone-hash-table))) + (fold (lambda (m env) (add-identifier! env (macro-identifier m) m)) + env macros))) + diff --git a/module/c/cpp-environment/function-like-macro.scm b/module/c/cpp-environment/function-like-macro.scm new file mode 100644 index 00000000..26512439 --- /dev/null +++ b/module/c/cpp-environment/function-like-macro.scm @@ -0,0 +1,18 @@ +(define-module (c cpp-environment function-like-macro) + :use-module (hnh util object) + :use-module (hnh util type) + :export (function-like-macro + function-like-macro? + identifier + identifier-list + body + variadic?)) + +(define-type (function-like-macro) + (identifier type: string?) + (identifier-list type: (list-of string?)) + ;; TODO import these + (body type: list? ; (list-of (or whitespace-token? preprocessing-token?)) + ) + (variadic? type: boolean? + default: #f)) diff --git a/module/c/cpp-environment/internal-macro.scm b/module/c/cpp-environment/internal-macro.scm new file mode 100644 index 00000000..3c946738 --- /dev/null +++ b/module/c/cpp-environment/internal-macro.scm @@ -0,0 +1,11 @@ +(define-module (c cpp-environment internal-macro) + :use-module (hnh util object) + :export (internal-macro + internal-macro? + identifier body)) + +(define-type (internal-macro) + (identifier type: string?) + (body type: procedure? + ;; Arity 2 + )) diff --git a/module/c/cpp-environment/object-like-macro.scm b/module/c/cpp-environment/object-like-macro.scm new file mode 100644 index 00000000..5d4c8810 --- /dev/null +++ b/module/c/cpp-environment/object-like-macro.scm @@ -0,0 +1,13 @@ +(define-module (c cpp-environment object-like-macro) + :use-module (hnh util object) + :export (object-like-macro + object-like-macro? + identifier + body)) + + +(define-type (object-like-macro) + (identifier type: string?) + ;; TODO import these + (body type: list? ; (list-of (or whitespace-token? preprocessing-token?)) + )) diff --git a/module/c/cpp.scm b/module/c/cpp.scm index a2935352..aed496f2 100644 --- a/module/c/cpp.scm +++ b/module/c/cpp.scm @@ -5,47 +5,37 @@ :use-module (ice-9 match) :use-module (ice-9 regex) :use-module ((rnrs io ports) :select (call-with-port)) + :use-module ((rnrs bytevectors) :select (bytevector?)) :use-module (ice-9 format) :use-module ((hnh util io) :select (read-lines)) :use-module (hnh util graph) :use-module (c lex) :use-module (c parse) :use-module (c operators) - :export (do-funcall replace-symbols include#) + :export (replace-symbols include#) ) ;; input "#define F(x, y) x + y" -;; 1 full define | F(x, y) +;; 1 full define | F(x,y) ;; 2 macro name | F -;; 3 macro args | (x, y) -;; 4 macro body | x + y -(define define-re (make-regexp "^#define ((\\w+)(\\([^)]*\\))?) (.*)")) +;; 3 macro args | (x,y) +;; 5 macro body | x + y or #f +(define define-re (make-regexp "^#define ((\\w+)([(][^)]*[)])?)( (.*))?")) (define (tokenize-define-line header-line) (aif (regexp-exec define-re header-line) (cons (match:substring it 1) - (match:substring it 4)) + (let ((body (match:substring it 5))) + (if (or (eqv? body #f) + (string-null? body)) + "1" body))) (scm-error 'c-parse-error "tokenize-define-line" "Line dosen't match: ~s" (list header-line) #f))) -(define (do-funcall function arguments) - (if (list? arguments) - (apply function arguments) - (function arguments))) - -(define symb-map - `((,(symbol #\|) . logior) - (funcall . (@ (c cpp) do-funcall)) - (&& . and) - (& . logand) - (== . =) - (!= . (negate =)) - )) - (define (replace-symbols tree dict) (if (not (list? tree)) (or (assoc-ref dict tree) tree) @@ -55,12 +45,27 @@ ;; Direct values. Lisp also has quoted symbols in this group. (define (immediate? x) (or (number? x) - (char? x) - (string? x))) + (bytevector? x))) +;; TODO replace this with something sensible +;; like a correct list extracted from (c eval) +;; and not thinging that types are variables ;; built in symbols. Should never be marked as dependencies (define (primitive? x) - (memv x (cons 'funcall binary-operators))) + (memv x `( + ;; language primitives + sizeof + + ;; special forms introduced by parser + funcall ternary struct-type as-type + + ;; unary operatons which aren't also binary operators + ++ -- ! ~ + not compl dereference pointer + pre-increment pre-decrement + post-increment post-decrement + ,@binary-operators + ))) @@ -77,7 +82,6 @@ [arg (list arg)])) (define right (f (cdr pair))) - (define alt-right (replace-symbols right symb-map)) (define dependencies (lset-difference eq? @@ -91,12 +95,12 @@ dependencies (match left [('funcall name ('#{,}# args ...)) - (cons name `(lambda ,args ,alt-right))] + (cons name `(lambda ,args ,right))] [('funcall name arg) - (cons name `(lambda (,arg) ,alt-right))] + (cons name `(lambda (,arg) ,right))] - [name (cons name alt-right)]))) + [name (cons name right)]))) (define (parse-cpp-file lines) @@ -104,7 +108,9 @@ (catch #t (lambda () (parse-cpp-define line)) (lambda (err caller fmt args data) - (format #t "~a ~?~%" fmt args) + (format #t "~a in ~a: ~?~%" + err caller fmt args) + (format #t "~s~%" line) #f))) lines)) @@ -114,29 +120,32 @@ (define (tokenize-header-file header-file) (map tokenize-define-line (call-with-port - (open-input-pipe - (string-append "cpp -dM " header-file)) + (open-pipe* OPEN_READ "cpp" "-dM" header-file) read-lines))) -(define-macro (include# header-file . args) - - (define define-form (if (null? args) 'define (car args))) - - (define lines (remove (compose private-c-symbol? car) - (tokenize-header-file header-file))) +(define (load-cpp-file header-file) + (define lines (tokenize-header-file header-file)) (define forms (parse-cpp-file lines)) - (define graph* - (fold (lambda (node graph) - (add-node graph (cdr node) (car node))) - (make-graph car) - (filter identity forms))) + (fold (lambda (node graph) + (add-node graph (cdr node) (car node))) + (make-graph car) + (filter identity forms))) +(define (include% header-file) + (define graph* (load-cpp-file header-file)) ;; Hack for termios since this symbol isn't defined. ;; (including in the above removed private c symbols) - (define graph (add-node graph* (cons '_POSIX_VDISABLE #f) '())) + (define graph (add-node graph* (cons '_POSIX_VDISABLE 0) '())) + ;; TODO expand bodies + ;; (remove (compose private-c-symbol? car)) + (resolve-dependency-graph graph)) + +(define-macro (include# header-file . args) + + (define define-form (if (null? args) 'define (car args))) `(begin ,@(map (lambda (pair) `(,define-form ,(car pair) ,(cdr pair))) - (resolve-dependency-graph graph)))) + (include% header-file)))) diff --git a/module/c/eval.scm b/module/c/eval.scm new file mode 100644 index 00000000..67d0075d --- /dev/null +++ b/module/c/eval.scm @@ -0,0 +1,265 @@ +(define-module (c eval) + :use-module (hnh util) + :use-module (srfi srfi-1) + :use-module (srfi srfi-71) + :use-module (srfi srfi-88) + :use-module (ice-9 match) + :use-module (ice-9 curried-definitions) + :use-module ((rnrs bytevectors) + :select (bytevector?)) + :use-module ((rnrs arithmetic bitwise) + :select (bitwise-not + bitwise-and + bitwise-ior + bitwise-xor + bitwise-arithmetic-shift-left + bitwise-arithmetic-shift-right)) + :use-module (c eval environment) + :export (c-procedure? + procedure-formals + procedure-body + procedure-arity + + c-eval + )) + +(define C-TRUE 1) +(define C-FALSE 0) + +(define (boolean->c-boolean bool) + (if bool C-TRUE C-FALSE)) + +(define (c-boolean->boolean bool) + (not (zero? bool))) + +(define (c-not b) + (-> b c-boolean->boolean not boolean->c-boolean)) + +(define (c-procedure? expr) + (and (list? expr) + (not (null? expr)) + (eq? 'lambda (car expr)))) + +(define* (ensure-c-procedure expr optional: calling-procedure) + (unless (c-procedure? expr) + (scm-error 'c-eval-error calling-procedure + "Value not a procedure: ~s" + (list procedure #f)))) + +(define (procedure-formals procedure) + (ensure-c-procedure procedure "procedure-formals") + (list-ref procedure 1)) + +(define (procedure-body procedure) + (ensure-c-procedure procedure "procedure-body") + (list-ref procedure 2)) + +(define (procedure-arity procedure) + (length (procedure-formals procedure))) + +(define (literal? expression) + (or (number? expression) + (bytevector? expression))) + + + +;; Internal helper procedures + +(define (mod-set operation) + (lambda (env slot value) + ;; a += b + ;; a = a + b + (c-eval env `(= ,slot (,operation ,slot ,value))))) + +(define (fold-2 proc init lst) + (car+cdr + (fold (lambda (arg env+done) + (let ((env* arg* (proc (car env+done) arg))) + (cons* env* arg* (cdr env+done)))) + init + lst))) + +;; TODO this disregards +;; - floating point convertions +;; - integer truncation +(define ((simple-infix operation) env . operands) + (let ((env done (fold-2 c-eval (cons env '()) operands))) + (values env (apply operation (reverse done))))) + +(define ((binary-operator proc) env i c) + (let ((env* i* (c-eval env i))) + (let ((env** c* (c-eval env* c))) + (values env** (proc i* c*))))) + + + + +;; The order of evaluation for a number of these is undefined, meaning +;; that any side effects without sequence points is undefined. +;; However, for many of these I do the sensible thing and evaluate them +;; from left to right, instead of ignoring all side effects. + +;; TODO double check these with a C standard + + +;; Operators have their own namespace. They are called without funcall +;; in the pseudo-lisp which C is compiled to, and they expand more like +;; lisp macros, since its up to each operator what to do with its operands. +;; This to allow assignment and short circuting. +(define primitives + `((and . ,(lambda (env . operands) + (let loop ((env env) (operands operands)) + (if (null? operands) + (values env C-TRUE) + (let* ((env* result (c-eval env (car operands)))) + (if (c-boolean->boolean result) + (loop env* (cdr operands)) + (values env* result))))))) + (or . ,(lambda (env . operands) + (let loop ((env env) (operands operands)) + (if (null? operands) + (values env C-FALSE) + (let* ((env* result (c-eval env (car operands)))) + (if (false? result) + (values env* result) + (loop env* (cdr operands)))))))) + (= . ,(lambda (env slot value) + ;; TOOD if slot isn't a variable, but a field (or array index) + ;; then it needs to be resolved... + (let ((env* result (c-eval env value))) + (values (env-set! env* slot result) + result)))) + (and_eq ,(mod-set 'bitand)) ; &= + (or_eq ,(mod-set 'bitor)) ; |= + (xor_eq ,(mod-set 'xor)) ; ^= + (+= ,(mod-set '+)) + (-= ,(mod-set '-)) + (*= ,(mod-set '*)) + (/= ,(mod-set '/)) + (<<= ,(mod-set '<<)) + (>>= ,(mod-set '>>)) + (%= ,(mod-set '%)) + (+ . ,(simple-infix +)) + (* . ,(simple-infix *)) + (/ . ,(simple-infix /)) + (- . ,(lambda (env op . operands) + (if (null? operands) + (let ((env* value (c-eval env op))) + (values env* (- value))) + (apply (simple-infix -) + env op operands)))) + (bitor . ,(simple-infix bitwise-ior)) + (bitand . ,(simple-infix bitwise-and)) + (xor . ,(simple-infix bitwise-xor)) + (not_eq . ,(lambda (env a b) (c-eval env `(not (== ,a ,b))))) ; != + (<< . ,(binary-operator bitwise-arithmetic-shift-left)) + (>> . ,(binary-operator bitwise-arithmetic-shift-right)) + (< . ,(binary-operator (compose boolean->c-boolean <))) + (> . ,(binary-operator (compose boolean->c-boolean >))) + ;; this assumes that = handles pointers + (== . ,(binary-operator (compose boolean->c-boolean =))) + (<= . ,(binary-operator (compose boolean->c-boolean <=))) + (>= . ,(binary-operator (compose boolean->c-boolean >=))) + (% . ,(binary-operator modulo)) + + (not . ,(lambda (env value) + (let ((env* result (c-eval env value))) + (values env* (c-not result))))) + (compl . ,(lambda (env value) + (let ((env* result (c-eval env value))) + (values env* (bitwise-not result))))) + + ;; ++C + (pre-increment . ,(lambda (env slot) (c-eval env `(+= ,slot 1)))) + (pre-decrement . ,(lambda (env slot) (c-eval env `(-= ,slot 1)))) + ;; TODO these (C++, C--) need to handle if slot isn't a direct variabl + (post-increment . ,(lambda (env slot) + (let ((value (env-ref env slot))) + (values (env-set! env slot (1+ value)) + value)))) + (pre-decrement . ,(lambda (env slot) + (let ((value (env-ref env slot))) + (values (env-set! env slot (1+ value)) + value)))) + + (ternary . ,(lambda (env test true-clause false-clause) + (let ((env* value (c-eval env test))) + (c-eval env* + (if (c-boolean->boolean value) + true-clause false-clause))))) + + ;; TODO remaining operations + (as-type . ,(lambda (env target-type value) + (format (current-error-port) "cast<~s>(~s)~%" target-type value) + (values env value))) + + (object-slot . ,(lambda (env object slot) + (scm-error 'not-implemented "object-slot" + "Object slots are not implemented, when accessing ~s.~s" + (list object slot) #f))) + (dereference-slot ,(lambda (env ptr slot) + (scm-error 'not-implemented "dereference-slot" + "Object slots are not implemented, when accessing ~s->~s" + (list object slot) #f))) + (dereference . ,(lambda (env ptr) + (scm-error 'not-implemented "dereference" + "Poiner dereferencing is not implemented: *~s" + (list ptr) #f))) + (pointer . ,(lambda (env value) + (scm-error 'not-implemented "pointer" + "Pointer of is not implemented: &~s" + (list value) #f))))) + +;; TODO |,| + + +(define (c-eval environment expression) + (match expression + (('lambda formals body) (values environment `(lambda ,formals ,body))) + ;; hack since sizeof really should be a operator + (('funcall 'sizeof arg) + ;; TODO + (format (current-error-port) "sizeof ~s~%" arg) + (values environment 1)) + + (('funcall procedure-name ('#{,}# args ...)) + (let ((procedure (env-ref environment procedure-name))) + (ensure-c-procedure procedure "c-eval") + (unless (= (length args) (procedure-arity procedure)) + (scm-error 'c-eval-error "c-eval" + "Procedure arity mismatch for ~s, expected ~s, got ~s" + (list procedure-name + (procedure-arity procedure) + (length args)) + #f)) + (let ((env args* (fold-2 c-eval (cons environment '()) args ))) + (let ((inner-environment + (fold (lambda (name value env) (env-set! env name value)) + (push-frame! env) + (procedure-formals procedure) args*))) + (let ((resulting-environment + result-value + (c-eval inner-environment (procedure-body procedure)))) + (values (pop-frame! resulting-environment) + result-value)))))) + + (('funcall procedure arg) + (c-eval environment `(funcall ,procedure (#{,}# ,arg)))) + + ((operator operands ...) + (apply (or (assoc-ref primitives operator) + (scm-error 'c-eval-error "c-eval" + "Applying non-existant primitive operator: ~s, operands: ~s" + (list operator operands) #f)) + environment operands)) + + ;; "f()" gets compiled to simply f + ;; meaning that we instead use the environment to determine + ;; if something is a variable or procedure + (expr + (if (literal? expr) + (values environment expr) + (let ((value (env-ref environment expr))) + (if (c-procedure? value) + (c-eval environment `(funcall ,value (#{,}#))) + (values environment value))))))) diff --git a/module/c/eval/environment.scm b/module/c/eval/environment.scm new file mode 100644 index 00000000..12eefaf7 --- /dev/null +++ b/module/c/eval/environment.scm @@ -0,0 +1,34 @@ +(define-module (c eval environment) + :use-module (srfi srfi-1) + :export (make-environment + env-set! env-ref push-frame! pop-frame!)) + +(define (make-frame) + (make-hash-table)) + +(define (make-environment) + (list (make-frame))) + +;; Returns an updated environment, linear update +(define (env-set! env key value) + ;; get handle to differentiate #f + ;; (even though #f should never be stored since it's not a C value) + (cond ((find (lambda (frame) (hashq-get-handle frame key)) env) + => (lambda (frame) (hashq-set! frame key value))) + (else (hashq-set! (car env) key value))) + env) + +(define (env-ref env key) + (cond ((null? env) + (scm-error 'misc-error "env-ref" + "~s unbound" + (list key) + #f)) + ((hashq-get-handle (car env) key) => cdr) + (else (env-ref (cdr env) key)))) + +(define (push-frame! environment) + (cons (make-frame) environment)) + +(define (pop-frame! environment) + (cdr environment)) diff --git a/module/c/eval2.scm b/module/c/eval2.scm new file mode 100644 index 00000000..d58f20bf --- /dev/null +++ b/module/c/eval2.scm @@ -0,0 +1,20 @@ +(define-module (c eval2) + :use-module ((hnh util) :select (->)) + :export (C-TRUE + C-FALSE + boolean->c-boolean + c-boolean->boolean + c-not)) + + +(define C-TRUE 1) +(define C-FALSE 0) + +(define (boolean->c-boolean bool) + (if bool C-TRUE C-FALSE)) + +(define (c-boolean->boolean bool) + (not (zero? bool))) + +(define (c-not b) + (-> b c-boolean->boolean not boolean->c-boolean)) diff --git a/module/c/lex.scm b/module/c/lex.scm index 34e52d88..0bde5c9e 100644 --- a/module/c/lex.scm +++ b/module/c/lex.scm @@ -43,8 +43,23 @@ (define-peg-pattern integer all (and (or base-8 base-16 base-10) (? integer-suffix))) +(define-peg-pattern float-suffix all + (* (or "f" "F" "l" "L"))) + +(define-peg-pattern exponent all + (and (ignore (or "e" "E")) (? (or "+" "-")) base-10)) + +;; Helper patterns for creating named groups in float +(define-peg-pattern float-integer all base-10) +(define-peg-pattern float-decimal all base-10) + +(define-peg-pattern float all + (or (and float-integer exponent (? float-suffix)) + (and (? float-integer) (ignore ".") float-decimal (? exponent) (? float-suffix)) + (and float-integer (ignore ".") (? exponent) (? float-suffix)))) + (define-peg-pattern number body - (or integer)) + (or float integer)) (define-peg-pattern group all (and (ignore "(") expr (ignore ")"))) @@ -65,11 +80,16 @@ (define-peg-pattern char all (and (ignore "'") (or escaped-char peg-any) (ignore "'"))) +(define-peg-pattern quot none "\"") + +(define-peg-pattern string all + (and quot (* (or escaped-char (and (not-followed-by "\"") peg-any))) quot)) (define-peg-pattern* operator all `(or ,@(map symbol->string symbol-binary-operators) ,@(map (lambda (op) `(and ,(symbol->string op) ws)) - wordy-binary-operators))) + wordy-binary-operators) + "?" ":")) ;; whitespace (define-peg-pattern ws none @@ -89,17 +109,23 @@ base-10-digit)))) (define-peg-pattern prefix-operator all - (or "!" "~" "*" "&" "++" "--" "+" "-")) + ;; It's important that ++ and -- are BEFORE + and - + ;; otherwise the first + is found, leaving the second +, which fails + ;; to lex since it's an invalid token + ;; TODO sizeof can be written as a prefix operator + ;; (without parenthesis) if the operand is an expression. + (or "*" "&" "++" "--" + "!" "~" "+" "-")) + ;;; Note that stacked pre or postfix operators without parenthesis ;;; dosen't work. So `*&C' is invalid, while `*(&C)' is valid. (define-peg-pattern prefix all - (and prefix-operator sp (or variable group funcall #; postfix - ))) + (and prefix-operator sp (or variable group funcall literal))) (define-peg-pattern postfix-operator all - (or "++" "--")) + (or "++" "--" "*")) (define-peg-pattern postfix all ;; literals can't be in-place incremented and decremented @@ -111,15 +137,25 @@ ;; first case is "same" as expr, but in different order to prevent ;; infinite self reference. Pre and postfix not here, solved by having ;; them before infix in expr - (and (or funcall postfix prefix group char number variable) + (and (or funcall postfix prefix group literal variable) sp operator sp expr)) (define-peg-pattern funcall all (and variable sp group)) +(define-peg-pattern literal body + (or char string number)) + ;;; main parser (define-peg-pattern expr body - (+ (and sp (or infix postfix prefix funcall group char number variable) + (+ (and sp (or + ;; float must be BEFORE infix, otherwise 3.2 is parsed as (infix 3 (operator ".") 2) + ;; that however breaks the infix logic, meaning that floating point numbers can't be + ;; used in basic arithmetic. + ;; TODO remove all implicit order of operations handling in the lexer, and move it to + ;; the parser. This should also fix the case of typecasts being applied incorrectly. + float + infix postfix prefix funcall group literal variable) sp))) diff --git a/module/c/lex2.scm b/module/c/lex2.scm new file mode 100644 index 00000000..6083190f --- /dev/null +++ b/module/c/lex2.scm @@ -0,0 +1,326 @@ +(define-module (c lex2) + :use-module (ice-9 peg) + :export (lex)) + +;;; A.1 Lexical grammar +;;; A.1.1 Lexical elements + +;; (6.4) +(define-peg-pattern token all + (or keyword + identifier + constant + string-literal + punctuator + )) + +;; (6.4) +(define-peg-pattern preprocessing-token all + ;; string literal moved before header-name since string literals + ;; otherwise became q-strings + (or string-literal + header-name + identifier + pp-number + character-constant + punctuator + ;; Each non-white-space character that cannot be one of the above + )) + +;;; A.1.2 Keywords + +;; (6.4.1) +(define-peg-pattern keyword all + (or "auto" "break" "case" "char" "const" "continue" "default" + "do" "double" "else" "enum" "extern" "float" "for" "goto" + "if" "inline" "int" "long" "register" "restrict" "return" + "short" "signed" "sizeof" "static" "struct" "switch" + "typedef" "union" "unsigned" "void" "volatile" "while" + "_Alignas" "_Alignof" "_Atomic" "_Bool" "_Complex" + "_Generic" "_Imaginary" "_Noreturn" "_Static_assert" + "_Thread_local")) + +;;; A.1.3 Identifiers + +;; (6.4.2.1) +(define-peg-pattern identifier all + (and identifier-nondigit (* (or identifier-nondigit digit)))) + +;; (6.4.2.1) +(define-peg-pattern identifier-nondigit body + (or nondigit + universal-character-name + ;; TODO other implementation-defined characters + )) + +;; (6.4.2.1) +(define-peg-pattern nondigit body + (or "_" + (range #\A #\Z) + (range #\a #\z))) + +;; (6.4.2.1) +(define-peg-pattern digit body + (range #\0 #\9)) + +;;; A.1.4 Universal character names + +;; (6.4.3) +(define-peg-pattern universal-character-name all + (or (and "\\u" hex-quad) + (and "\\U" hex-quad hex-quad))) + +;; (6.4.3) +(define-peg-pattern hex-quad body + (and hexadecimal-digit hexadecimal-digit + hexadecimal-digit hexadecimal-digit)) + +;;; A.1.5 Constants + +;; (6.4.4) +(define-peg-pattern constant all + ;; Int and float swapped from standard since we need to try parsing + ;; the floats beforehand + (or floating-constant + integer-constant + enumeration-constant + character-constant)) + +;; (6.4.4.1) +(define-peg-pattern integer-constant all + (and (or decimal-constant + octal-constant + hexadecimal-constant) + integer-suffix)) + +;; (6.4.4.1) +(define-peg-pattern decimal-constant all + (and nonzero-digit + (+ digit))) + +;; (6.4.4.1) +(define-peg-pattern octal-constant all + (+ octal-digit)) + +;; (6.4.4.1) +(define-peg-pattern hexadecimal-constant all + (and hexadecimal-prefix (+ hexadecimal-digit))) + +;; (6.4.4.1) +(define-peg-pattern hexadecimal-prefix none + (or "0x" "0X")) + +;; (6.4.4.1) +(define-peg-pattern nonzero-digit body + (range #\1 #\9)) + +;; (6.4.4.1) +(define-peg-pattern octal-digit body + (range #\0 #\7)) + +;; (6.4.4.1) +(define-peg-pattern hexadecimal-digit body + (or (range #\0 #\9) + (range #\a #\f) + (range #\A #\F))) + +;; (6.4.4.1) +(define-peg-pattern integer-suffix all + (or (and unsigned-suffix (? long-suffix)) + (and long-suffix (? unsigned-suffix)))) + +;; (6.4.4.1) +;; This is a merger of long-suffix and long-long-suffix +(define-peg-pattern long-suffix body + (or "l" "L" "ll" "LL")) + +;; (6.4.4.1) +(define-peg-pattern unsigned-suffix body + (or "u" "U")) + +;; (6.4.4.2) +(define-peg-pattern floating-constant all + (or decimal-floating-constant + hexadecimal-floating-constant)) + +;; (6.4.4.2) +(define-peg-pattern decimal-floating-constant all + (or (and fractional-constant (? exponent-part) (? floating-suffix)) + (and digit-sequence exponent-part (? floating-suffix)))) + +;; (6.4.4.2) +(define-peg-pattern hexadecimal-floating-constant all + (and hexadecimal-prefix + (or hexadecimal-fractional-constant + hexadecimal-digit-sequence) + binary-exponent-part + (? floating-suffix))) + +;; (6.4.4.2) +(define-peg-pattern fractional-constant all + (or (and (? digit-sequence) "." digit-sequence) + (and digit-sequence "."))) + +;; (6.4.4.2) +(define-peg-pattern exponent-part all + (and (or "e" "E") (? sign) digit-sequence)) + +;; (6.4.4.2) +(define-peg-pattern sign all + (or "+" "-")) + +;; (6.4.4.2) +(define-peg-pattern digit-sequence body + (+ digit)) + +;; (6.4.4.2) +(define-peg-pattern hexadecimal-fractional-constant all + (or (and (? hexadecimal-digit-sequence) "." hexadecimal-digit-sequence) + (and hexadecimal-digit-sequence "."))) + +;; (6.4.4.2) +(define-peg-pattern binary-exponent-part all + (and (or "p" "P") + (? sign) + digit-sequence)) + +;; (6.4.4.2) +(define-peg-pattern hexadecimal-digit-sequence body + (+ hexadecimal-digit)) + +;; (6.4.4.2) +(define-peg-pattern floating-suffix all + (or "f" "l" "F" "L")) + +;; (6.4.4.3) +(define-peg-pattern enumeration-constant all + identifier) + +(define-peg-pattern character-prefix all + (or "L" "u" "U")) + +;; (6.4.4.4) +(define-peg-pattern character-constant all + (and (? character-prefix) + (ignore "'") + (+ c-char) + (ignore "'"))) + +;; (6.4.4.4) +(define-peg-pattern c-char body + (or (and (not-followed-by (or "'" "\\" "\n")) peg-any) + escape-sequence)) + +;; (6.4.4.4) +(define-peg-pattern escape-sequence all + (or simple-escape-sequence + octal-escape-sequence + hexadecimal-escape-sequence + universal-character-name)) + +;; (6.4.4.4) +(define-peg-pattern simple-escape-sequence all + (and (ignore "\\") (or "'" "\"" "?" "\\" + "a" "b" "f" "n" "r" "t" "v"))) + +;; (6.4.4.4) +(define-peg-pattern octal-escape-sequence all + (and (ignore "\\") octal-digit (? octal-digit) (? octal-digit))) + +;; (6.4.4.4) +(define-peg-pattern hexadecimal-escape-sequence all + (and (ignore "\\x") (+ hexadecimal-digit))) + +;; A.1.6 String literals + +;; (6.4.5) +(define-peg-pattern string-literal all + (and (? encoding-prefix) + (ignore "\"") + (* s-char) + (ignore "\""))) + +;; (6.4.5) +(define-peg-pattern encoding-prefix all + (or "u8" "u" "U" "L")) + +;; (6.4.5) +(define-peg-pattern s-char body + (or (and (not-followed-by (or "\"" "\\" "\n")) peg-any) + escape-sequence)) + +;;; A.1.7 + +;; (6.4.6) +(define-peg-pattern punctuator all + (or "[" "]" "(" ")" "{" "}" + "..." ; Moved to be before "." + "." "->" + "++" "--" "&" "*" "+" "-" "~" "!" + "/" "%" "<<" ">>" "<" ">" "<=" ">=" "==" "!=" "^" "|" "&&" "||" + "?" ":" ";" + "=" "*=" "/=" "%=" "+=" "-=" "<<=" ">>=" "&=" "^=" "|=" + "," "#" "##" + "<:" ":>" "<%" "%>" "%:" "%:%:")) + +;;; A.1.8 Header names + +(define-peg-pattern h-string all (+ h-char)) +(define-peg-pattern q-string all (+ q-char)) + +;; (6.4.7) +(define-peg-pattern header-name all + (or (and (ignore "<") h-string (ignore ">")) + (and (ignore "\"") q-string (ignore "\"")))) + +;; (6.4.7) +(define-peg-pattern h-char body + (or (and (not-followed-by (or ">" "\n")) peg-any) + escape-sequence)) + +;; (6.4.7) +(define-peg-pattern q-char body + (or (and (not-followed-by (or "\"" "\n")) peg-any) + escape-sequence)) + +;;; A.1.9 Preprocessing numbers + +;; (6.4.8) +(define-peg-pattern pp-number all + (and (? ".") digit + (* (or digit + identifier-nondigit + (and (or "e" "E" "p" "P") + sign) + ".")))) + + + +(define-peg-pattern whitespace all + (or "\t" "\n" "\v" "\f" " " + ;; "\r" + )) + +(define-peg-pattern block-comment body + (and (ignore "/*") + (* (and (not-followed-by "*/") + peg-any)) + (ignore "*/"))) + +(define-peg-pattern line-comment body + (and (ignore "//") + (* (and (not-followed-by "\n") + peg-any)))) + +(define-peg-pattern comment all + (or line-comment block-comment)) + +(define-peg-pattern preprocessing-tokens all + (* (or whitespace + comment + preprocessing-token))) + + +;; returns a list of lexemes +(define (lex string) + (cdr (peg:tree (match-pattern preprocessing-tokens string)))) diff --git a/module/c/line-fold.scm b/module/c/line-fold.scm new file mode 100644 index 00000000..c61c2c70 --- /dev/null +++ b/module/c/line-fold.scm @@ -0,0 +1,29 @@ +(define-module (c line-fold) + :use-module (srfi srfi-1) + :use-module (srfi srfi-71) + :export (fold-lines)) + +(define (line-continued? line) + (and (not (string-null? line)) + (char=? #\\ (string-ref line (1- (string-length line)))))) + +(define (strip-backslash line) + (string-drop-right line 1)) + +(define (fold-lines string) + (with-output-to-string + (lambda () + (let loop ((lines (string-split string #\newline))) + (cond ((null? lines) 'NOOP) + ((null? (cdr lines)) + ;; TODO error message if last character is a backslash + (display (car lines)) + (newline)) + (else + (let ((to-merge remaining (span line-continued? lines))) + (for-each display (map strip-backslash to-merge)) + (display (car remaining)) + (newline) + (for-each (lambda _ (newline)) + (iota (length to-merge))) + (loop (cdr remaining))))))))) diff --git a/module/c/operators.scm b/module/c/operators.scm index ab1b3e7c..910dc8a9 100644 --- a/module/c/operators.scm +++ b/module/c/operators.scm @@ -9,8 +9,9 @@ `(+ - * / & ,(symbol #\|) ^ << >> % < > =)) ;; apparently part of C +;; https://en.cppreference.com/w/cpp/language/operator_alternative (define wordy-binary-operators - '(bitand and_eq and bitor or_eq or xor_eq xor)) + '(bitand and_eq and bitor or_eq or xor_eq xor not_eq)) (define symbol-binary-operators (append (map (lambda (x) (symbol-append x '=)) simple-operators) diff --git a/module/c/parse.scm b/module/c/parse.scm index 8030da77..7d11ea17 100644 --- a/module/c/parse.scm +++ b/module/c/parse.scm @@ -1,11 +1,14 @@ (define-module (c parse) :use-module (hnh util) :use-module (srfi srfi-1) + :use-module (srfi srfi-71) :use-module (ice-9 match) + :use-module ((rnrs io ports) + :select (string->bytevector make-transcoder utf-8-codec)) + :use-module (rnrs bytevectors) :export (parse-lexeme-tree)) -;;; Rename this -(define (perms set) +(define (permutations set) (concatenate (map (lambda (key) (map (lambda (o) (cons key o)) @@ -21,23 +24,124 @@ (define valid-sequences (delete 'dummy (lset-union eq? '(dummy) - (map symbol-concat (perms '(() U L))) - (map symbol-concat (perms '(() U LL)))))) + (map symbol-concat (permutations '(() U L))) + (map symbol-concat (permutations '(() U LL)))))) ;; => (LLU ULL LL LU UL L U) (aif (memv (string->symbol (string-upcase str)) valid-sequences) (case (car it) - [(LLU ULL) '(unsigned long-long)] + [(LLU ULL) '(unsigned long long)] [(LU UL) '(unsigned long)] - [(LL) '(long-long)] + [(LL) '(long long)] [(L) '(long)] [(U) '(unsigned)]) (scm-error 'c-parse-error "parse-integer-suffix" "Invalid integer suffix ~s" (list str) #f))) +(define (parse-float-suffix str) + (case (string->symbol str) + ((f F) '(float)) + ((l L) '(long double)))) + +(define (group-body->type vars) + (concatenate + (map + (match-lambda (('variable var) (list (parse-lexeme-tree `(variable ,var)))) + (('postfix ('variable var) + ('postfix-operator "*")) + (list (parse-lexeme-tree `(variable ,var)) + '*)) + (else (scm-error 'c-parse-error "parse-lexeme-tree" + "Invalid token ~s in typecast form: ~s" + (list else vars) #f))) + vars))) + +;; Takes a list of strings and integers, and merges it all into a single +;; bytevector representing a c string +(define* (string-fragments->c-string + fragments optional: (transcoder (make-transcoder (utf-8-codec)))) + + (define fragments-fixed + (map (lambda (frag) + (if (string? frag) + (string->bytevector frag transcoder) + frag)) + fragments)) + + (define bv-length + (fold (lambda (item sum) (+ sum (if (bytevector? item) + (bytevector-length item) + 1))) + 0 fragments-fixed)) + + (define bv (make-bytevector (1+ bv-length))) + ;; trailing null byte + (bytevector-u8-set! bv bv-length 0) + (fold (lambda (item idx) + (cond ((bytevector? item) + (bytevector-copy! item 0 + bv idx + (bytevector-length item)) + (+ idx (bytevector-length item))) + (else (bytevector-u8-set! bv idx item) + (+ idx 1)))) + 0 + fragments-fixed) + bv) + +(define (parse-float-form float-form) + (let ((float-string + (fold (lambda (arg str) + (string-append + str + (match arg + (('float-integer ('base-10 n)) n) + (('float-decimal ('base-10 n)) (string-append "." n)) + (('exponent "+" ('base-10 n)) (string-append "e" n)) + (('exponent ('base-10 n)) (string-append "e" n)) + (('exponent "-" ('base-10 n)) (string-append "e-" n))))) + "" float-form))) + ;; exact->inexact is a no-op if we already have an inexact number, but + ;; ensures we get an inexact number when we have an exact number (which we + ;; can get from the "1." case). Returning an inexact number here is important + ;; to avoid arithmetic suprises later. + (exact->inexact + (or (string->number float-string) + (scm-error 'c-parse-error "parse-lexeme-tree" + "Couldn't parse expression as float: ~s" + (list `(float ,@args)) #f))))) + + +(define (resolve-escaped-char form) + (match form + (('base-8-char n) (string->number n 8)) + (('base-16-char n) (string->number n 16)) + (c (char->integer + (case (string-ref c 0) + ((#\a) #\alarm) + ((#\b) #\backspace) + ((#\e) #\esc) ;; non-standard + ((#\f) #\page) + ((#\n) #\newline) + ((#\r) #\return) + ((#\t) #\tab) + ((#\v) #\vtab) + ((#\\) #\\) + ;; These are valid in both strings and chars + ((#\') #\') + ((#\") #\")))))) + +;; Takes a list of strings and escaped-char form +;; and returns a list of strings and integers +(define (resolve-string-fragment fragment) + (match fragment + (('escaped-char c) + (resolve-escaped-char c)) + (fargment fragment))) + (define (parse-lexeme-tree tree) (match tree ['() '()] @@ -50,55 +154,67 @@ [('integer n ('integer-suffix suffix)) `(as-type ,(parse-integer-suffix suffix) - ,(parse-lexeme-tree n)) - ] + ,(parse-lexeme-tree n))] + [('integer n) (parse-lexeme-tree n)] + + [('float args ... ('float-suffix suffix)) + `(as-type ,(parse-float-suffix suffix) + ;; parse rest of float as if it lacked a suffix + ,(parse-lexeme-tree `(float ,@args)))] + + [('float args ...) (parse-float-form args)] + ;; Character literals, stored as raw integers ;; so mathematical operations keep working on them. - [('char ('escaped-char ('base-8-char n))) - (-> n (string->number 8) #; integer->char)] - [('char ('escaped-char ('base-16-char n))) - (-> n (string->number 16) #; integer->char)] - [('char ('escaped-char c)) - (char->integer - (case (string-ref c 0) - ((#\a) #\alarm) - ((#\b) #\backspace) - ((#\e) #\esc) - ((#\f) #\page) - ((#\n) #\newline) - ((#\r) #\return) - ((#\t) #\tab) - ((#\v) #\vtab) - ((#\\) #\\) - ((#\') #\')))] + [('char ('escaped-char c)) (resolve-escaped-char c)] + [('char c) (char->integer (string-ref c 0))] [('variable var) (string->symbol var)] + + ;; normalize some binary operators to their wordy equivalent + ;; (which also happens to match better with scheme) + [('operator "&&") 'and] + [('operator "&=") 'and_eq] + [('operator "&") 'bitand] + [('operator "|") 'bitor] + [('operator "!=") 'not_eq] + [('operator "||") 'or] + [('operator "|=") 'or_eq] + [('operator "^") 'xor] + [('operator "^=") 'xor_eq] + ;; Change these names to something scheme can handle better + [('operator ".") 'object-slot] + [('operator "->") 'dereference-slot] [('operator op) (string->symbol op)] + [('prefix-operator op) (case (string->symbol op) + ((!) 'not) + ((~) 'compl) ((*) 'dereference) ((&) 'pointer) ((++) 'pre-increment) ((--) 'pre-decrement) - (else => identity))] + ((-) '-) + (else (scm-error 'c-parse-error "parse-lexeme-tree" + "Unknown prefix operator ~s" + (list op) #f)))] [('postfix-operator op) (case (string->symbol op) [(++) 'post-increment] [(--) 'post-decrement] - [else => identity])] + [else (scm-error 'c-parse-error "parse-lexeme-tree" + "Unknown postfix operator ~s" + (list op) #f)])] ;; Parenthesis grouping - [('group args) + [('group args ...) (parse-lexeme-tree args)] - ;; Atomic item. Used by flatten-infix - [('atom body) - (parse-lexeme-tree body)] - [('prefix op arg) `(,(parse-lexeme-tree op) ,(parse-lexeme-tree arg))] @@ -107,81 +223,204 @@ `(,(parse-lexeme-tree op) ,(parse-lexeme-tree arg))] + + + + + ;; resolved-operator and ternary are the return "types" + ;; of resolve-order-of-operations + [(('resolved-operator op) args ...) + `(,op ,@(map parse-lexeme-tree args))] + + [('ternary a b c) + `(ternary ,(parse-lexeme-tree a) + ,(parse-lexeme-tree b) + ,(parse-lexeme-tree c))] + + + + + ;; Is it OK for literal strings to be "stored" inline? + ;; Or must they be a pointer? + ['string #vu8(0)] + [('string str ...) + (-> (map resolve-string-fragment str) + string-fragments->c-string)] + + ;; implicit concatenation of string literals + [(('string str ...) ...) + (-> (map resolve-string-fragment (concatenate str)) + string-fragments->c-string)] + [('infix args ...) - (resolve-order-of-operations - (flatten-infix (cons 'infix args)))] + (let ((r (resolve-order-of-operations + (flatten-infix (cons 'infix args))))) + (parse-lexeme-tree r))] + [('funcall function ('group arguments)) `(funcall ,(parse-lexeme-tree function) ,(parse-lexeme-tree arguments))] - [bare (scm-error 'c-parse-error - "parse-lexeme-tree" - "Naked literal in lex-tree: ~s" - (list bare) - #f)])) + [(('variable "struct") ('variable value) ..1) + `(struct-type ,@(map string->symbol value)) + ] + + ;; A list of variables. Most likely a type signature + ;; [(('variable value) ..1) + ;; ] + + ;; A typecast with only variables must (?) be a typecast? + [(('group groups ..1) ... value) + (fold-right (lambda (type done) `(as-type ,type ,done)) + (parse-lexeme-tree value) + (map group-body->type groups))] + + ;; Type name resolution? + ;; https://en.wikipedia.org/wiki/C_data_types + ;; base types with spaces: + ;; ======================= + ;; [[un]signed] char + ;; [[un]signed] short [int] + ;; [[un]signed] int + ;; [un]signed [int] + ;; [[un]signed] long [int] + ;; [[un]signed] long long [int] + ;; float + ;; [long] double + + ;; https://en.wikipedia.org/wiki/Type_qualifier + ;; qualifiers + ;; const + ;; volatile + ;; restrict + ;; _Atomic + + + ;; Storage specifiers + ;; auto + ;; register + ;; static + ;; extern + + ;; struct <typename> + ;; enum <typename> + ;; union <typename> + + ;; https://en.wikipedia.org/wiki/C_syntax + ;; int (*ptr_to_array)[100] + + + [(? symbol? bare) + (scm-error 'c-parse-error + "parse-lexeme-tree" + "Naked literal in lex-tree: ~s" + (list bare) + #f)] + + [form + (scm-error 'c-parse-error + "parse-lexeme-tree" + "Unknown form in lex-tree: ~s" + (list form) #f) + ])) ;; https://en.wikipedia.org/wiki/Operators_in_C_and_C%2B%2B +;; https://en.cppreference.com/w/c/language/operator_precedence (define order-of-operations (reverse - (concatenate - ;; This is only for binary operations - `((-> ,(symbol #\.)) - (* / %) - (+ -) - (<< >>) - (< <= > >=) - (== !=) - (&) - (^) - (,(symbol #\|)) - (&&) - (,(symbol #\| #\|)) - (= += -= *= /= %= <<= >>= &= ^= ,(symbol #\| #\=)) - (,(symbol #\,)) - )))) - -(define (mark-other form) - (if (list? form) (cons '*other* form) form)) + ;; This is only for binary operations + `((-> ,(symbol #\.)) + ;; All unary procedures go here, incnluding typecasts, and sizeof + (* / %) + (+ -) + (<< >>) + (< <= > >=) + (== != not_eq) + (& bitand) + (^ xorg) + (,(symbol #\|) bitor) + (&& and) + (,(symbol #\| #\|) or) + (? :) + (= += -= *= /= %= <<= >>= &= ^= ,(symbol #\| #\=) + and_eq or_eq xor_eq) + (,(symbol #\,)) + ))) + +;; a.b->c.d (. (-> (. a b) c) d) +;; 2 * 3 / 4 * 5 => (* (/ (* 2 3) 4) 5) +;; eller => (* 2 (/ 3 4) 5) (define* (resolve-order-of-operations tree optional: (order order-of-operations)) (if (null? order) - (car tree) + (scm-error 'c-parse-error + "resolve-order-of-operations" + "Out of operations to resolve when resolving expression ~s" + (list tree) #f) (match tree - [('*other* body ...) body] - [(form) (resolve-order-of-operations form order)] - [(forms ...) - (match (split-by forms (car order)) - [(group) (resolve-order-of-operations group (cdr order))] - [groups - (cons (car order) - (map (lambda (form) (resolve-order-of-operations - form order-of-operations)) - groups))])] - [a a]))) + [('fixed-infix form) form] + [('fixed-infix forms ...) + (match (split-by-one-of forms (car order)) + [(group) + (resolve-order-of-operations (cons 'fixed-infix group) + (cdr order))] + [(a ('? b ...) (': c ...)) + `(ternary ,(resolve-order-of-operations (cons 'fixed-infix a) (cdr order)) + ,(resolve-order-of-operations (cons 'fixed-infix b) (cdr order)) + ,(resolve-order-of-operations (cons 'fixed-infix c) (cdr order)))] + [(first rest ...) + ;; TODO this is only valid for the associative operators (+, ...) + ;; but not some other (<, ...) + (if (apply eq? (map car rest)) + (let ((op (caar rest))) + `((resolved-operator ,op) + ,@(map (lambda (x) (resolve-order-of-operations (cons 'fixed-infix x) + (cdr order))) + (cons first (map cdr rest))))) + (fold (lambda (item done) + (let ((operator args (car+cdr item))) + `((resolved-operator ,operator) + ,done ,(resolve-order-of-operations + (cons 'fixed-infix args) + (cdr order))))) + (resolve-order-of-operations (cons 'fixed-infix first) + (cdr order)) + rest))])]))) + +;; 1 * 2 / 3 * 4 +;; ⇒ ((1) (* 2) (/ 3) (* 4)) +;; (1) +;; (* (1) 2) +;; (/ (* (1) 2) 3) +;; (* (/ (* (1) 2) 3) 4) ;; Flatens a tree of infix triples. Stops when it should. ;; (parenthesis, function calls, ...) (define (flatten-infix form) - (match form - [('infix left op ('infix right ...)) - (cons* (parse-lexeme-tree left) - (parse-lexeme-tree op) - (flatten-infix (cons 'infix right)))] - - [('infix left op right) - (list (mark-other (parse-lexeme-tree left)) - (parse-lexeme-tree op) - (mark-other (parse-lexeme-tree right)))] - - [other (scm-error 'c-parse-error - "flatten-infix" - "Not an infix tree ~a" - (list other) - #f)])) + (cons 'fixed-infix + (let loop ((form form)) + (match form + [('infix left op ('infix right ...)) + (cons* left + (parse-lexeme-tree op) + (loop (cons 'infix right)))] + + [('infix left op right) + (list left + (parse-lexeme-tree op) + right)] + + [('infix form) form] + + [other (scm-error 'c-parse-error + "flatten-infix" + "Not an infix tree ~a" + (list other) + #f)])))) diff --git a/module/c/preprocessor.scm b/module/c/preprocessor.scm new file mode 100644 index 00000000..71712b17 --- /dev/null +++ b/module/c/preprocessor.scm @@ -0,0 +1,370 @@ +(define-module (c preprocessor) + :use-module (srfi srfi-1) + :use-module (srfi srfi-9 gnu) + :use-module (ice-9 rdelim) + :use-module (ice-9 regex) + :use-module (hnh util object) + + :use-module (hnh util) + :use-module (hnh util object) + ) + +(define (read-lines port) + (let loop ((done '())) + (let ((line (read-line port))) + (if (eof-object? line) + (reverse done) + (loop (cons line done)))))) + +;; The source line of a give readen line +(define line-number (make-object-property)) +;; The source file of a given readen line +(define line-file (make-object-property)) + + +(define (mark-with-property! items property property-value) + (for-each (lambda (item) (set! (property item) property-value)) + items)) + +(define trigraph-rx (make-regexp "??([=()/'<>!-])")) +(define (expand-trigraphs line) + (regexp-substitute/global + #f trigraph-rx + line + 'pre (lambda (m) (case (string-ref (match:substring m 1) 1) + ((#\=) "#") + ((#\() "[") + ((#\)) "]") + ((#\/) "\\") + ((#\') "^") + ((#\<) "{") + ((#\>) "}") + ((#\!) "|") + ((#\-) "~"))) + 'post)) + +(define (number-lines lines) + (for-each (lambda (line number) + (set! (line-number line) number)) + lines + (iota (length lines) 1)) + lines) + +;; Should this line be merged with the next +(define (line-continued? line) + (case (string-length line) + ((0) #f) + ((1) (string=? "\\" line)) + (else + (let ((len (string-length line))) + ;; TODO can extra backslashes change this? + (and (char=? #\\ (string-ref line (- len 1))) + (not (char=? #\\ (string-ref line (- len 2))))))))) + +(define (transfer-line-number to from) + (set! (line-number to) (line-number from)) + to) + +;; Merge two lines, assuming that upper ends with a backslash +(define (merge-lines upper lower) + (let ((new-string (string-append (string-drop-right upper 1) lower))) + (transfer-line-number new-string upper) + new-string)) + +(define (fold-lines lines) + (fold-right (lambda (line done) + (if (line-continued? line) + (cons (merge-lines line (car done)) (cdr done)) + (cons line done))) + '() + lines)) + + +(define comment-rx (make-regexp "(//|/[*]|[*]/)")) + +(define (strip-comments lines) + (let loop ((in-comment #f) (lines lines) (done '())) + (if (null? lines) + (reverse done) + (let ((line (car lines))) + (cond ((regexp-exec comment-rx line) + => (lambda (m) + (format (current-output-port) "~s ~s substr = ~s~%" in-comment (line-number line) (match:substring m)) + (cond ((and in-comment (string=? "*/" (match:substring m))) + (loop #f (cons (transfer-line-number (match:suffix m) line) + (cdr lines)) + done)) + (in-comment (loop #t (cdr lines) done)) + ((string=? "//" (match:substring m)) + (loop #f (cdr lines) (cons (transfer-line-number (match:prefix m) line) + done))) + ((string=? "/*" (match:substring m)) + (loop #t (cons (transfer-line-number (match:suffix m) line) (cdr lines)) done)) + (else + (scm-error 'cpp-error "strip-comments" + "Unexpected */ in file ~a on line ~a" + (list (line-file line) (line-number line)) + #f))))) + (else (loop in-comment (cdr lines) (cons line done)))))))) + + +(define-immutable-record-type <preprocessor-directive> + (make-preprocessor-directive type body) + proprocessor-directive? + (type directive-type) + (body directive-body)) + +(define cpp-directive-rx (make-regexp "\\s*#\\s*((\\w+)(.*))?")) +(define (preprocessor-directive? line) + (cond ((regexp-exec cpp-directive-rx line) + => (lambda (m) + (if (match:substring m 2) + (make-preprocessor-directive + (string->symbol (match:substring m 2)) + (string-trim-both (match:substring m 3) char-set:whitespace)) + 'sort-of))) + (else #f))) + +;; defined + +;; TODO _Pragma + + +(define (expand-function-line-macro environment macro . params) + (expand-macro environment (apply-macro macro (map (lambda (param) (expand-macro environment param)) params)))) + +;; (define (environment-ref )) + +(define (list-of? lst predicate) + (every predicate lst)) + + +;; Parantheses when defining macro +(define (parse-parameter-string string) + (map string-trim-both + (string-split (string-trim-both string (char-set #\( #\))) + #\,))) + + +(define-type (object-macro) + (body type: string?)) + +(define-type (function-macro) + (formals type: (list-of? string?)) + (body type: string?)) + + +;; The interesting part +;; environment, (list string) -> (values (list string) (list strings)) +;; multiple lines since since a function-like macro can extend over multiple lines +;; (define (expand-macros environment strings) +;; ) + + +(define (crash-if-not-if body guilty) + (scm-error 'cpp-error guilty + "else, elif, and endif invalid outside if scope: ~s~%file: ~s line: ~s" + (list body (line-file body) (line-number body)))) + +;; (environment, lines) -> environment x lines +(define (parse-directives environment lines) + (let loop ((environment environment) (lines lines) (done '())) + (let* ((line (car line)) + (directive? (preprocessor-directive? line))) + (case directive? + ((#f) ; regular line + (loop environment (cdr lines) + ;; TODO this doesn't work, since parse-macros works on multiple lines + (cons (parse-macros environment (car lines)) done))) + ((sort-of) ; a no-op directive + (loop environment (cdr lines) done)) + (else ; an actual directive + (case (car (cpp-if-status environment)) + ((outside) + (case (directive-type m) + ((ifndef endif else) + (scm-error 'cpp-error "parse-directives" + "Unexpected directive: ~s" + (list line) #f)) + (else ; inside if, ifelse or else + ;; outside active-if inactive-if + (case (directive-type m) + ;; stack ending directives + ((endif) + (case (car (cpp-if-status environment)) + ((outside) (crash-if-not-if (directive-body m) "endif")) + (else (loop (modify environment cpp-if-status cdr) + (cdr lines) + done)))) + + ;; stack nudging directives + ((else) + (case (car (cpp-if-status environment)) + ((outside) (crash-if-not-if (directive-body m) "else")) + (else (loop (modify environment (lens-compose cpp-if-status car*) + (lambda (old) + (case old + ((active-if) 'inactive-if) + ((inactive-if) 'active-if)))) + (cdr lines) + done)))) + ((elif) + (case (car (cpp-if-status environment)) + ((outside) (crash-if-not-if (directive-body m) "elif")) + (else 'TODO ;; TODO + ) + )) + + ;; stack creating directives + ;; on inactive-if each creates a new frame, which also is inactive + ((ifndef) + (case (car (cpp-if-status environment)) + ((inactive-if) (loop (modify environment cpp-if-status + xcons 'inactive-if) + (cdr lines) + done)) + (else (loop (modify environment cpp-if-status + xcons (if (in-environment? environment (directive-body directive?)) + 'inactive-if 'active-if)) + (cdr lines) + done)))) + + ((ifdef) + (case (car (cpp-if-status environment)) + ((inactive-if) (loop (modify environment cpp-if-status + xcons 'inactive-if) + (cdr lines) + done)) + (else + (loop (modify environment cpp-if-status + xcons (if (in-environment? environment (directive-body directive?)) + 'active-if 'inactive-if)) + (cdr lines) + done)))) + + ((if) + (case (car (cpp-if-status environment)) + ((inactive-if) (loop (modify environment cpp-if-status + xcons 'inactive-if) + (cdr lines) + done)) + (else 'TODO ;; TODO + ))) + + + ;; other directives + ((include) (cond ((string-match "[<\"](.*)" + => (lambda (m) + (let ((fileneme (string-drop-right (directive-body m) 1))) + (case (string-ref (match:substring m 1) 0) + ;; TODO include-path + ((#\<) (handle-file environment filename)) + ((#\") (handle-file environment filename)))))) + (else (scm-error 'cpp-error "parse-directives" + "Invalid include" + '() #f))))) + ((define) + ;; TODO what are valid names? + (cond ((string-match "^(\\w+)([(][^)]*[)])?\\s+(.*)" (directive-body directive?)) + => (lambda (m) + (loop (let ((macro-body (string-trim-both (match:substring m 3)))) + (add-identifier! + environment + (match:substring m 1) + (cond ((match:substring m 2) + => (lambda (parameter-string) + (function-macro + formals: (parse-parameter-string parameter-string) + body: macro-body))) + (else (object-macro body: macro-body))))) + (cdr lines) + done))) + (else (scm-error 'cpp-error "parse-directives" + "Invalid #define line, ~s" + (list (directive-body directive?)) + #f)))) + + ((undef) + (case (car (cpp-if-status environment)) + ((inactive-if) (loop environment (cdr lines) done)) + (else (loop (remove-identifier environment (directive-body directive?)) + (cdr lines) + done)))) + + ((line) + (case (car (cpp-if-status environment)) + ((inactive-if) (loop environment (cdr lines) done)) + ;; TODO add first-run parameter to loop, in case expand-macros still return something invalid + (else (let parse-line-directive ((tokens (string-tokenize (directive-body directive?)))) + (cond ((= 1 (length tokens)) + ;; TODO parse token + (if (integer? (car tokens)) + ;; TODO update current line + (loop environment (cdr lines) done) + (parse-line-directive (expand-macros environment (directive-body directive?))))) + ((= 2 (length tokens)) + ;; TODO parse tokens + (if (and (integer? (car tokens)) + (string-literal? (cadr tokens))) + ;; TODO update current line and file + (loop environment (cdr lines) done) + (parse-line-directive (expand-macros environment (directive-body directive?))))) + (else (parse-line-directive (expand-macros environment (directive-body directive?))))))))) + + ((error) + (throw 'cpp-error-directive + (directive-body directive?))) + + ((warning) + (format (current-error-port) "#warning ~a~%" + (directive-body directive?)) + (loop environment (cdr lines) done)) + + ((pragma) + (format (current-error-port) + "PRAGMA: ~s~%" (directive-body directive?)) + (loop environment (cdr lines) done)) + + ((ident sccs) + (format (current-error-port) + "IDENT: ~s~%" (directive-body directive?)) + (loop environment (cdr lines) done)) + + (else + (scm-error 'cpp-error "parse-directives" + "Unknown pre-processor directive: ~s" + (list line) #f) + ))))))))) + )) + + +(define* (writeln expr optional: (port (current-output-port))) + (write expr port) + (newline port)) + +(define (handle-lines environment lines) + (parse-directive environment + (compose strip-comments fold-lines number-lines))) + + ;; parse-directives environment + +;; Return a number of lines +(define (read-file file-path) + (define raw-lines (call-with-input-file file-path read-lines)) + (mark-with-property! line line-file file-path) + (handle-lines raw-lines)) + + +;; pre defined macros +;; see info manual for cpp 3.7.1 Standard Predefined Macros +;; __FILE__ +;; __LINE__ +;; __DATE__ "Feb 12 1996" +;; __TIME__ "23:59:01" +;; __STDC__ 1 +;; __STDC_VERSION__ 201112L +;; __STDC_HOSTED__ 1 + +;; __cplusplus +;; __OBJC__ +;; __ASSEMBLER__ diff --git a/module/c/preprocessor2.scm b/module/c/preprocessor2.scm new file mode 100644 index 00000000..e99b1049 --- /dev/null +++ b/module/c/preprocessor2.scm @@ -0,0 +1,590 @@ +(define-module (c preprocessor2) + :use-module (srfi srfi-1) + :use-module (srfi srfi-71) + :use-module (srfi srfi-88) + :use-module (ice-9 match) + :use-module (c cpp-environment) + :use-module (c eval2) + :use-module ((c cpp-environment function-like-macro) :select (function-like-macro)) + :use-module ((c cpp-environment object-like-macro) :select (object-like-macro)) + :use-module ((c cpp-environment internal-macro) :select (internal-macro)) + :use-module ((hnh util) :select (->)) + :use-module ((hnh util lens) :select (set)) + :use-module (hnh util path) + :use-module ((c lex2) :select (lex)) + :use-module ((c trigraph) :select (replace-trigraphs)) + :use-module ((c line-fold) :select (fold-lines)) + :export ()) + +;; Returns two values: +;; - tokens until a newline token is met +;; - (potentially the newline token) and the remaining tokens +(define (tokens-until-eol tokens) + (break (lambda (token) (equal? token '(whitespace "\n"))) + tokens)) + +;; match in predicates so non-lists fail properly +(define (whitespace-token? token) + (match token + (`(whitespace ,_) #t) + (_ #f))) + +(define (unwrap-preprocessing-token token) + (match token + (`(preprocessing-token ,x) x) + (_ (scm-error 'wrong-type-arg "unwrap-preprocessing-token" + "Not a preprocessing token: ~s" (list token) + #f)))) + +(define (preprocessing-token? token) + (catch 'wrong-type-arg + (lambda () (unwrap-preprocessing-token token)) + (const #f))) + + +;; Replace all whitespace with single spaces. +(define (squeeze-whitespace tokens) + (match tokens + ('() '()) + ((`(whitespace ,_) `(whitespace ,_) rest ...) + (squeeze-whitespace (cons '(whitespace " ") rest))) + ((`(whitespace ,_) rest ...) + (cons '(whitespace " ") (squeeze-whitespace rest))) + ((token rest ...) + (cons token (squeeze-whitespace rest))))) + +;; Returns the "source" of the token, as a preprocessing string literal token +(define (stringify-token unwrapped-preprocessing-token) + (match unwrapped-preprocessing-token + (`(string-literal ,s) + (format #f "~s" s)) + (`(header-name (q-string ,s)) + (format #f "~s" s)) + (`(header-name (h-string ,s)) + (format #f "<~a>" s)) + (`(identifier ,id) id) + (`(pp-number ,n) n) + (`(character-constant ,c) + (format #f "'~a'" c)) + (`(punctuator ,p) p))) + +(define (stringify-tokens tokens) + `(preprocessing-token + (string-literal + ,(string-concatenate + (map (match-lambda (`(preprocessing-token ,body) (stringify-token body)) + (`(whitespace ,_) " ")) + (squeeze-whitespace tokens)))))) + +;; Expand ## tokens +;; TODO +(define (expand-join macro tokens) + tokens) + +;; parameters is a lexeme list, as returned by parse-parameter-list +(define (build-parameter-map macro parameters) + (if (macro-variadic? macro) + (let ((head rest (split-at parameters (length (macro-identifier-list macro))))) + ;; TODO commas (,) should be interleaved with rest + (cons (cons "__VA_ARGS__" rest) + (map cons (macro-identifier-list macro) head))) + (map cons + (macro-identifier-list macro) + parameters))) + +;; Drop leading whitespace tokens +(define (drop-whitespace tokens) + (drop-while whitespace-token? tokens)) + +(define (drop-whitespace-right tokens) + (-> tokens reverse drop-whitespace reverse)) + +(define (drop-whitespace-both tokens) + (-> tokens + drop-whitespace + drop-whitespace-right)) + +(define (expand-stringifiers macro parameter-map) + (let loop ((tokens (macro-body macro))) + (match tokens + (('(preprocessing-token (punctuator "#")) + rest ...) + (match (drop-whitespace rest) + ((`(preprocessing-token (identifier ,x)) rest ...) + (unless (member x (macro-identifier-list macro)) + (scm-error 'macro-expand-error "expand-stringifiers" + "'#' is not followed by a macro parameter: ~s" + (list x) #f)) + (cons (stringify-tokens (assoc-ref parameter-map x)) + (loop rest))))) + ('() '()) + ((token rest ...) + (cons token (loop rest)))))) + +;; expand function like macro +(define (apply-macro environment macro parameters) + (define parameter-map (build-parameter-map macro parameters)) + (define stringify-resolved (expand-stringifiers macro parameter-map)) + ;; TODO resolve ## + (define resulting-body stringify-resolved #; (expand-join macro stringify-resolved) + ) + (resolve-token-stream (extend-environment environment parameter-map) + resulting-body)) + + + +;; Expand object-like macro + +;; #define VALUE 10 +;; #define str(x) #x +;; #define OTHER str(VALUE) +;; OTHER +;; ⇒ "VALUE" + +;; token should be the token stream just after the name of the macro +(define (expand-macro environment macro tokens) + (cond ((object-macro? macro) + ;; Shouldn't we expand the macro body here? + (values environment (append (macro-body macro) tokens))) + + ((function-macro? macro) + (let ((containing remaining newlines (parse-parameter-list tokens))) + (values (bump-line environment newlines) + ;; Macro output can be macro expanded + ;; TODO self-referential macros? + (append (apply-macro environment macro containing) remaining)))) + + ((internal-macro? macro) + (let ((containing remaining newlines (parse-parameter-list tokens))) + (values (bump-line environment newlines) + (append ((macro-body macro) environment containing) + remaining)))) + + (else + (scm-error 'wrong-type-arg "expand-macro" + "Macro isn't a macro: ~s" + (list macro) #f)))) + +;; Takes a list of preprocessing tokens, and returns two values +;; if the last token was '...' +;; and a list of strings of all token names +;; Note that this is ONLY #define f(x) forms +;; not usage forms +(define (parse-identifier-list tokens) + (let loop ((tokens (remove whitespace-token? tokens)) (done '())) + (match tokens + ('() (values #f (reverse done))) + + ((`(preprocessing-token (identifier ,id)) rest ...) + (loop rest (cons id done))) + + ((`(preprocessing-token (punctuator "..."))) + (values #t (reverse done))) + + ((`(preprocessing-token (punctuator "...")) rest ...) + (scm-error 'cpp-error "parse-identifier-list" + "'...' only allowed as last argument in identifier list. Rest: ~s" + (list rest) #f)) + + ((`(preprocessing-token (punctuator ",")) rest ...) + (loop rest done)) + + ((`(preprocessing-token ,other) rest ...) + (scm-error 'cpp-error "parse-identifier-list" + "Unexpected preprocessing-token in identifier list: ~s" + (list other) #f))))) + + + +;; helper procedure to parse-parameter-list. +;; If a parameter is complex then whitespace is kept, but squeezed to single spaces. Surounding whitespace is removed. +;; Example: +;; #define str(x, y) #y +;; str(x, ( 2, 4 ) ) +;; expands to: +;; "( 2, 4 )" +;; 6.10.3.2 p 2 +(define (cleanup-whitespace tokens) + (-> tokens drop-whitespace-both squeeze-whitespace)) + +;; returns three values: +;; - a list of tokens where each is a parameter to the function like macro +;; - the remaining tokenstream +;; - how many newlines were encountered +;; The standard might call these "replacement lists" +(define (parse-parameter-list tokens) + (let %loop ((depth 0) (newlines 0) (current '()) + (parameters '()) (tokens tokens) (%first-iteration? #t)) + (define* (loop tokens key: + (depth depth) (newlines newlines) + (current current) (parameters parameters)) + (%loop depth newlines current parameters tokens #f)) + (let ((current* (if (zero? depth) + current + (cons (car tokens) current)))) + (match tokens + (('(whitespace "\n") rest ...) + (loop rest newlines: (1+ newlines) current: current*)) + ((`(whitespace ,_) rest ...) + (loop rest current: current*)) + (('(preprocessing-token (punctuator "(")) rest ...) + (loop rest depth: (1+ depth) current: current*)) + (('(preprocessing-token (punctuator ")")) rest ...) + (if (= 1 depth) + ;; return value + (values + (if (null? parameters) + (cond ((null? current) '()) + ((every whitespace-token? current) '()) + (else (reverse + (cons (cleanup-whitespace (reverse current)) + parameters)))) + (reverse + (cond ((null? current) parameters) + ((every whitespace-token? current) parameters) + (else (cons (cleanup-whitespace (reverse current)) + parameters))))) + + rest + newlines) + (loop rest + depth: (1- depth) + current: current*))) + (('(preprocessing-token (punctuator ",")) rest ...) + (if (= 1 depth) + (loop rest + current: '() + parameters: + (cons (cond ((null? current) '()) + ((every whitespace-token? current) '()) + (else (cleanup-whitespace (reverse current)))) + parameters)) + (loop rest current: current*))) + ((_ rest ...) + (loop rest current: current*)))))) + + +(define (join-file-line environment) + (define file (current-file environment)) + (define line (current-line environment)) + (extend-environment + environment + ;; 6.10.8 + (list + (object-like-macro + identifier: "__FILE__" + body: `((preprocessing-token (string-literal ,file)))) + (object-like-macro + identifier: "__LINE__" + body: `((preprocessing-token (pp-number ,(number->string line)))))))) + +(define (c-search-path) (make-parameter (list "." "/usr/include"))) + +;; #include <stdio.h> +(define (resolve-h-file string) + (cond ((path-absolute? string) string) + (else + (let ((filename + (find file-exists? + (map (lambda (path-prefix) + (path-append path-prefix string)) + (c-search-path))))) + (if filename filename + (scm-error 'cpp-error "resolve-h-file" + "Can't resolve file: ~s" + (list string) #f)))))) + +;; #include "myheader.h" +(define (resolve-q-file string) + ;; This should always be a fallback (6.10.2, p. 3) + (cond (else (resolve-h-file string)))) + +(define defined-macro + (internal-macro + identifier: "defined" + body: (lambda (environment tokens) + (match tokens + (`((preprocessing-token (identifier ,id))) + `(preprocessing-token (pp-number ,(boolean->c-boolean (in-environment? environment id))))) + (_ (scm-error 'cpp-error "defined" + "Invalid parameter list to `defined': ~s" + (list tokens) #f)))))) + +;; environment, tokens → environment +(define (handle-pragma environment tokens) + (match tokens + (`((preprocessing-token (identifier "STDC")) (whitespace ,_) ... + (preprocessing-token (identifier ,identifier)) (whitespace ,_) ... + (preprocessing-token (identifier ,on-off-switch)) (whitespace ,_) ...) + ;; TODO actually do something with the pragmas (probably just store them in the environment) + (format (current-error-port) + "#Pragma STDC ~a ~a" identifier on-off-switch) + environment) + (_ (format (current-error-port) + "Non-standard #Pragma: ~s~%" tokens) + environment))) + + +;; TODO +;; (define _Pragma-macro +;; (internal-macro +;; identifier: "_Pragma" +;; body: (lambda (environment tokens) +;; ))) + +;; TODO +(define (resolve-constant-expression tokens) + 'TODO + ) + +;; Expands a token-stream which contains no pre-processing directives (#if:s, ...) +(define (resolve-token-stream environment tokens) + (let loop ((tokens tokens)) + (match tokens + ('() '()) + ((`(preprocessing-token (identifier ,id)) rest ...) + (call-with-values (lambda () (maybe-extend-identifier environment id rest)) + (lambda (_ tokens) (loop tokens)))) + ((`(whitespace ,_) rest ...) + (loop rest)) + ((token rest ...) + (cons token (loop rest)))))) + +;; returns a new environment +;; handle body of #if +;; environment, (list token) → environment +(define (resolve-for-if environment tokens) + (-> (extend-environment environment defined-macro) + (resolve-token-stream tokens) + resolve-constant-expression + c-boolean->boolean + (if (enter-active-if environment) + (enter-inactive-if environment)))) + +;; environment, string, (list token) → environment, (list token) +(define (maybe-extend-identifier environment identifier remaining-tokens) + (cond ((get-identifier environment identifier) + => (lambda (value) (expand-macro (join-file-line environment) + value + remaining-tokens))) + (else ; It wasn't an identifier, leave it as is + ;; TODO shouldn't we include the identifier in the remaining tokens stream? + (values environment remaining-tokens)))) + +(define (resolve-and-include-header environment tokens) + (let loop ((%first-time #t) (tokens tokens)) + (match (drop-whitespace tokens) + ((`(header-name (h-string ,str)) rest ...) + (cond ((remove whitespace-token? rest) + (negate null?) + => (lambda (tokens) + (scm-error 'cpp-error "resolve-and-include-header" + "Unexpected tokens after #include <>: ~s" + (list tokens) #f)))) + (handle-preprocessing-tokens + environment + (-> str resolve-h-file read-file tokenize))) + + ((`(header-name (q-string ,str)) rest ...) + (cond ((remove whitespace-token? rest) + (negate null?) + => (lambda (tokens) + (scm-error 'cpp-error "resolve-and-include-header" + "Unexpected tokens after #include <>: ~s" + (list tokens) + #f)))) + (handle-preprocessing-tokens + environment + (-> str resolve-q-file read-file tokenize))) + + (tokens + (unless %first-time + (scm-error 'cpp-error "resolve-and-include-header" + "Failed parsing tokens: ~s" + (list tokens) #f)) + (loop #f (resolve-token-stream environment tokens)))))) + +;; environment, tokens → environment +(define (handle-line-directive environment tokens*) + (let loop ((%first-time #t) (tokens tokens*)) + (match tokens + (`((preprocessing-token (pp-number ,line)) (whitespace ,_) ... rest ...) + (match rest + (`((preprocessing-token (string-literal ,file)) (whitespace ,_) ...) + (-> environment + (set current-line line) + (set current-file file))) + (`((whitespace ,_) ...) + (set environment current-line line)) + (_ (unless %first-time + (scm-error 'cpp-error "handle-line-directive" + "Invalid line directive: ~s" + (list tokens*) #f)) + (loop #f (resolve-token-stream environment tokens))))) + (_ (unless %first-time + (scm-error 'cpp-error "handle-line-directive" + "Invalid line directive: ~s" + (list tokens*) #f)) + (loop #f (resolve-token-stream environment tokens)))))) + +;; environment, tokens → environment +(define (resolve-define environment tokens) + (match tokens + ((`(preprocessing-token (identifier ,identifier)) tail ...) + (-> environment + bump-line + (add-identifier! + identifier + (match tail + (('(preprocessing-token (punctuator "(")) rest ...) + ;; function like macro + (call-with-values (lambda () (break (lambda (token) (equal? token '(preprocessing-token (punctuator ")")))) + rest)) + (lambda (identifier-list replacement-list) + (let ((variadic? identifiers (parse-identifier-list identifier-list))) + + (function-like-macro + identifier: identifier + variadic?: variadic? + identifier-list: identifiers + ;; NOTE 6.10.3 states that there needs to be at least on whitespace here + body: (cdr replacement-list)))))) + (_ (object-like-macro + identifier: identifier + body: tail)))))))) + + + +;; environment, tokens -> environment, tokens +(define (handle-preprocessing-tokens environment tokens) + (let loop ((environment environment) (tokens tokens)) + (define (err fmt . args) + (scm-error 'cpp-error "handle-preprocessing-tokens" + (string-append "~a:~a " fmt) + (cons* (current-file environment) + (current-line environment) + args) + #f)) + + ;; TODO all of this needs to be surounded with a conditional for + ;; environmentns if status. However, ensure that each directive + ;; starts at start of line + + (match tokens + ('() '()) + ((`(whitespace "\n") `(whitespace ,_) '... '(preprocessing-token (puntuator "#")) rest ...) + ;; Line tokens are those in this line, + ;; while remaining tokens are the newline, follewed by the rest of the files tokens + (let ((line-tokens remaining-tokens (tokens-until-eol rest))) + ;; Actual tokens just removes all whitespace between "#" and "define" + (let ((actual-tokens (drop-whitespace line-tokens))) + (if (null? actual-tokens) + (loop (bump-line environment) remaining-tokens) + (match (car actual-tokens) + (`(preprocessing-token (identifier "if")) + (let ((environment (resolve-for-if environment actual-tokens))) + (loop environment remaining-tokens))) + + (`(preprocessing-token (identifier "ifdef")) + (match actual-tokens + ((`(preprocessing-token (identifier ,id)) _ ...) + (loop + ((if (in-environment? environment id) + enter-active-if enter-inactive-if) + environment) + remaining-tokens)) + (_ (err "Non identifier in ifdef: ~s" actual-tokens)))) + + (`(preprocessing-token (identifier "ifndef")) + (match actual-tokens + ((`(preprocessing-token (identifier ,id)) _ ...) + (loop + ((if (in-environment? environment id) + enter-inactive-if enter-active-if) + environment) + remaining-tokens)) + (_ (err "Non identifier in ifndef: ~s" actual-tokens)))) + + ('(preprocessing-token (identifier "else")) + ;; TODO + 'TODO + ) + + ('(preprocessing-token (identifier "elif")) + (-> environment leave-if + (resolve-for-if actual-tokens) + (loop remaining-tokens))) + + ('(preprocessing-token (identifier "endif")) + (loop (leave-if environment) remaining-tokens)) + + ('(preprocessing-token (identifier "include")) + (call-with-values + (lambda () (resolve-and-include-header environment (cdr actual-tokens))) + (lambda (environment tokens) + (loop environment (append tokens remaining-tokens))))) + + ('(preprocessing-token (identifier "define")) + (let ((env (resolve-define environment (cdr actual-tokens)))) + (loop env remaining-tokens)) + ) + + ('(preprocessing-token (identifier "undef")) + (loop (match actual-tokens + (`((preprocessing-token (identifier ,id))) + (-> environment bump-line (remove-identifier! id)))) + remaining-tokens)) + + ('(preprocessing-token (identifier "line")) + (loop (handle-line-directive environment actual-tokens) + remaining-tokens)) + + ('(preprocessing-token (identifier "error")) + ;; NOTE this is an "expected" error + (throw 'cpp-error actual-tokens)) + + ('(preprocessing-token (identifier "pragma")) + (loop (handle-pragma environment actual-tokens) + remaining-tokens))))))) + + ((`(preprocessing-token (identifier ,id)) rest ...) + (call-with-values (lambda () (maybe-extend-identifier environment id rest)) + loop)) + + (('(whitespace "\n") rest ...) + (cons '(whitespace "\n") (loop (bump-line environment) rest))) + + ((token rest ...) (cons token (loop environment rest)))))) + + + +(define (comment->whitespace expr) + (match expr + (('comment _) '(whitespace " ")) + (other other))) + +(define (read-file path) + (call-with-input-file path (@ (ice-9 rdelim) read-string))) + +(define (comment->whitespace token) + (match token + (`(comment ,_) '(whitespace " ")) + (other other))) + +(define (comments->whitespace tokens) + (map comment->whitespace tokens)) + +;;; 5.1.11.2 Translation phases + +(define (tokenize string) + (-> string +;;; 1. trigraph replacement + replace-trigraphs +;;; 2. Line folding + fold-lines +;;; 3. Decomposition into preprocenning tokens, whitespaces, and comments + lex +;;; 4. Execution of preprocessing directives, all preprocessing directives are then deleted + comments->whitespace + ;; squeeze-whitespace-blocks + )) diff --git a/module/c/trigraph.scm b/module/c/trigraph.scm new file mode 100644 index 00000000..197e01a4 --- /dev/null +++ b/module/c/trigraph.scm @@ -0,0 +1,24 @@ +(define-module (c trigraph) + :use-module (ice-9 regex) + :export (replace-trigraphs)) + +(define rx (make-regexp "\\?\\?([=\\(\\)'!<>/-])")) + +(define (proc m) + (case (string-ref (match:substring m 2) 0) + ((#\=) "#") + ((#\() "[") + ((#\)) "]") + ((#\') "^") + ((#\<) "{") + ((#\>) "}") + ((#\!) "|") + ((#\-) "~") + ((#\/) "\\"))) + +(define (replace-trigraphs string) + (call-with-output-string + (lambda (port) + (regexp-substitute/global + port rx string + 'pre proc 'post)))) diff --git a/module/hnh/util.scm b/module/hnh/util.scm index d2c0dd5f..9a45704b 100644 --- a/module/hnh/util.scm +++ b/module/hnh/util.scm @@ -34,6 +34,8 @@ group-by split-by + split-by-one-of + span-upto cross-product @@ -341,6 +343,29 @@ (cdr rem))]))) +(define (split-by-one-of lst items) + (cond ((null? items) + (scm-error 'wrong-type-arg "split-by-one-of" + "Must have at least one item to split by, when splitting ~s" + (cons items '()) #f)) + ((not (list? items)) + (scm-error 'wrong-type-arg "split-by-one-of" + "Items must be list of list of symbols, got ~s" + (list items) #f)) + (else + (call-with-values + (lambda () + (car+cdr + (let loop ((token 'sentinel-token) (lst lst)) + (let ((head tail (break (lambda (item) (memv item items)) + lst))) + (let ((group (cons token head))) + (if (null? tail) + (list group) + (cons group (loop (car tail) (cdr tail))))))))) + ;; Remove the sentinel token + (lambda (first rest) (cons (cdr first) rest)))))) + ;; Simar to span from srfi-1, but never takes more than ;; @var{count} items. Can however still take less. diff --git a/module/hnh/util/path.scm b/module/hnh/util/path.scm index ea081e85..0c8af48a 100644 --- a/module/hnh/util/path.scm +++ b/module/hnh/util/path.scm @@ -3,6 +3,7 @@ :use-module (srfi srfi-71) :use-module (hnh util) :export (path-append + path-absolute? path-join path-split file-hidden? @@ -12,6 +13,8 @@ (define // file-name-separator-string) (define /? file-name-separator?) +(define path-absolute? absolute-file-name?) + (define (path-append . strings) (fold (lambda (s done) (string-append diff --git a/module/srfi/srfi-64/util.scm b/module/srfi/srfi-64/util.scm new file mode 100644 index 00000000..a371227f --- /dev/null +++ b/module/srfi/srfi-64/util.scm @@ -0,0 +1,11 @@ +(define-module (srfi srfi-64 util) + :use-module (ice-9 curried-definitions) + :use-module ((srfi srfi-1) :select (every)) + :use-module (srfi srfi-64) + :export (test-match-group)) + +;; Specifier for name of group +(define ((test-match-group name . names) runner) + (every string=? + (reverse (cons name names)) + (test-runner-group-stack runner))) diff --git a/tests/run-tests.scm b/tests/run-tests.scm index 5270636e..3955a6a2 100755 --- a/tests/run-tests.scm +++ b/tests/run-tests.scm @@ -27,6 +27,7 @@ fi (ice-9 pretty-print) (ice-9 getopt-long) (ice-9 match) + (ice-9 regex) (system vm coverage) ((all-modules) :select (fs-find)) ) @@ -54,6 +55,24 @@ fi (define (make-indent depth) (make-string (* 2 depth) #\space)) +(define (string-replace-head s1 s2) + (string-replace s1 s2 + 0 (string-length s2))) + +(define (pp form indent prefix-1) + (let ((prefix (make-string (+ (string-length indent) + (string-length prefix-1)) + #\space))) + (display + (string-replace-head + (with-output-to-string + (lambda () (pretty-print + form + per-line-prefix: prefix + width: (- 79 (string-length indent))))) + (string-append indent prefix-1))))) + + (define (construct-test-runner) (define runner (test-runner-null)) (define depth 0) @@ -75,7 +94,10 @@ fi (cond ((test-runner-test-name runner) (negate string-null?) => identity) ((test-result-ref runner 'expected-value) - => (lambda (p) (with-output-to-string (lambda () (display (bold "[SOURCE]: ")) (truncated-print p width: 60)))))))) + => (lambda (p) (with-output-to-string + (lambda () + (display (bold "[SOURCE]: ")) + (truncated-print p width: 60)))))))) (when (eq? 'fail (test-result-kind)) (cond ((test-result-ref runner 'actual-error) => (lambda (err) @@ -94,12 +116,12 @@ fi (unknown-actual (gensym))) (let ((expected (test-result-ref runner 'expected-value unknown-expected)) (actual (test-result-ref runner 'actual-value unknown-actual))) - (if (eq? expected unknown-expected) - (format #t "~aAssertion failed, received ~s~%" - (make-indent (1+ depth)) actual) - (format #t "~aExpected: ~s~%~aReceived: ~s~%" - (make-indent (1+ depth)) expected - (make-indent (1+ depth)) actual)))))) + (let ((indent (make-indent (1+ depth)))) + (if (eq? expected unknown-expected) + (format #t "~aAssertion failed~%" indent) + (begin + (pp expected indent "Expected: ") + (pp actual indent "Received: ")))))))) (format #t "~aNear ~a:~a~%" (make-indent (1+ depth)) (test-result-ref runner 'source-file) @@ -203,9 +225,6 @@ fi ;; (format #t "Running on:~%~y~%" files) -(awhen (option-ref options 'only #f) - (set! files (list (path-append "test" it)))) - ((@ (hnh util exceptions) warnings-are-errors) #t) @@ -240,9 +259,38 @@ fi (test-begin "suite") -(awhen (option-ref options 'skip #f) - (format #t "Skipping ~s~%" it) - (test-skip it)) + +(define onlies + (let %loop ((args (command-line)) (onlies '())) + (define* (loop args key: only) + (if only + (%loop args (cons only onlies)) + (%loop args onlies))) + (if (null? args) + onlies + (cond ((string-match "^--skip(=.*)?$" (car args)) + => (lambda (m) + (cond ((match:substring m 1) + => (lambda (s) + (format #t "Skipping ~s~%" s) + (test-skip s) + (loop (cdr args)))) + (else (format #t "Skipping ~s~%" (cadr args)) + (test-skip (cadr args)) + (loop (cddr args)))))) + ((string-match "^--only(=.*)?$" (car args)) + => (lambda (m) + (cond ((match:substring m 1) + => (lambda (s) + (loop (cdr args) only: s))) + (else (loop (cddr args) only: (cadr args)))))) + (else (loop (cdr args))))))) + +(unless (null? onlies) + (set! files + (map (lambda (x) (path-append "test" x)) + ;; reverse only until I have built a dependency graph for tests + (reverse onlies)))) (finalizer (lambda () (for-each (lambda (f) (catch/print-trace (lambda () (test-group f (load f))))) files))) diff --git a/tests/test/c-parse.scm b/tests/test/c-parse.scm new file mode 100644 index 00000000..c16958de --- /dev/null +++ b/tests/test/c-parse.scm @@ -0,0 +1,69 @@ +;;; Commentary +;; Test implementation details of C parser +;; TODO Should be ran before (test cpp) +;;; Code + +(define-module (test cpp) + :use-module (srfi srfi-64) + :use-module (srfi srfi-88) + :use-module ((c lex) :select (lex)) + :use-module (c parse)) + +(define flatten-infix (@@ (c parse) flatten-infix)) +(define resolve-order-of-operations (@@ (c parse) resolve-order-of-operations)) + +(test-group "Flatten infix" + (test-equal "Simple binary operator" + '(fixed-infix (integer (base-10 "1")) + + + (integer (base-10 "2"))) + (flatten-infix (lex "1 + 2"))) + + (test-equal "Simple binary operator, with compound structure in on branch" + '(fixed-infix (integer (base-10 "1")) + + + (funcall (variable "f") + (group (integer (base-10 "2"))))) + (flatten-infix (lex "1 + f(2)")))) + +(test-group "Order of operations" + (test-equal "Basic binary operator" + '((resolved-operator +) + (integer (base-10 "1")) + (integer (base-10 "2"))) + (resolve-order-of-operations (flatten-infix (lex "1 + 2")))) + + (test-equal "Multiple operators, with non-left-associative application" + '((resolved-operator +) + (integer (base-10 "1")) + ((resolved-operator *) + (integer (base-10 "2")) + (integer (base-10 "3")))) + (resolve-order-of-operations (flatten-infix (lex "1 + 2 * 3")))) + + (test-equal "Multiple of the same operation gets clumed together" + '((resolved-operator +) + (integer (base-10 "1")) + (integer (base-10 "2")) + (integer (base-10 "3"))) + (resolve-order-of-operations (flatten-infix (lex "1 + 2 + 3")))) + + (test-equal "Simple Ternary" + '(ternary + (integer (base-10 "1")) + (integer (base-10 "2")) + (integer (base-10 "3"))) + (resolve-order-of-operations (flatten-infix (lex "1 ? 2 : 3")))) + + (test-equal "ternary with further infix operators" + '(ternary ((resolved-operator +) + (integer (base-10 "1")) + (integer (base-10 "2"))) + ((resolved-operator %) + (integer (base-10 "3")) + (integer (base-10 "4"))) + ((resolved-operator *) + (integer (base-10 "5")) + (integer (base-10 "6")))) + (resolve-order-of-operations (flatten-infix (lex "1 + 2? 3 % 4 : 5 * 6"))))) + diff --git a/tests/test/cpp.scm b/tests/test/cpp.scm index 9c720fde..1294bc96 100644 --- a/tests/test/cpp.scm +++ b/tests/test/cpp.scm @@ -3,37 +3,602 @@ ;;; Code: (define-module (test cpp) + :use-module (srfi srfi-1) :use-module (srfi srfi-64) :use-module (srfi srfi-88) :use-module ((c lex) :select (lex)) - :use-module ((c parse) :select (parse-lexeme-tree))) + :use-module ((c parse) :select (parse-lexeme-tree)) + :use-module ((c eval) :select (c-eval)) + :use-module ((c eval environment) :select (make-environment env-set!)) + :use-module ((rnrs arithmetic bitwise) + :select (bitwise-xor))) + +;; Note that the lexer's output isn't stable. +;; The tests here are more to see where the lexer succeeds but the parser fails. +;; So changing the lexer test cases isn't a problem +;; but don't change the parser test cases + +;; __asm__ always has strings as arguments +(test-skip "__asm__") + +;; Lexer produces garbage when attempted. Fixing this would also fix cast +;; operations. +(test-skip "Float in infix expression") +;; order of operation is wrong, leading to an incorrect result +(test-skip "Cast with operation") + +;; not implemented +(test-skip "Token concatenation") + +;; A string follewed by a macro (which expands to a string) +;; should be concatenated. This is however not yet implemented +(test-skip "Implicit concatenation of string and macro") (define run (compose parse-lexeme-tree lex)) -(test-equal - '(+ (post-increment (dereference C)) 3) - (run "(*C)++ + 3")) +(define (alist->environment alist) + (fold (lambda (pair env) + (apply env-set! env pair)) + (make-environment) + alist)) + +(define (exec form . base-bindings) + (call-with-values + (lambda () (c-eval (alist->environment base-bindings) + (run form))) + (lambda (env value) value))) + +(define-syntax let-group + (syntax-rules () + ((let ((form name) rest ...) body ...) + (test-group name + (let ((form name) + rest ...) + body ...))))) + +(let-group + ((form "(*C)++ + 3")) + (test-equal '(infix (postfix (group (prefix (prefix-operator "*") + (variable "C"))) + (postfix-operator "++")) + (operator "+") + (integer (base-10 "3"))) + (lex form)) + (test-equal '(+ (post-increment (dereference C)) 3) + (run form))) + +(let-group + ((form "*C++ + 3")) + (test-equal '(infix (postfix (prefix (prefix-operator "*") + (variable "C")) + (postfix-operator "++")) + (operator "+") + (integer (base-10 "3"))) + (lex form)) + (test-equal '(+ (post-increment (dereference C)) 3) + (run form))) + +(let-group + ((form "*C++")) + (test-equal '(postfix (prefix (prefix-operator "*") + (variable "C")) + (postfix-operator "++")) + (lex form)) + (test-equal '(post-increment (dereference C)) + (run form))) + +(let-group + ((form "C++ + C++")) + (test-equal '(infix (postfix (variable "C") + (postfix-operator "++")) + (operator "+") + (postfix (variable "C") + (postfix-operator "++"))) + (lex form)) + (test-equal '(+ (post-increment C) (post-increment C)) + (run form))) + +(let-group + ((form "++C + ++C")) + (test-equal '(infix (prefix (prefix-operator "++") + (variable "C")) + (operator "+") + (prefix (prefix-operator "++") + (variable "C"))) + (lex form)) + (test-equal '(+ (pre-increment C) (pre-increment C)) + (run form))) + +(let-group + ((form "2 + 2 * 2")) + (test-equal '(infix (integer (base-10 "2")) + (operator "+") + (infix (integer (base-10 "2")) + (operator "*") + (integer (base-10 "2")))) + (lex form)) + (test-equal '(+ 2 (* 2 2)) (run form)) + (test-equal 6 (exec form))) + +(let-group + ((form "2 * 2 + 2")) + (test-equal '(infix (integer (base-10 "2")) + (operator "*") + (infix (integer (base-10 "2")) + (operator "+") + (integer (base-10 "2")))) + (lex form)) + (test-equal '(+ (* 2 2) 2) (run form)) + (test-equal 6 (exec form))) + +(let-group + ((form "2+2+2")) + (test-equal '(infix (integer (base-10 "2")) + (operator "+") + (infix (integer (base-10 "2")) + (operator "+") + (integer (base-10 "2")))) (lex form)) + (test-equal '(+ 2 2 2) (run form)) + (test-equal 6 (exec form))) + +(test-group "Unary minus" + (test-group "Without space" + (let ((form "-1")) + (test-equal '(prefix (prefix-operator "-") + (integer (base-10 "1"))) + (lex form)) + (test-equal '(- 1) (run form)) + (test-equal -1 (exec form)))) + + (test-group "With space" + (let ((form "- 1")) + (test-equal '(prefix (prefix-operator "-") + (integer (base-10 "1"))) + (lex form)) + (test-equal '(- 1) (run form)) + (test-equal -1 (exec form)))) + + (test-group "Before variable" + (let ((form "-x")) + (test-equal '(prefix (prefix-operator "-") + (variable "x")) + (lex form)) + (test-equal '(- x) (run form)) + (test-equal -5 (exec form '(x 5))))) + + (test-group "Before infix" + (let ((form "-x+3")) + (test-equal '(infix (prefix (prefix-operator "-") + (variable "x")) + (operator "+") + (integer (base-10 "3"))) + (lex form)) + (test-equal '(+ (- x) 3) (run form)) + (test-equal -2 (exec form '(x 5))))) + + (test-group "Inside infix expression" + (let ((form "x+-3")) + (test-equal '(infix (variable "x") + (operator "+") + (prefix (prefix-operator "-") + (integer (base-10 "3")))) + (lex form)) + (test-equal '(+ x (- 3)) (run form)) + (test-equal 2 (exec form '(x 5))))) + ) + + + + +;; Hand picked forms from output of `cpp -dM /usr/include/termios.h` on +;; FreeBSD 13.1-RELEASE releng/13.1-n250148-fc952ac2212 GENERIC amd64 +;; 2022-06-28 + +(let ((form "00000200")) + (test-equal '(integer (base-8 "0000200")) (lex form)) + (test-equal 128 (run form))) + +(let ((form "0")) + (test-equal '(integer (base-10 "0")) (lex form)) + (test-equal 0 (run form))) + +(let ((form "1000000U")) + (test-equal '(integer (base-10 "1000000") (integer-suffix "U")) (lex form)) + (test-equal '(as-type (unsigned) 1000000) (run form)) + (test-equal 1000000 (exec form))) + + +(let ((form "0x10c")) + (test-equal '(integer (base-16 "10c")) (lex form)) + (test-equal 268 (run form))) + +;; Lexer keeps original case, handled later by parser +(let ((form "0X10C")) + (test-equal '(integer (base-16 "10C")) (lex form)) + (test-equal 268 (run form))) + +(let ((form "a != b")) + (test-equal '(infix (variable "a") + (operator "!=") + (variable "b")) + (lex form)) + (test-equal '(not_eq a b) (run form)) + (test-equal 1 (exec form '(a 1) '(b 2))) + (test-equal 0 (exec form '(a 1) '(b 1))) + ) + +(let ((form "((c) == (val) && (val) != _POSIX_VDISABLE)")) + ;; (test-equal '() (lex form)) + (test-equal '(and (== c val) + (not_eq val _POSIX_VDISABLE)) + (run form)) + (test-equal 0 (exec form '(c 1) '(val 2) '(_POSIX_VDISABLE 3))) + ) + +(let ((form "CTRL('O')")) + (test-equal '(funcall (variable "CTRL") (group (char "O"))) (lex form)) + (test-equal '(funcall CTRL 79) (run form)) + (test-equal (bitwise-xor #x40 (char->integer #\O)) + (exec form + ;; Definition copied from our parsers output of + ;; preprocessing output as defined above + '(CTRL (lambda (x) + (ternary (and (>= x 97) (<= x 122)) + (+ (- x 97) 1) + (bitand (+ (- x 65) 1) 127))))))) + +(let ((form "CREPRINT")) + (test-equal '(variable "CREPRINT") (lex form)) + (test-equal 'CREPRINT (run form))) + +(let ((form "(CCTS_OFLOW | CRTS_IFLOW)")) + (test-equal '(group (infix (variable "CCTS_OFLOW") + (operator "|") + (variable "CRTS_IFLOW"))) + (lex form)) + (test-equal '(bitor CCTS_OFLOW CRTS_IFLOW) (run form))) + +;; ((x) >= 'a' && (x) <= 'z' +;; ? ((x) - 'a' + 1) +;; : (((x) - 'a' + 1) & 0x7f)) +(let ((form "((x) >= 'a' && (x) <= 'z' ? ((x) - 'a' + 1) : (((x) - 'a' + 1) & 0x7f))")) + ;; (test-equal '() (lex form)) + (test-equal '(ternary + (and (>= x #x61) + (<= x #x7A)) + (+ (- x #x61) 1) + (bitand (+ (- x #x61) 1) 127)) + (run form))) + +(let ((form "((x) & ~(IOCPARM_MASK << 16))")) + ;; (test-equal '() (lex form)) + (test-equal '(bitand x (compl (<< IOCPARM_MASK 16))) (run form))) + +(let ((form "(((x) >> 8) & 0xff)")) + ;; (test-equal '() (lex form)) + (test-equal '(bitand (>> x 8) 255) (run form))) + +(let ((form "(((x) >> 16) & IOCPARM_MASK)")) + ;; (test-equal '() (lex form)) + (test-equal '(bitand (>> x 16) IOCPARM_MASK) (run form))) + +(let ((form "((1 << IOCPARM_SHIFT) - 1)")) + ;; (test-equal '() (lex form)) + (test-equal '(- (<< 1 IOCPARM_SHIFT) 1) (run form))) + +(let ((form "_IO('t', 120)")) + (test-equal '(funcall + (variable "_IO") + (group (infix (char "t") + (operator ",") + (integer (base-10 "120"))))) + (lex form)) + (test-equal '(funcall _IO (#{,}# 116 120)) (run form))) + +;; note the lone type +(let ((form "_IOW('t', 98, int)")) + ;; (test-equal '() (lex form)) + (test-equal '(funcall _IOW (#{,}# 116 98 int)) + (run form))) + +;; note the multi-word type +(let ((form "_IOR('t', 19, struct termios)")) + ;; (test-equal '() (lex form)) + (test-equal '(funcall _IOR (#{,}# 116 19 (struct-type termios))) (run form))) + + +;; TODO concatenation rules +;; #define __CONCAT(x,y) __CONCAT1(x,y) +;; #define __CONCAT1(x,y) x ## y +;; #define __CONSTANT_CFSTRINGS__ 1 +;; #define __COPYRIGHT(s) __IDSTRING(__CONCAT(__copyright_,__LINE__),s) + +(test-group "Token concatenation" + (let ((form "x ## y")) + (test-equal '() (lex form)) + (test-equal '0 (run form)))) + +(test-group "Floating point numbers" + + (test-group "Diffent forms" + (test-group "No decimal point, exponent, no suffix" + (let ((form "10e10")) + (test-equal '(float (float-integer (base-10 "10")) + (exponent (base-10 "10"))) + (lex form)) + (test-equal 10e10 (run form)))) + + (test-group "No decimal point, negative exponent" + (let ((form "10e-10")) + (test-equal '(float (float-integer (base-10 "10")) + (exponent "-" (base-10 "10"))) + (lex form)) + (test-equal 10e-10 (run form)))) + + (test-group "No decimal point, exponent and suffix" + (let ((form "10e10L")) + (test-equal '(float (float-integer (base-10 "10")) + (exponent (base-10 "10")) + (float-suffix "L")) + (lex form)) + (test-equal '(as-type (long double) 10e10) + (run form)))) + + (test-group "Leading period, no exponent or suffix" + (let ((form ".1")) + (test-equal '(float (float-decimal (base-10 "1"))) (lex form)) + (test-equal 0.1 (run form)))) + + (test-group "Trailing period, no exponent or suffix" + (let ((form "1.")) + (test-equal '(float (float-integer (base-10 "1"))) (lex form)) + (test-equal 1.0 (run form))))) + + + (test-group "Negative float" + (let ((form "-1.0")) + (test-equal '(prefix (prefix-operator "-") + (float (float-integer (base-10 "1")) + (float-decimal (base-10 "0")))) + (lex form)) + (test-equal '(- 1.0) (run form)))) + + + + (test-group "Real world examples" + (let ((form "4.9406564584124654e-324")) + (test-equal '(float (float-integer (base-10 "4")) + (float-decimal (base-10 "9406564584124654")) + (exponent "-" (base-10 "324"))) + (lex form)) + (test-equal 4.9406564584124654e-324 (run form))) + + (let ((form "1.7976931348623157e+308")) + (test-equal '(float (float-integer (base-10 "1")) + (float-decimal (base-10 "7976931348623157")) + (exponent "+" (base-10 "308"))) + (lex form)) + (test-equal 1.7976931348623157e+308 (run form)))) + + (test-group "Float in infix expression" + (test-group "Simple case" + (let ((form "1. + .1")) + (test-equal '(infix (float (float-integer (base-10 "1"))) + (operator "+") + (float (float-decimal (base-10 "1")))) + (lex form)) + (test-equal '(+ 1.0 0.1) (run form)))) + ;; (test-group "Complicated case") + )) + +(test-group "Typecasts" + + (let ((form "(unsigned) 5")) + (test-equal '((group (variable "unsigned")) + (integer (base-10 "5"))) + (lex form)) + (test-equal '(as-type (unsigned) 5) + (run form))) + + (let ((form "(unsigned integer) 5")) + (test-equal '((group (variable "unsigned") + (variable "integer")) + (integer (base-10 "5"))) + (lex form)) + (test-equal '(as-type (unsigned integer) 5) (run form))) + + (test-group "Pointer with space" + (let ((form "(int *) 5")) + (test-equal '((group (postfix (variable "int") + (postfix-operator "*"))) + (integer (base-10 "5"))) + (lex form)) + (test-equal '(as-type (int *) 5) + (run form)))) + + (test-group "Pointer without space" + (let ((form "(int*) 5")) + (test-equal '((group (postfix (variable "int") + (postfix-operator "*"))) + (integer (base-10 "5"))) + (lex form)) + (test-equal '(as-type (int *) 5) + (run form)))) + + (test-group "Multi word type pointer" + (let ((form "(unsigned int*) 5")) + (test-equal '((group (variable "unsigned") + (postfix (variable "int") + (postfix-operator "*"))) + (integer (base-10 "5"))) + (lex form)) + (test-equal '(as-type (unsigned int *) 5) + (run form)))) + + (test-group "Double cast" + (let ((form "(int)(unsigned) 5")) + (test-equal '((group (variable "int")) + (group (variable "unsigned")) + (integer (base-10 "5"))) (lex form)) + (test-equal '(as-type (int) (as-type (unsigned) 5)) + (run form)))) + + (test-group "Cast with operation" + (let ((form "(int) 5 + 7")) + (test-equal '((group (variable "int")) + (infix (integer (base-10 "5")) + (operator "+") + (integer (base-10 "7")))) + (lex form)) + + (test-equal '(+ (as-type (int) 5) 7) + (run form)))) + + + + (test-group "Tripple cast, with value inside paranthesis" + (let ((form "(type)(__uintptr_t)(const void *)(var)")) + (test-equal '((group (variable "type")) + (group (variable "__uintptr_t")) + (group (variable "const") + (postfix (variable "void") + (postfix-operator "*"))) + (group (variable "var"))) + (lex form)) + (test-equal '(as-type (type) + (as-type (__uintptr_t) + (as-type (const void *) + var))) + (run form)))) + + (test-group "Same as above, but whole thing inside parenthesis" + (let ((form "((type)(__uintptr_t)(const void *)(var))")) + (test-equal '(group (group (variable "type")) + (group (variable "__uintptr_t")) + (group (variable "const") + (postfix (variable "void") + (postfix-operator "*"))) + (group (variable "var"))) + (lex form)) + (test-equal '(as-type (type) + (as-type (__uintptr_t) + (as-type (const void *) + var))) + (run form)))) + + (let ((form "((type)(__uintptr_t)(const volatile void *)(var))")) + ;; (test-equal '() (lex form)) + (test-equal '(as-type (type) + (as-type (__uintptr_t) + (as-type (const volatile void *) + var))) + (run form))) + + (let ((form "((unsigned long) ((inout) | (((len) & IOCPARM_MASK) << 16) | ((group) << 8) | (num)))")) + (test-equal '(group (group (variable "unsigned") (variable "long")) + (group (infix (group (variable "inout")) + (operator "|") + (infix (group (infix (group (infix (group (variable "len")) + (operator "&") + (variable "IOCPARM_MASK"))) + (operator "<<") + (integer (base-10 "16")))) + (operator "|") + (infix (group (infix (group (variable "group")) + (operator "<<") + (integer (base-10 "8")))) + (operator "|") + (group (variable "num"))))))) + (lex form)) + (test-equal '(as-type (unsigned long) + (bitor inout + (<< (bitand len IOCPARM_MASK) 16) + (<< group 8) + num)) + (run form)))) + +(test-group "Characters" + (let ((form "'c'")) + (test-equal '(char "c") (lex form)) + (test-equal #x63 (run form))) + + (let ((form "'\\n'")) + (test-equal '(char (escaped-char "n")) (lex form)) + (test-equal (char->integer #\newline) (run form)))) + +(test-group "Strings" + (test-group "Empty string" + (let ((form "\"\"")) + (test-equal 'string (lex form)) + (test-equal #vu8(0) (run form)))) + + (test-group "Simple string" + (let ((form "\"li\"")) + (test-equal '(string "li") (lex form)) + (test-equal #vu8(#x6C #x69 0) (run form)))) + + (test-group "Implicit concatenation of strings" + (let ((form "\"a\" \"b\"")) + (test-equal '((string "a") + (string "b")) + (lex form)) + (test-equal #vu8(#x61 #x62 0) + (run form)))) -(test-equal - '(+ (post-increment (dereference C)) 3) - (run "*C++ + 3")) + (test-group "Implicit concatenation of string and macro" + (let ((form "\"a\" MACRO")) + (test-equal '((string "a") (variable "MACRO")) (lex form)) + (test-equal '() (run form)))) -(test-equal - '(post-increment (dereference C)) - (run "*C++")) + (test-group "String with only escape" + (let ((form (string #\" #\\ #\" #\"))) + (test-equal `(string (escaped-char "\"")) (lex form)) + (test-equal #vu8(34 0) (run form)))) -(test-equal - '(+ (post-increment C) (post-increment C)) - (run "C++ + C++")) + (test-group "String with escape at start" + (let ((form (string #\" #\\ #\" #\a #\"))) + (test-equal `(string (escaped-char "\"") "a") (lex form)) + (test-equal #vu8(34 #x61 0) (run form)))) -(test-equal - '(+ (pre-increment C) (pre-increment C)) - (run "++C + ++C")) + (test-group "String with escape at end" + (let ((form (string #\" #\a #\\ #\" #\"))) + (test-equal `(string "a" (escaped-char "\"")) (lex form)) + (test-equal #vu8(#x61 34 0) (run form)))) -(test-equal '(+ 2 (* 2 2)) (run "2 + 2 * 2")) + (test-group "String with escape in middle" + (let ((form (string #\" #\a #\\ #\" #\b #\"))) + (test-equal `(string "a" (escaped-char "\"") "b") (lex form)) + (test-equal #vu8(#x61 34 #x62 0) (run form)))) -(test-equal '(+ (* 2 2) 2) (run "2 * 2 + 2")) + ;; \e is semi non-standard + (test-group "String with bakslash-e esacpe" + (let ((form "\"\\e\"")) + (test-equal '(string (escaped-char "e")) (lex form)) + (test-equal #vu8(#x1b 0) (run form)))) -(test-equal '(+ 2 2 2) (run "2+2+2")) + (test-group "String with byte secquence escape" + (let ((form "\"\\xf0\\x9f\\x92\\xa9\"")) + (test-equal '(string (escaped-char (base-16-char "f0")) + (escaped-char (base-16-char "9f")) + (escaped-char (base-16-char "92")) + (escaped-char (base-16-char "a9"))) + (lex form)) + (test-equal #vu8(#xf0 #x9f #x92 #xa9 0) (run form))))) +(test-group "__asm__" + (let ((form "__asm__(\".globl \" __XSTRING(sym))")) + (test-equal '() (lex form)) + ;; TODO implicit string concatenation + (test-equal '(funcall __asm__ + (string ".globl ") + (funcall __XSTRING sym)) (run form)))) +(let ((form "__attribute__((__aligned__(x)))")) + (test-equal '(funcall (variable "__attribute__") + (group (group (funcall (variable "__aligned__") + (group (variable "x")))))) + (lex form)) + ;; This drops the extra set of parenthesis. Do we care? + (test-equal '(funcall __attribute__ + (funcall __aligned__ x)) + (run form))) diff --git a/tests/test/cpp/cpp-environment.scm b/tests/test/cpp/cpp-environment.scm new file mode 100644 index 00000000..8600c731 --- /dev/null +++ b/tests/test/cpp/cpp-environment.scm @@ -0,0 +1,44 @@ +(define-module (test cpp cpp-environmunt) + :use-module (srfi srfi-64) + :use-module (srfi srfi-88) + :use-module (c cpp-environment) + :use-module (c cpp-environment object-like-macro) + ) + +(let ((e (make-environment))) + (test-equal '(outside) (cpp-if-status e)) + (let ((e* (enter-active-if e))) + (test-equal "Enter works" '(active-if outside) (cpp-if-status e*)) + (test-equal "Original object remainins unmodified" + '(outside) (cpp-if-status e)))) + +(define cpp-file-stack (@@ (c cpp-environment) cpp-file-stack)) + +(let ((e (make-environment))) + (test-equal "Default file stack" '(("*outside*" . 1)) (cpp-file-stack e)) + (let ((e* (enter-file e "test.c"))) + (test-equal "File stack after entering file" + '(("test.c" . 1) ("*outside*" . 1)) (cpp-file-stack e*)) + (let ((e** (bump-line e*))) + (test-equal 2 (current-line e**))))) + + + +(let ((e (make-environment))) + (let ((e* (add-identifier! + e "key" + (object-like-macro + identifier: "key" + body: '((preprocessing-token (identifier "value"))))))) + (let ((result (get-identifier e* "key"))) + (test-assert (macro? result)) + (test-equal '((preprocessing-token (identifier "value"))) + (macro-body result)))) + ;; (get-identifier e "key") here is undefined + ) + +(let ((e (make-environment))) + (let ((result (get-identifier e "key"))) + (test-assert "Missing identifier returns #f" + (not result))) + ) diff --git a/tests/test/cpp/lex2.scm b/tests/test/cpp/lex2.scm new file mode 100644 index 00000000..762ff176 --- /dev/null +++ b/tests/test/cpp/lex2.scm @@ -0,0 +1,80 @@ +(define-module (test cpp lex2) + :use-module (srfi srfi-64) + :use-module (srfi srfi-88) + :use-module (ice-9 peg) + :use-module (c lex2)) + + +(test-equal "Integer literal" + '((preprocessing-token (pp-number "10"))) + (lex "10")) + +(test-equal "String literal" + '((preprocessing-token (string-literal "Hello"))) + (lex "\"Hello\"")) + + +(test-equal "Mulitple tokens, including whitespace" + '((whitespace " ") + (preprocessing-token (pp-number "10")) + (whitespace " ")) + (lex " 10 ")) + +(test-equal "Char literal" + '((preprocessing-token (character-constant "a"))) + (lex "'a'")) + + + +(test-equal "Comment inside string" + '((preprocessing-token (string-literal "Hel/*lo"))) + (lex "\"Hel/*lo\"")) + +(test-equal "#define line" + '((preprocessing-token (punctuator "#")) + (preprocessing-token (identifier "define")) + (whitespace " ") + (preprocessing-token (identifier "f")) + (preprocessing-token (punctuator "(")) + (preprocessing-token (identifier "x")) + (preprocessing-token (punctuator ")")) + (whitespace " ") + (preprocessing-token (pp-number "10"))) + (lex "#define f(x) 10")) + + + +(test-equal "Nested parenthesis" + '((preprocessing-token (identifier "f")) + (preprocessing-token (punctuator "(")) + (preprocessing-token (pp-number "1")) + (preprocessing-token (punctuator ",")) + (whitespace " ") + (preprocessing-token (punctuator "(")) + (preprocessing-token (pp-number "2")) + (preprocessing-token (punctuator ",")) + (whitespace " ") + (preprocessing-token (pp-number "3")) + (preprocessing-token (punctuator ")")) + (preprocessing-token (punctuator ",")) + (whitespace " ") + (preprocessing-token (pp-number "4")) + (preprocessing-token (punctuator ")"))) + (lex "f(1, (2, 3), 4)")) + + + +;; Generating a single lexeme +;; (whitespace " ") +;; would also be ok +(test-equal "Grouped whitespace" + '((whitespace " ") + (whitespace " ")) + (lex " ")) + +(test-equal "Newlines get sepparate whitespace tokens" + '((whitespace " ") + (whitespace " ") + (whitespace "\n") + (whitespace " ")) + (lex " \n ")) diff --git a/tests/test/cpp/preprocessor2.scm b/tests/test/cpp/preprocessor2.scm new file mode 100644 index 00000000..3d62e224 --- /dev/null +++ b/tests/test/cpp/preprocessor2.scm @@ -0,0 +1,390 @@ +(define-module (test cpp preprocessor2) + :use-module (srfi srfi-64) + :use-module (srfi srfi-64 util) + :use-module (srfi srfi-64 test-error) + :use-module (srfi srfi-71) + :use-module (srfi srfi-88) + :use-module ((hnh util) :select (unval)) + :use-module (c preprocessor2) + :use-module (c cpp-environment) + :use-module (c cpp-environment function-like-macro) + :use-module (c cpp-environment object-like-macro) + :use-module (c lex2)) + +;; TODO Not yet implemented +;; (test-expect-fail (test-match-group "Stringify")) +;; (test-expect-fail +;; (test-match-all (test-match-group "Expand stringifiers") +;; (test-match-name "Correct stringification of one param"))) + + +(define tokens-until-eol (@@ (c preprocessor2) tokens-until-eol)) +(test-group "Tokens until End Of Line" + (call-with-values + (lambda () + (tokens-until-eol + '(before (whitespace "\n") after))) + (lambda (bef aft) + (test-equal '(before) bef) + (test-equal '((whitespace "\n") after) aft)))) + + + +(define squeeze-whitespace (@@ (c preprocessor2) squeeze-whitespace)) +(test-equal "Squeeze whitespace" + '(bef (whitespace " ") aft) + (squeeze-whitespace + '(bef + (whitespace a) + (whitespace b) + aft))) + + + +(define stringify-token (@@ (c preprocessor2) stringify-token)) +(define stringify-tokens (@@ (c preprocessor2) stringify-tokens)) + +(test-group "Stringify" + (test-equal "(" + (stringify-token '(punctuator "("))) + ;; TODO more cases + + (test-equal (car (lex "\"(a, b)\"")) + (stringify-tokens (lex "(a, b)"))) + ) + + +(define parse-identifier-list (@@ (c preprocessor2) parse-identifier-list)) + +(test-group "Parse identifier list" + (test-group "Single argument" + (let ((rest args (parse-identifier-list (lex "x")))) + (test-assert (not rest)) + (test-equal '("x") args))) + + (test-group "Multiple parameters" + (let ((rest args (parse-identifier-list (lex "x, y")))) + (test-assert (not rest)) + (test-equal '("x" "y") args))) + + + (test-group "Rest args after regular" + (let ((rest args (parse-identifier-list (lex "x, ...")))) + (test-assert rest) + (test-equal '("x") args))) + + (test-group "Only rest args" + (let ((rest args (parse-identifier-list (lex "...")))) + (test-assert rest) + (test-equal '() args))) + + (test-group "Errors" + (test-error "Compound forms are invalid" + 'cpp-error (parse-identifier-list (lex "(y)"))) + + (test-error "Non-identifier atoms are invalid" + 'cpp-error (parse-identifier-list (lex "1"))) + + (test-error "Rest args not at end is invalid" + 'cpp-error (parse-identifier-list (lex "..., y"))))) + + + +(define expand-stringifiers (@@ (c preprocessor2) expand-stringifiers)) +(define build-parameter-map (@@ (c preprocessor2) build-parameter-map)) +(define parse-parameter-list (@@ (c preprocessor2) parse-parameter-list)) +(define cleanup-whitespace (@@ (c preprocessor2) cleanup-whitespace)) + +(test-equal "Clean up whitespace" + (lex "( 2 , 4 )") + (cleanup-whitespace (lex " \n ( 2 , \n 4 ) \t "))) + + +;; Parameter lists (the callsite arguments to the macro) +(test-group "Parameter list" + (test-group "Empty parameter list" + (let ((containing remaining nls (parse-parameter-list (lex "()")))) + (test-equal '() containing) + (test-equal '() remaining) + (test-equal 0 nls))) + + (test-group "Single value in parameter list" + (let ((containing remaining nls (parse-parameter-list (lex "(x)")))) + (test-equal (list (lex "x")) containing) + (test-equal '() remaining) + (test-equal 0 nls))) + + (test-group "Two values in parameter list" + (let ((containing remaining nls (parse-parameter-list (lex "(x, y)")))) + (test-equal (list (lex "x") + (lex "y")) + containing) + (test-equal '() remaining) + (test-equal 0 nls))) + + (test-group "Three values in parameter list" + (let ((containing remaining nls (parse-parameter-list (lex "(x, y, z)")))) + (test-equal (list (lex "x") + (lex "y") + (lex "z")) + containing) + (test-equal '() remaining) + (test-equal 0 nls))) + + (test-group "Numeric parameter" + (let ((containing remaining nls (parse-parameter-list (lex "(1)")))) + (test-equal (list (lex "1")) containing) + (test-equal '() remaining) + (test-equal 0 nls)) + ) + + (test-group "Two values, one of which is a paretheseed pair" + (let ((containing remaining nls + (parse-parameter-list (lex "(x, (y, z))")))) + (test-equal (list (lex "x") (lex "(y, z)")) + containing) + (test-equal '() remaining) + (test-equal 0 nls)))) + +(test-group "Build parameter map" + (test-equal "Simplest case, zero arguments" + '() + (let ((m (function-like-macro + identifier: "str" + identifier-list: '() + body: (lex "#x")))) + (build-parameter-map + m '() #; (list (lex "x")) + ))) + + (test-equal "Single (simple) argument" + `(("x" . ,(lex "x"))) + (let ((m (function-like-macro + identifier: "str" + identifier-list: '("x") + body: '()))) + (build-parameter-map + m + (list (lex "x"))))) + + (test-equal "Single advanced argument" + `(("x" . ,(lex "(x)"))) + (let ((m (function-like-macro + identifier: "str" + identifier-list: '("x") + body: '()))) + (build-parameter-map + m (list (lex "(x)"))))) + + (test-group "Rest arguments" + (test-equal "Single simple" + `(("__VA_ARGS__" . ,(list (lex "x")))) + (let ((m (function-like-macro + identifier: "str" + identifier-list: '() + variadic?: #t + body: '()))) + (build-parameter-map + m (list (lex "x"))))) + + #; + (test-equal "Two simple" + '() + (let ((m (function-like-macro + identifier: "str" + identifier-list: '() + variadic?: #t + body: '()))) + (build-parameter-map + m (list (lex "x"))))) + )) + + + +(test-group "Expand stringifiers" + (let ((m (function-like-macro + identifier: "str" + identifier-list: '("x") + body: (lex "#x")))) + (test-equal "Correct stringification of one param" + (lex "\"10\"") + (expand-stringifiers + m (build-parameter-map + m (list (lex "10")))))) + + (let ((m (function-like-macro + identifier: "str" + identifier-list: '() + body: (lex "#x")))) + (test-error "Stringification fails for non-parameters" + 'macro-expand-error + (expand-stringifiers + m (build-parameter-map + m (list (lex "x"))))))) + +;; TODO expand-join +;; token ## token2 + +(define join-file-line (@@ (c preprocessor2) join-file-line)) + +(let ((e (join-file-line (make-environment)))) + (test-equal (object-like-macro identifier: "__FILE__" + body: '((preprocessing-token (string-literal "*outside*")))) + (get-identifier e "__FILE__")) + (test-equal (object-like-macro identifier: "__LINE__" + body: '((preprocessing-token (pp-number "1")))) + (get-identifier e "__LINE__"))) + +(define resolve-token-stream (@@ (c preprocessor2) resolve-token-stream)) + +(test-group "Token streams" + (test-group "Non-expanding" + (test-equal "Null stream" + '() (resolve-token-stream (make-environment) '())) + (test-equal "Constant resolve to themselves" + (lex "1") (resolve-token-stream (make-environment) (lex "1"))) + (test-equal "Identifier-likes not in environment stay put" + (lex "x") (resolve-token-stream (make-environment) (lex "x"))) + (test-equal "Identifier-likes with stuff after keep stuff after" + (lex "x 1") (resolve-token-stream (make-environment) (lex "x 1")))) + + (test-group "Object likes" + (test-equal "Expansion of single token" + (lex "10") (resolve-token-stream (extend-environment (make-environment) + (list (object-like-macro + identifier: "x" + body: (lex "10")))) + (lex "x"))) + + (test-equal "Expansion keeps stuff after" + (lex "10 1") (resolve-token-stream (extend-environment (make-environment) + (list (object-like-macro + identifier: "x" + body: (lex "10")))) + (lex "x 1"))) + + (test-equal "Multiple object like macros in one stream" + (lex "10 20") + (resolve-token-stream (extend-environment (make-environment) + (list (object-like-macro + identifier: "x" + body: (lex "10")) + (object-like-macro + identifier: "y" + body: (lex "20")))) + (lex "x y"))) + ) + + ;; TODO + + ;; (test-group "Function likes") + + ;; (test-group "Mix of object and function likes") + + ) + +(define expand-macro (@@ (c preprocessor2) expand-macro)) +(define resolve-define (@@ (c preprocessor2) resolve-define)) +(define apply-macro (@@ (c preprocessor2) apply-macro)) +(define maybe-extend-identifier (@@ (c preprocessor2) maybe-extend-identifier)) + +(test-group "Macro expansion" + (test-group "Expand macro part 1" + ;; Expand object like macros + ;; apply-macro depends on this, but expand macro with function like macros + ;; depend on apply-macro, thereby the two parter + (test-group "Object like macros" + (call-with-values + (lambda () (expand-macro (make-environment) + (object-like-macro + identifier: "x" body: (lex "1 + 2")) + '())) + (lambda (_ tokens) (test-equal "Simplest case" (lex "1 + 2") tokens))) + + (call-with-values + (lambda () (expand-macro (make-environment) + (object-like-macro + identifier: "x" body: (lex "1+2")) + (cdr (lex "x something else")))) + (lambda (_ tokens) (test-equal "Expansion with stuff after" + (lex "1+2 something else") tokens))) + + ;; (call-with-values (expand-macro (make-environment))) + + )) + + +(test-group "Maybe extend identifier" + (test-equal "Non-identifier returns remaining" + '() ((unval maybe-extend-identifier 1) + (make-environment) + "x" + '())) + + (test-equal "Non-identifiers remaining tokens are returned verbatim" + '(remaining) ((unval maybe-extend-identifier 1) + (make-environment) + "x" + '(remaining))) + + (test-equal "Object like identifier expands" + (lex "1 + 2") + ((unval maybe-extend-identifier 1) + (extend-environment (make-environment) + (list + (object-like-macro + identifier: "x" + body: (lex "1 + 2")))) + "x" + '())) + + (test-equal "Object like macro still returns remaining verbatim" + (append (lex "1 + 2") '(remaining)) + ((unval maybe-extend-identifier 1) + (extend-environment (make-environment) + (list + (object-like-macro + identifier: "x" + body: (lex "1 + 2")))) + "x" + '(remaining))) + + ) + + (test-group "Apply macro" + (test-equal "zero arg macro on nothing" + (lex "1") + (apply-macro + (make-environment) + (function-like-macro identifier: "f" + identifier-list: '() + body: (lex "1")) + '())) + + (test-equal "Single arg macro" + (lex "10") + (apply-macro + (make-environment) + (function-like-macro identifier: "f" + identifier-list: '("x") + body: (lex "x")) + (lex "10")))) + + (test-group "Expand macro part 2" + (test-group "Function like macros" + (let ((e (make-environment)) + (m (function-like-macro + identifier: "f" + identifier-list: '() + body: (lex "1")))) + (call-with-values (lambda () (expand-macro e m (lex "()"))) + (lambda (_ tokens*) (test-equal (lex "1") tokens*))) + ;; TODO this should raise an arity error + (call-with-values (lambda () (expand-macro e m (lex "(10)"))) + (lambda (_ tokens*) (test-equal '() tokens*))))))) + +(define apply-macro (@@ (c preprocessor2) apply-macro)) + + +;; (resolve-define (make-environment) +;; (lex "f(x) x+1")) diff --git a/tests/test/util.scm b/tests/test/util.scm index 1de96a37..aa37d20c 100644 --- a/tests/test/util.scm +++ b/tests/test/util.scm @@ -187,6 +187,25 @@ (test-error 'wrong-type-arg (find-extreme '())) +;; TODO group-by +;; TODO split-by + +(test-group "Split-by-one-of" + + (test-equal "Empty input" + '(()) (split-by-one-of '() '(+))) + + (test-equal "No matching tokens" + '((1 + 2)) (split-by-one-of '(1 + 2) '(/))) + + (test-equal "Matching tokens" + '((1) (+ 2) (- 3)) + (split-by-one-of '(1 + 2 - 3) '(+ -))) + + (test-equal "Maching tokens, multiple values in each group" + '((1 + 2) (* 3 + 4)) + (split-by-one-of '(1 + 2 * 3 + 4) '(*)))) + (call-with-values (lambda () (span-upto |