From 3413f60db482ce7e6d6d786348723a2b406d1038 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20H=C3=B6rnquist?= Date: Thu, 21 Jul 2022 18:05:59 +0200 Subject: Remove old unused files. --- module/c/cpp.scm | 151 ----------- module/c/eval.scm | 265 ------------------ module/c/eval/environment.scm | 34 --- module/c/lex.scm | 163 ------------ module/c/old/cpp.scm | 151 +++++++++++ module/c/old/lex.scm | 163 ++++++++++++ module/c/old/operators.scm | 24 ++ module/c/old/parse.scm | 426 +++++++++++++++++++++++++++++ module/c/operators.scm | 24 -- module/c/parse.scm | 426 ----------------------------- module/c/preprocessor.scm | 370 -------------------------- module/c/util.scm | 20 -- module/c/zipper.scm | 60 ----- module/glob.scm | 2 +- module/vulgar/termios.scm | 2 +- tests/test/c-parse.scm | 8 +- tests/test/cpp.scm | 604 ------------------------------------------ 17 files changed, 770 insertions(+), 2123 deletions(-) delete mode 100644 module/c/cpp.scm delete mode 100644 module/c/eval.scm delete mode 100644 module/c/eval/environment.scm delete mode 100644 module/c/lex.scm create mode 100644 module/c/old/cpp.scm create mode 100644 module/c/old/lex.scm create mode 100644 module/c/old/operators.scm create mode 100644 module/c/old/parse.scm delete mode 100644 module/c/operators.scm delete mode 100644 module/c/parse.scm delete mode 100644 module/c/preprocessor.scm delete mode 100644 module/c/util.scm delete mode 100644 module/c/zipper.scm delete mode 100644 tests/test/cpp.scm diff --git a/module/c/cpp.scm b/module/c/cpp.scm deleted file mode 100644 index aed496f2..00000000 --- a/module/c/cpp.scm +++ /dev/null @@ -1,151 +0,0 @@ -(define-module (c cpp) - :use-module (hnh util) - :use-module (srfi srfi-1) - :use-module (ice-9 popen) - :use-module (ice-9 match) - :use-module (ice-9 regex) - :use-module ((rnrs io ports) :select (call-with-port)) - :use-module ((rnrs bytevectors) :select (bytevector?)) - :use-module (ice-9 format) - :use-module ((hnh util io) :select (read-lines)) - :use-module (hnh util graph) - :use-module (c lex) - :use-module (c parse) - :use-module (c operators) - :export (replace-symbols include#) - ) - - -;; input "#define F(x, y) x + y" -;; 1 full define | F(x,y) -;; 2 macro name | F -;; 3 macro args | (x,y) -;; 5 macro body | x + y or #f -(define define-re (make-regexp "^#define ((\\w+)([(][^)]*[)])?)( (.*))?")) - -(define (tokenize-define-line header-line) - (aif (regexp-exec define-re header-line) - (cons (match:substring it 1) - (let ((body (match:substring it 5))) - (if (or (eqv? body #f) - (string-null? body)) - "1" body))) - (scm-error 'c-parse-error - "tokenize-define-line" - "Line dosen't match: ~s" - (list header-line) #f))) - - -(define (replace-symbols tree dict) - (if (not (list? tree)) - (or (assoc-ref dict tree) tree) - (map (lambda (node) (replace-symbols node dict)) - tree))) - -;; Direct values. Lisp also has quoted symbols in this group. -(define (immediate? x) - (or (number? x) - (bytevector? x))) - -;; TODO replace this with something sensible -;; like a correct list extracted from (c eval) -;; and not thinging that types are variables -;; built in symbols. Should never be marked as dependencies -(define (primitive? x) - (memv x `( - ;; language primitives - sizeof - - ;; special forms introduced by parser - funcall ternary struct-type as-type - - ;; unary operatons which aren't also binary operators - ++ -- ! ~ - not compl dereference pointer - pre-increment pre-decrement - post-increment post-decrement - ,@binary-operators - ))) - - - -;; (symbol . value) -> (list (dependencies . symbol . value) -(define (parse-cpp-define pair) - (define f (compose parse-lexeme-tree lex)) - (define left (f (car pair))) - (define proc-args - (match (and (pair? left) - (eq? 'funcall (car left)) - (caddr left)) - [#f '()] - [(_ args ...) args] - [arg (list arg)])) - - (define right (f (cdr pair))) - (define dependencies - (lset-difference - eq? - (remove primitive? - (remove immediate? - (flatten (if (list? right) - right (list right))))) - proc-args)) - - (cons - dependencies - (match left - [('funcall name ('#{,}# args ...)) - (cons name `(lambda ,args ,right))] - - [('funcall name arg) - (cons name `(lambda (,arg) ,right))] - - [name (cons name right)]))) - - -(define (parse-cpp-file lines) - (map (lambda (line) - (catch #t - (lambda () (parse-cpp-define line)) - (lambda (err caller fmt args data) - (format #t "~a in ~a: ~?~%" - err caller fmt args) - (format #t "~s~%" line) - #f))) - lines)) - -(define (private-c-symbol? string) - (char=? #\_ (string-ref string 0))) - -(define (tokenize-header-file header-file) - (map tokenize-define-line - (call-with-port - (open-pipe* OPEN_READ "cpp" "-dM" header-file) - read-lines))) - -(define (load-cpp-file header-file) - - (define lines (tokenize-header-file header-file)) - (define forms (parse-cpp-file lines)) - - (fold (lambda (node graph) - (add-node graph (cdr node) (car node))) - (make-graph car) - (filter identity forms))) - -(define (include% header-file) - (define graph* (load-cpp-file header-file)) - ;; Hack for termios since this symbol isn't defined. - ;; (including in the above removed private c symbols) - (define graph (add-node graph* (cons '_POSIX_VDISABLE 0) '())) - ;; TODO expand bodies - ;; (remove (compose private-c-symbol? car)) - (resolve-dependency-graph graph)) - -(define-macro (include# header-file . args) - - (define define-form (if (null? args) 'define (car args))) - - `(begin - ,@(map (lambda (pair) `(,define-form ,(car pair) ,(cdr pair))) - (include% header-file)))) diff --git a/module/c/eval.scm b/module/c/eval.scm deleted file mode 100644 index 67d0075d..00000000 --- a/module/c/eval.scm +++ /dev/null @@ -1,265 +0,0 @@ -(define-module (c eval) - :use-module (hnh util) - :use-module (srfi srfi-1) - :use-module (srfi srfi-71) - :use-module (srfi srfi-88) - :use-module (ice-9 match) - :use-module (ice-9 curried-definitions) - :use-module ((rnrs bytevectors) - :select (bytevector?)) - :use-module ((rnrs arithmetic bitwise) - :select (bitwise-not - bitwise-and - bitwise-ior - bitwise-xor - bitwise-arithmetic-shift-left - bitwise-arithmetic-shift-right)) - :use-module (c eval environment) - :export (c-procedure? - procedure-formals - procedure-body - procedure-arity - - c-eval - )) - -(define C-TRUE 1) -(define C-FALSE 0) - -(define (boolean->c-boolean bool) - (if bool C-TRUE C-FALSE)) - -(define (c-boolean->boolean bool) - (not (zero? bool))) - -(define (c-not b) - (-> b c-boolean->boolean not boolean->c-boolean)) - -(define (c-procedure? expr) - (and (list? expr) - (not (null? expr)) - (eq? 'lambda (car expr)))) - -(define* (ensure-c-procedure expr optional: calling-procedure) - (unless (c-procedure? expr) - (scm-error 'c-eval-error calling-procedure - "Value not a procedure: ~s" - (list procedure #f)))) - -(define (procedure-formals procedure) - (ensure-c-procedure procedure "procedure-formals") - (list-ref procedure 1)) - -(define (procedure-body procedure) - (ensure-c-procedure procedure "procedure-body") - (list-ref procedure 2)) - -(define (procedure-arity procedure) - (length (procedure-formals procedure))) - -(define (literal? expression) - (or (number? expression) - (bytevector? expression))) - - - -;; Internal helper procedures - -(define (mod-set operation) - (lambda (env slot value) - ;; a += b - ;; a = a + b - (c-eval env `(= ,slot (,operation ,slot ,value))))) - -(define (fold-2 proc init lst) - (car+cdr - (fold (lambda (arg env+done) - (let ((env* arg* (proc (car env+done) arg))) - (cons* env* arg* (cdr env+done)))) - init - lst))) - -;; TODO this disregards -;; - floating point convertions -;; - integer truncation -(define ((simple-infix operation) env . operands) - (let ((env done (fold-2 c-eval (cons env '()) operands))) - (values env (apply operation (reverse done))))) - -(define ((binary-operator proc) env i c) - (let ((env* i* (c-eval env i))) - (let ((env** c* (c-eval env* c))) - (values env** (proc i* c*))))) - - - - -;; The order of evaluation for a number of these is undefined, meaning -;; that any side effects without sequence points is undefined. -;; However, for many of these I do the sensible thing and evaluate them -;; from left to right, instead of ignoring all side effects. - -;; TODO double check these with a C standard - - -;; Operators have their own namespace. They are called without funcall -;; in the pseudo-lisp which C is compiled to, and they expand more like -;; lisp macros, since its up to each operator what to do with its operands. -;; This to allow assignment and short circuting. -(define primitives - `((and . ,(lambda (env . operands) - (let loop ((env env) (operands operands)) - (if (null? operands) - (values env C-TRUE) - (let* ((env* result (c-eval env (car operands)))) - (if (c-boolean->boolean result) - (loop env* (cdr operands)) - (values env* result))))))) - (or . ,(lambda (env . operands) - (let loop ((env env) (operands operands)) - (if (null? operands) - (values env C-FALSE) - (let* ((env* result (c-eval env (car operands)))) - (if (false? result) - (values env* result) - (loop env* (cdr operands)))))))) - (= . ,(lambda (env slot value) - ;; TOOD if slot isn't a variable, but a field (or array index) - ;; then it needs to be resolved... - (let ((env* result (c-eval env value))) - (values (env-set! env* slot result) - result)))) - (and_eq ,(mod-set 'bitand)) ; &= - (or_eq ,(mod-set 'bitor)) ; |= - (xor_eq ,(mod-set 'xor)) ; ^= - (+= ,(mod-set '+)) - (-= ,(mod-set '-)) - (*= ,(mod-set '*)) - (/= ,(mod-set '/)) - (<<= ,(mod-set '<<)) - (>>= ,(mod-set '>>)) - (%= ,(mod-set '%)) - (+ . ,(simple-infix +)) - (* . ,(simple-infix *)) - (/ . ,(simple-infix /)) - (- . ,(lambda (env op . operands) - (if (null? operands) - (let ((env* value (c-eval env op))) - (values env* (- value))) - (apply (simple-infix -) - env op operands)))) - (bitor . ,(simple-infix bitwise-ior)) - (bitand . ,(simple-infix bitwise-and)) - (xor . ,(simple-infix bitwise-xor)) - (not_eq . ,(lambda (env a b) (c-eval env `(not (== ,a ,b))))) ; != - (<< . ,(binary-operator bitwise-arithmetic-shift-left)) - (>> . ,(binary-operator bitwise-arithmetic-shift-right)) - (< . ,(binary-operator (compose boolean->c-boolean <))) - (> . ,(binary-operator (compose boolean->c-boolean >))) - ;; this assumes that = handles pointers - (== . ,(binary-operator (compose boolean->c-boolean =))) - (<= . ,(binary-operator (compose boolean->c-boolean <=))) - (>= . ,(binary-operator (compose boolean->c-boolean >=))) - (% . ,(binary-operator modulo)) - - (not . ,(lambda (env value) - (let ((env* result (c-eval env value))) - (values env* (c-not result))))) - (compl . ,(lambda (env value) - (let ((env* result (c-eval env value))) - (values env* (bitwise-not result))))) - - ;; ++C - (pre-increment . ,(lambda (env slot) (c-eval env `(+= ,slot 1)))) - (pre-decrement . ,(lambda (env slot) (c-eval env `(-= ,slot 1)))) - ;; TODO these (C++, C--) need to handle if slot isn't a direct variabl - (post-increment . ,(lambda (env slot) - (let ((value (env-ref env slot))) - (values (env-set! env slot (1+ value)) - value)))) - (pre-decrement . ,(lambda (env slot) - (let ((value (env-ref env slot))) - (values (env-set! env slot (1+ value)) - value)))) - - (ternary . ,(lambda (env test true-clause false-clause) - (let ((env* value (c-eval env test))) - (c-eval env* - (if (c-boolean->boolean value) - true-clause false-clause))))) - - ;; TODO remaining operations - (as-type . ,(lambda (env target-type value) - (format (current-error-port) "cast<~s>(~s)~%" target-type value) - (values env value))) - - (object-slot . ,(lambda (env object slot) - (scm-error 'not-implemented "object-slot" - "Object slots are not implemented, when accessing ~s.~s" - (list object slot) #f))) - (dereference-slot ,(lambda (env ptr slot) - (scm-error 'not-implemented "dereference-slot" - "Object slots are not implemented, when accessing ~s->~s" - (list object slot) #f))) - (dereference . ,(lambda (env ptr) - (scm-error 'not-implemented "dereference" - "Poiner dereferencing is not implemented: *~s" - (list ptr) #f))) - (pointer . ,(lambda (env value) - (scm-error 'not-implemented "pointer" - "Pointer of is not implemented: &~s" - (list value) #f))))) - -;; TODO |,| - - -(define (c-eval environment expression) - (match expression - (('lambda formals body) (values environment `(lambda ,formals ,body))) - ;; hack since sizeof really should be a operator - (('funcall 'sizeof arg) - ;; TODO - (format (current-error-port) "sizeof ~s~%" arg) - (values environment 1)) - - (('funcall procedure-name ('#{,}# args ...)) - (let ((procedure (env-ref environment procedure-name))) - (ensure-c-procedure procedure "c-eval") - (unless (= (length args) (procedure-arity procedure)) - (scm-error 'c-eval-error "c-eval" - "Procedure arity mismatch for ~s, expected ~s, got ~s" - (list procedure-name - (procedure-arity procedure) - (length args)) - #f)) - (let ((env args* (fold-2 c-eval (cons environment '()) args ))) - (let ((inner-environment - (fold (lambda (name value env) (env-set! env name value)) - (push-frame! env) - (procedure-formals procedure) args*))) - (let ((resulting-environment - result-value - (c-eval inner-environment (procedure-body procedure)))) - (values (pop-frame! resulting-environment) - result-value)))))) - - (('funcall procedure arg) - (c-eval environment `(funcall ,procedure (#{,}# ,arg)))) - - ((operator operands ...) - (apply (or (assoc-ref primitives operator) - (scm-error 'c-eval-error "c-eval" - "Applying non-existant primitive operator: ~s, operands: ~s" - (list operator operands) #f)) - environment operands)) - - ;; "f()" gets compiled to simply f - ;; meaning that we instead use the environment to determine - ;; if something is a variable or procedure - (expr - (if (literal? expr) - (values environment expr) - (let ((value (env-ref environment expr))) - (if (c-procedure? value) - (c-eval environment `(funcall ,value (#{,}#))) - (values environment value))))))) diff --git a/module/c/eval/environment.scm b/module/c/eval/environment.scm deleted file mode 100644 index 12eefaf7..00000000 --- a/module/c/eval/environment.scm +++ /dev/null @@ -1,34 +0,0 @@ -(define-module (c eval environment) - :use-module (srfi srfi-1) - :export (make-environment - env-set! env-ref push-frame! pop-frame!)) - -(define (make-frame) - (make-hash-table)) - -(define (make-environment) - (list (make-frame))) - -;; Returns an updated environment, linear update -(define (env-set! env key value) - ;; get handle to differentiate #f - ;; (even though #f should never be stored since it's not a C value) - (cond ((find (lambda (frame) (hashq-get-handle frame key)) env) - => (lambda (frame) (hashq-set! frame key value))) - (else (hashq-set! (car env) key value))) - env) - -(define (env-ref env key) - (cond ((null? env) - (scm-error 'misc-error "env-ref" - "~s unbound" - (list key) - #f)) - ((hashq-get-handle (car env) key) => cdr) - (else (env-ref (cdr env) key)))) - -(define (push-frame! environment) - (cons (make-frame) environment)) - -(define (pop-frame! environment) - (cdr environment)) diff --git a/module/c/lex.scm b/module/c/lex.scm deleted file mode 100644 index 0bde5c9e..00000000 --- a/module/c/lex.scm +++ /dev/null @@ -1,163 +0,0 @@ -(define-module (c lex) - :use-module (ice-9 peg) - :use-module (c operators) - :export (lex)) - - -;; Like define-peg-pattern, but body is evaluated -(define-syntax define-peg-pattern* - (lambda (stx) - (syntax-case stx () - ((_ sym accum pat) - #`(define sym - (let ((matchf (compile-peg-pattern (datum->syntax #'stx pat) 'accum))) - (let ((syn ((@ (ice-9 peg codegen) wrap-parser-for-users) #'stx matchf 'accum 'sym))) - ((@ (system base compile) compile) - ((@ (ice-9 peg cache) cg-cached-parser) - syn))))))))) - - - - -(define-peg-pattern base-8-digit body - (range #\0 #\7)) - -(define-peg-pattern base-10-digit body - (range #\0 #\9)) - -(define-peg-pattern base-16-digit body - (or (range #\0 #\9) - (range #\A #\F) - (range #\a #\f))) - -;; https://en.cppreference.com/w/cpp/language/integer_literal -(define-peg-pattern base-10 all (+ base-10-digit)) -(define-peg-pattern base-8 all (and (ignore "0") (+ base-8-digit))) -(define-peg-pattern base-16 all (and (ignore (and "0" (or "x" "X"))) - (+ base-16-digit))) - -;; accept anything now, ensure correctnes later -(define-peg-pattern integer-suffix all - (* (or "u" "U" "l" "L"))) - -(define-peg-pattern integer all - (and (or base-8 base-16 base-10) (? integer-suffix))) - -(define-peg-pattern float-suffix all - (* (or "f" "F" "l" "L"))) - -(define-peg-pattern exponent all - (and (ignore (or "e" "E")) (? (or "+" "-")) base-10)) - -;; Helper patterns for creating named groups in float -(define-peg-pattern float-integer all base-10) -(define-peg-pattern float-decimal all base-10) - -(define-peg-pattern float all - (or (and float-integer exponent (? float-suffix)) - (and (? float-integer) (ignore ".") float-decimal (? exponent) (? float-suffix)) - (and float-integer (ignore ".") (? exponent) (? float-suffix)))) - -(define-peg-pattern number body - (or float integer)) - -(define-peg-pattern group all - (and (ignore "(") expr (ignore ")"))) - -(define-peg-pattern base-8-char all - (and base-8-digit - (? base-8-digit) - (? base-8-digit))) - -(define-peg-pattern base-16-char all - (and (ignore "x") base-16-digit (? base-16-digit))) - -(define-peg-pattern escaped-char all - (and (ignore "\\") (or base-16-char - base-8-char - peg-any))) - -(define-peg-pattern char all - (and (ignore "'") (or escaped-char peg-any) (ignore "'"))) - -(define-peg-pattern quot none "\"") - -(define-peg-pattern string all - (and quot (* (or escaped-char (and (not-followed-by "\"") peg-any))) quot)) - -(define-peg-pattern* operator all - `(or ,@(map symbol->string symbol-binary-operators) - ,@(map (lambda (op) `(and ,(symbol->string op) ws)) - wordy-binary-operators) - "?" ":")) - -;; whitespace -(define-peg-pattern ws none - (or " " " " "\n")) - -;; space (for when whitespace is optional) -(define-peg-pattern sp none (* ws)) - -(define-peg-pattern safe-letter body - (or "_" - (range #\A #\Z) - (range #\a #\z))) - -(define-peg-pattern variable all - (and safe-letter - (* (or safe-letter - base-10-digit)))) - -(define-peg-pattern prefix-operator all - ;; It's important that ++ and -- are BEFORE + and - - ;; otherwise the first + is found, leaving the second +, which fails - ;; to lex since it's an invalid token - ;; TODO sizeof can be written as a prefix operator - ;; (without parenthesis) if the operand is an expression. - (or "*" "&" "++" "--" - "!" "~" "+" "-")) - - -;;; Note that stacked pre or postfix operators without parenthesis -;;; dosen't work. So `*&C' is invalid, while `*(&C)' is valid. - -(define-peg-pattern prefix all - (and prefix-operator sp (or variable group funcall literal))) - -(define-peg-pattern postfix-operator all - (or "++" "--" "*")) - -(define-peg-pattern postfix all - ;; literals can't be in-place incremented and decremented - ;; Make sure we don't match postfix-operator here, since - ;; that also gives us an infinite loop. - (and (or prefix funcall group variable) sp postfix-operator)) - -(define-peg-pattern infix all - ;; first case is "same" as expr, but in different order to prevent - ;; infinite self reference. Pre and postfix not here, solved by having - ;; them before infix in expr - (and (or funcall postfix prefix group literal variable) - sp operator sp expr)) - -(define-peg-pattern funcall all - (and variable sp group)) - -(define-peg-pattern literal body - (or char string number)) - -;;; main parser -(define-peg-pattern expr body - (+ (and sp (or - ;; float must be BEFORE infix, otherwise 3.2 is parsed as (infix 3 (operator ".") 2) - ;; that however breaks the infix logic, meaning that floating point numbers can't be - ;; used in basic arithmetic. - ;; TODO remove all implicit order of operations handling in the lexer, and move it to - ;; the parser. This should also fix the case of typecasts being applied incorrectly. - float - infix postfix prefix funcall group literal variable) - sp))) - - -(define (lex string) - (peg:tree (match-pattern expr string))) diff --git a/module/c/old/cpp.scm b/module/c/old/cpp.scm new file mode 100644 index 00000000..1623bd11 --- /dev/null +++ b/module/c/old/cpp.scm @@ -0,0 +1,151 @@ +(define-module (c old cpp) + :use-module (hnh util) + :use-module (srfi srfi-1) + :use-module (ice-9 popen) + :use-module (ice-9 match) + :use-module (ice-9 regex) + :use-module ((rnrs io ports) :select (call-with-port)) + :use-module ((rnrs bytevectors) :select (bytevector?)) + :use-module (ice-9 format) + :use-module ((hnh util io) :select (read-lines)) + :use-module (hnh util graph) + :use-module (c old lex) + :use-module (c old parse) + :use-module (c old operators) + :export (replace-symbols include#) + ) + + +;; input "#define F(x, y) x + y" +;; 1 full define | F(x,y) +;; 2 macro name | F +;; 3 macro args | (x,y) +;; 5 macro body | x + y or #f +(define define-re (make-regexp "^#define ((\\w+)([(][^)]*[)])?)( (.*))?")) + +(define (tokenize-define-line header-line) + (aif (regexp-exec define-re header-line) + (cons (match:substring it 1) + (let ((body (match:substring it 5))) + (if (or (eqv? body #f) + (string-null? body)) + "1" body))) + (scm-error 'c-parse-error + "tokenize-define-line" + "Line dosen't match: ~s" + (list header-line) #f))) + + +(define (replace-symbols tree dict) + (if (not (list? tree)) + (or (assoc-ref dict tree) tree) + (map (lambda (node) (replace-symbols node dict)) + tree))) + +;; Direct values. Lisp also has quoted symbols in this group. +(define (immediate? x) + (or (number? x) + (bytevector? x))) + +;; TODO replace this with something sensible +;; like a correct list extracted from (c eval) +;; and not thinging that types are variables +;; built in symbols. Should never be marked as dependencies +(define (primitive? x) + (memv x `( + ;; language primitives + sizeof + + ;; special forms introduced by parser + funcall ternary struct-type as-type + + ;; unary operatons which aren't also binary operators + ++ -- ! ~ + not compl dereference pointer + pre-increment pre-decrement + post-increment post-decrement + ,@binary-operators + ))) + + + +;; (symbol . value) -> (list (dependencies . symbol . value) +(define (parse-cpp-define pair) + (define f (compose parse-lexeme-tree lex)) + (define left (f (car pair))) + (define proc-args + (match (and (pair? left) + (eq? 'funcall (car left)) + (caddr left)) + [#f '()] + [(_ args ...) args] + [arg (list arg)])) + + (define right (f (cdr pair))) + (define dependencies + (lset-difference + eq? + (remove primitive? + (remove immediate? + (flatten (if (list? right) + right (list right))))) + proc-args)) + + (cons + dependencies + (match left + [('funcall name ('#{,}# args ...)) + (cons name `(lambda ,args ,right))] + + [('funcall name arg) + (cons name `(lambda (,arg) ,right))] + + [name (cons name right)]))) + + +(define (parse-cpp-file lines) + (map (lambda (line) + (catch #t + (lambda () (parse-cpp-define line)) + (lambda (err caller fmt args data) + (format #t "~a in ~a: ~?~%" + err caller fmt args) + (format #t "~s~%" line) + #f))) + lines)) + +(define (private-c-symbol? string) + (char=? #\_ (string-ref string 0))) + +(define (tokenize-header-file header-file) + (map tokenize-define-line + (call-with-port + (open-pipe* OPEN_READ "cpp" "-dM" header-file) + read-lines))) + +(define (load-cpp-file header-file) + + (define lines (tokenize-header-file header-file)) + (define forms (parse-cpp-file lines)) + + (fold (lambda (node graph) + (add-node graph (cdr node) (car node))) + (make-graph car) + (filter identity forms))) + +(define (include% header-file) + (define graph* (load-cpp-file header-file)) + ;; Hack for termios since this symbol isn't defined. + ;; (including in the above removed private c symbols) + (define graph (add-node graph* (cons '_POSIX_VDISABLE 0) '())) + ;; TODO expand bodies + ;; (remove (compose private-c-symbol? car)) + (resolve-dependency-graph graph)) + +(define-macro (include# header-file . args) + + (define define-form (if (null? args) 'define (car args))) + + `(begin + ,@(map (lambda (pair) `(,define-form ,(car pair) ,(cdr pair))) + (include% header-file)))) diff --git a/module/c/old/lex.scm b/module/c/old/lex.scm new file mode 100644 index 00000000..dcc7336d --- /dev/null +++ b/module/c/old/lex.scm @@ -0,0 +1,163 @@ +(define-module (c old lex) + :use-module (ice-9 peg) + :use-module (c old operators) + :export (lex)) + + +;; Like define-peg-pattern, but body is evaluated +(define-syntax define-peg-pattern* + (lambda (stx) + (syntax-case stx () + ((_ sym accum pat) + #`(define sym + (let ((matchf (compile-peg-pattern (datum->syntax #'stx pat) 'accum))) + (let ((syn ((@ (ice-9 peg codegen) wrap-parser-for-users) #'stx matchf 'accum 'sym))) + ((@ (system base compile) compile) + ((@ (ice-9 peg cache) cg-cached-parser) + syn))))))))) + + + + +(define-peg-pattern base-8-digit body + (range #\0 #\7)) + +(define-peg-pattern base-10-digit body + (range #\0 #\9)) + +(define-peg-pattern base-16-digit body + (or (range #\0 #\9) + (range #\A #\F) + (range #\a #\f))) + +;; https://en.cppreference.com/w/cpp/language/integer_literal +(define-peg-pattern base-10 all (+ base-10-digit)) +(define-peg-pattern base-8 all (and (ignore "0") (+ base-8-digit))) +(define-peg-pattern base-16 all (and (ignore (and "0" (or "x" "X"))) + (+ base-16-digit))) + +;; accept anything now, ensure correctnes later +(define-peg-pattern integer-suffix all + (* (or "u" "U" "l" "L"))) + +(define-peg-pattern integer all + (and (or base-8 base-16 base-10) (? integer-suffix))) + +(define-peg-pattern float-suffix all + (* (or "f" "F" "l" "L"))) + +(define-peg-pattern exponent all + (and (ignore (or "e" "E")) (? (or "+" "-")) base-10)) + +;; Helper patterns for creating named groups in float +(define-peg-pattern float-integer all base-10) +(define-peg-pattern float-decimal all base-10) + +(define-peg-pattern float all + (or (and float-integer exponent (? float-suffix)) + (and (? float-integer) (ignore ".") float-decimal (? exponent) (? float-suffix)) + (and float-integer (ignore ".") (? exponent) (? float-suffix)))) + +(define-peg-pattern number body + (or float integer)) + +(define-peg-pattern group all + (and (ignore "(") expr (ignore ")"))) + +(define-peg-pattern base-8-char all + (and base-8-digit + (? base-8-digit) + (? base-8-digit))) + +(define-peg-pattern base-16-char all + (and (ignore "x") base-16-digit (? base-16-digit))) + +(define-peg-pattern escaped-char all + (and (ignore "\\") (or base-16-char + base-8-char + peg-any))) + +(define-peg-pattern char all + (and (ignore "'") (or escaped-char peg-any) (ignore "'"))) + +(define-peg-pattern quot none "\"") + +(define-peg-pattern string all + (and quot (* (or escaped-char (and (not-followed-by "\"") peg-any))) quot)) + +(define-peg-pattern* operator all + `(or ,@(map symbol->string symbol-binary-operators) + ,@(map (lambda (op) `(and ,(symbol->string op) ws)) + wordy-binary-operators) + "?" ":")) + +;; whitespace +(define-peg-pattern ws none + (or " " " " "\n")) + +;; space (for when whitespace is optional) +(define-peg-pattern sp none (* ws)) + +(define-peg-pattern safe-letter body + (or "_" + (range #\A #\Z) + (range #\a #\z))) + +(define-peg-pattern variable all + (and safe-letter + (* (or safe-letter + base-10-digit)))) + +(define-peg-pattern prefix-operator all + ;; It's important that ++ and -- are BEFORE + and - + ;; otherwise the first + is found, leaving the second +, which fails + ;; to lex since it's an invalid token + ;; TODO sizeof can be written as a prefix operator + ;; (without parenthesis) if the operand is an expression. + (or "*" "&" "++" "--" + "!" "~" "+" "-")) + + +;;; Note that stacked pre or postfix operators without parenthesis +;;; dosen't work. So `*&C' is invalid, while `*(&C)' is valid. + +(define-peg-pattern prefix all + (and prefix-operator sp (or variable group funcall literal))) + +(define-peg-pattern postfix-operator all + (or "++" "--" "*")) + +(define-peg-pattern postfix all + ;; literals can't be in-place incremented and decremented + ;; Make sure we don't match postfix-operator here, since + ;; that also gives us an infinite loop. + (and (or prefix funcall group variable) sp postfix-operator)) + +(define-peg-pattern infix all + ;; first case is "same" as expr, but in different order to prevent + ;; infinite self reference. Pre and postfix not here, solved by having + ;; them before infix in expr + (and (or funcall postfix prefix group literal variable) + sp operator sp expr)) + +(define-peg-pattern funcall all + (and variable sp group)) + +(define-peg-pattern literal body + (or char string number)) + +;;; main parser +(define-peg-pattern expr body + (+ (and sp (or + ;; float must be BEFORE infix, otherwise 3.2 is parsed as (infix 3 (operator ".") 2) + ;; that however breaks the infix logic, meaning that floating point numbers can't be + ;; used in basic arithmetic. + ;; TODO remove all implicit order of operations handling in the lexer, and move it to + ;; the parser. This should also fix the case of typecasts being applied incorrectly. + float + infix postfix prefix funcall group literal variable) + sp))) + + +(define (lex string) + (peg:tree (match-pattern expr string))) diff --git a/module/c/old/operators.scm b/module/c/old/operators.scm new file mode 100644 index 00000000..0b253ada --- /dev/null +++ b/module/c/old/operators.scm @@ -0,0 +1,24 @@ +(define-module (c old operators) + :export (wordy-binary-operators + symbol-binary-operators + binary-operators)) + + +;;; Simple operators are those which can be combined with '=' +(define simple-operators + `(+ - * / & ,(symbol #\|) ^ << >> % < > =)) + +;; apparently part of C +;; https://en.cppreference.com/w/cpp/language/operator_alternative +(define wordy-binary-operators + '(bitand and_eq and bitor or_eq or xor_eq xor not_eq)) + +(define symbol-binary-operators + (append (map (lambda (x) (symbol-append x '=)) simple-operators) + `(&& ,(symbol #\| #\|) != ,(symbol #\,) + -> ,(symbol #\.)) + simple-operators)) + +(define binary-operators + (append symbol-binary-operators + wordy-binary-operators)) diff --git a/module/c/old/parse.scm b/module/c/old/parse.scm new file mode 100644 index 00000000..d598e3c9 --- /dev/null +++ b/module/c/old/parse.scm @@ -0,0 +1,426 @@ +(define-module (c old parse) + :use-module (hnh util) + :use-module (srfi srfi-1) + :use-module (srfi srfi-71) + :use-module (ice-9 match) + :use-module ((rnrs io ports) + :select (string->bytevector make-transcoder utf-8-codec)) + :use-module (rnrs bytevectors) + :export (parse-lexeme-tree)) + +(define (permutations set) + (concatenate + (map (lambda (key) + (map (lambda (o) (cons key o)) + (delete key set))) + set))) + +(define (symbol-concat pair) + (cond [(null? (car pair)) (cdr pair)] + [(null? (cdr pair)) (car pair)] + [else (symbol-append (car pair) (cdr pair))])) + +(define (parse-integer-suffix str) + (define valid-sequences + (delete 'dummy + (lset-union eq? '(dummy) + (map symbol-concat (permutations '(() U L))) + (map symbol-concat (permutations '(() U LL)))))) + + ;; => (LLU ULL LL LU UL L U) + + (aif (memv (string->symbol (string-upcase str)) + valid-sequences) + (case (car it) + [(LLU ULL) '(unsigned long long)] + [(LU UL) '(unsigned long)] + [(LL) '(long long)] + [(L) '(long)] + [(U) '(unsigned)]) + (scm-error 'c-parse-error "parse-integer-suffix" + "Invalid integer suffix ~s" + (list str) #f))) + +(define (parse-float-suffix str) + (case (string->symbol str) + ((f F) '(float)) + ((l L) '(long double)))) + +(define (group-body->type vars) + (concatenate + (map + (match-lambda (('variable var) (list (parse-lexeme-tree `(variable ,var)))) + (('postfix ('variable var) + ('postfix-operator "*")) + (list (parse-lexeme-tree `(variable ,var)) + '*)) + (else (scm-error 'c-parse-error "parse-lexeme-tree" + "Invalid token ~s in typecast form: ~s" + (list else vars) #f))) + vars))) + +;; Takes a list of strings and integers, and merges it all into a single +;; bytevector representing a c string +(define* (string-fragments->c-string + fragments optional: (transcoder (make-transcoder (utf-8-codec)))) + + (define fragments-fixed + (map (lambda (frag) + (if (string? frag) + (string->bytevector frag transcoder) + frag)) + fragments)) + + (define bv-length + (fold (lambda (item sum) (+ sum (if (bytevector? item) + (bytevector-length item) + 1))) + 0 fragments-fixed)) + + (define bv (make-bytevector (1+ bv-length))) + ;; trailing null byte + (bytevector-u8-set! bv bv-length 0) + (fold (lambda (item idx) + (cond ((bytevector? item) + (bytevector-copy! item 0 + bv idx + (bytevector-length item)) + (+ idx (bytevector-length item))) + (else (bytevector-u8-set! bv idx item) + (+ idx 1)))) + 0 + fragments-fixed) + bv) + +(define (parse-float-form float-form) + (let ((float-string + (fold (lambda (arg str) + (string-append + str + (match arg + (('float-integer ('base-10 n)) n) + (('float-decimal ('base-10 n)) (string-append "." n)) + (('exponent "+" ('base-10 n)) (string-append "e" n)) + (('exponent ('base-10 n)) (string-append "e" n)) + (('exponent "-" ('base-10 n)) (string-append "e-" n))))) + "" float-form))) + ;; exact->inexact is a no-op if we already have an inexact number, but + ;; ensures we get an inexact number when we have an exact number (which we + ;; can get from the "1." case). Returning an inexact number here is important + ;; to avoid arithmetic suprises later. + (exact->inexact + (or (string->number float-string) + (scm-error 'c-parse-error "parse-lexeme-tree" + "Couldn't parse expression as float: ~s" + (list `(float ,@args)) #f))))) + + +(define (resolve-escaped-char form) + (match form + (('base-8-char n) (string->number n 8)) + (('base-16-char n) (string->number n 16)) + (c (char->integer + (case (string-ref c 0) + ((#\a) #\alarm) + ((#\b) #\backspace) + ((#\e) #\esc) ;; non-standard + ((#\f) #\page) + ((#\n) #\newline) + ((#\r) #\return) + ((#\t) #\tab) + ((#\v) #\vtab) + ((#\\) #\\) + ;; These are valid in both strings and chars + ((#\') #\') + ((#\") #\")))))) + +;; Takes a list of strings and escaped-char form +;; and returns a list of strings and integers +(define (resolve-string-fragment fragment) + (match fragment + (('escaped-char c) + (resolve-escaped-char c)) + (fargment fragment))) + +(define (parse-lexeme-tree tree) + (match tree + ['() '()] + + ;; Number constants + [('base-10 n) (string->number n 10)] + [('base-8 n) (string->number n 8)] + [('base-16 n) (string->number n 16)] + + [('integer n ('integer-suffix suffix)) + `(as-type + ,(parse-integer-suffix suffix) + ,(parse-lexeme-tree n))] + + [('integer n) + (parse-lexeme-tree n)] + + + [('float args ... ('float-suffix suffix)) + `(as-type ,(parse-float-suffix suffix) + ;; parse rest of float as if it lacked a suffix + ,(parse-lexeme-tree `(float ,@args)))] + + [('float args ...) (parse-float-form args)] + + ;; Character literals, stored as raw integers + ;; so mathematical operations keep working on them. + [('char ('escaped-char c)) (resolve-escaped-char c)] + + [('char c) (char->integer (string-ref c 0))] + + [('variable var) (string->symbol var)] + + ;; normalize some binary operators to their wordy equivalent + ;; (which also happens to match better with scheme) + [('operator "&&") 'and] + [('operator "&=") 'and_eq] + [('operator "&") 'bitand] + [('operator "|") 'bitor] + [('operator "!=") 'not_eq] + [('operator "||") 'or] + [('operator "|=") 'or_eq] + [('operator "^") 'xor] + [('operator "^=") 'xor_eq] + ;; Change these names to something scheme can handle better + [('operator ".") 'object-slot] + [('operator "->") 'dereference-slot] + [('operator op) (string->symbol op)] + + [('prefix-operator op) + (case (string->symbol op) + ((!) 'not) + ((~) 'compl) + ((*) 'dereference) + ((&) 'pointer) + ((++) 'pre-increment) + ((--) 'pre-decrement) + ((-) '-) + (else (scm-error 'c-parse-error "parse-lexeme-tree" + "Unknown prefix operator ~s" + (list op) #f)))] + [('postfix-operator op) + (case (string->symbol op) + [(++) 'post-increment] + [(--) 'post-decrement] + [else (scm-error 'c-parse-error "parse-lexeme-tree" + "Unknown postfix operator ~s" + (list op) #f)])] + + ;; Parenthesis grouping + [('group args ...) + (parse-lexeme-tree args)] + + [('prefix op arg) + `(,(parse-lexeme-tree op) + ,(parse-lexeme-tree arg))] + + [('postfix arg op) + `(,(parse-lexeme-tree op) + ,(parse-lexeme-tree arg))] + + + + + + ;; resolved-operator and ternary are the return "types" + ;; of resolve-order-of-operations + [(('resolved-operator op) args ...) + `(,op ,@(map parse-lexeme-tree args))] + + [('ternary a b c) + `(ternary ,(parse-lexeme-tree a) + ,(parse-lexeme-tree b) + ,(parse-lexeme-tree c))] + + + + + ;; Is it OK for literal strings to be "stored" inline? + ;; Or must they be a pointer? + ['string #vu8(0)] + [('string str ...) + (-> (map resolve-string-fragment str) + string-fragments->c-string)] + + ;; implicit concatenation of string literals + [(('string str ...) ...) + (-> (map resolve-string-fragment (concatenate str)) + string-fragments->c-string)] + + [('infix args ...) + (let ((r (resolve-order-of-operations + (flatten-infix (cons 'infix args))))) + (parse-lexeme-tree r))] + + + [('funcall function ('group arguments)) + `(funcall ,(parse-lexeme-tree function) + ,(parse-lexeme-tree arguments))] + + [(('variable "struct") ('variable value) ..1) + `(struct-type ,@(map string->symbol value)) + ] + + ;; A list of variables. Most likely a type signature + ;; [(('variable value) ..1) + ;; ] + + ;; A typecast with only variables must (?) be a typecast? + [(('group groups ..1) ... value) + (fold-right (lambda (type done) `(as-type ,type ,done)) + (parse-lexeme-tree value) + (map group-body->type groups))] + + ;; Type name resolution? + ;; https://en.wikipedia.org/wiki/C_data_types + ;; base types with spaces: + ;; ======================= + ;; [[un]signed] char + ;; [[un]signed] short [int] + ;; [[un]signed] int + ;; [un]signed [int] + ;; [[un]signed] long [int] + ;; [[un]signed] long long [int] + ;; float + ;; [long] double + + ;; https://en.wikipedia.org/wiki/Type_qualifier + ;; qualifiers + ;; const + ;; volatile + ;; restrict + ;; _Atomic + + + ;; Storage specifiers + ;; auto + ;; register + ;; static + ;; extern + + ;; struct + ;; enum + ;; union + + ;; https://en.wikipedia.org/wiki/C_syntax + ;; int (*ptr_to_array)[100] + + + [(? symbol? bare) + (scm-error 'c-parse-error + "parse-lexeme-tree" + "Naked literal in lex-tree: ~s" + (list bare) + #f)] + + [form + (scm-error 'c-parse-error + "parse-lexeme-tree" + "Unknown form in lex-tree: ~s" + (list form) #f) + ])) + +;; https://en.wikipedia.org/wiki/Operators_in_C_and_C%2B%2B +;; https://en.cppreference.com/w/c/language/operator_precedence + +(define order-of-operations + (reverse + ;; This is only for binary operations + `((-> ,(symbol #\.)) + ;; All unary procedures go here, incnluding typecasts, and sizeof + (* / %) + (+ -) + (<< >>) + (< <= > >=) + (== != not_eq) + (& bitand) + (^ xorg) + (,(symbol #\|) bitor) + (&& and) + (,(symbol #\| #\|) or) + (? :) + (= += -= *= /= %= <<= >>= &= ^= ,(symbol #\| #\=) + and_eq or_eq xor_eq) + (,(symbol #\,)) + ))) + +;; a.b->c.d (. (-> (. a b) c) d) +;; 2 * 3 / 4 * 5 => (* (/ (* 2 3) 4) 5) +;; eller => (* 2 (/ 3 4) 5) + +(define* (resolve-order-of-operations + tree optional: (order order-of-operations)) + + (if (null? order) + (scm-error 'c-parse-error + "resolve-order-of-operations" + "Out of operations to resolve when resolving expression ~s" + (list tree) #f) + (match tree + [('fixed-infix form) form] + [('fixed-infix forms ...) + (match (split-by-one-of forms (car order)) + [(group) + (resolve-order-of-operations (cons 'fixed-infix group) + (cdr order))] + [(a ('? b ...) (': c ...)) + `(ternary ,(resolve-order-of-operations (cons 'fixed-infix a) (cdr order)) + ,(resolve-order-of-operations (cons 'fixed-infix b) (cdr order)) + ,(resolve-order-of-operations (cons 'fixed-infix c) (cdr order)))] + [(first rest ...) + ;; TODO this is only valid for the associative operators (+, ...) + ;; but not some other (<, ...) + (if (apply eq? (map car rest)) + (let ((op (caar rest))) + `((resolved-operator ,op) + ,@(map (lambda (x) (resolve-order-of-operations (cons 'fixed-infix x) + (cdr order))) + (cons first (map cdr rest))))) + (fold (lambda (item done) + (let ((operator args (car+cdr item))) + `((resolved-operator ,operator) + ,done ,(resolve-order-of-operations + (cons 'fixed-infix args) + (cdr order))))) + (resolve-order-of-operations (cons 'fixed-infix first) + (cdr order)) + rest))])]))) + +;; 1 * 2 / 3 * 4 +;; ⇒ ((1) (* 2) (/ 3) (* 4)) +;; (1) +;; (* (1) 2) +;; (/ (* (1) 2) 3) +;; (* (/ (* (1) 2) 3) 4) + +;; Flatens a tree of infix triples. Stops when it should. +;; (parenthesis, function calls, ...) +(define (flatten-infix form) + (cons 'fixed-infix + (let loop ((form form)) + (match form + [('infix left op ('infix right ...)) + (cons* left + (parse-lexeme-tree op) + (loop (cons 'infix right)))] + + [('infix left op right) + (list left + (parse-lexeme-tree op) + right)] + + [('infix form) form] + + [other (scm-error 'c-parse-error + "flatten-infix" + "Not an infix tree ~a" + (list other) + #f)])))) + + + diff --git a/module/c/operators.scm b/module/c/operators.scm deleted file mode 100644 index 910dc8a9..00000000 --- a/module/c/operators.scm +++ /dev/null @@ -1,24 +0,0 @@ -(define-module (c operators) - :export (wordy-binary-operators - symbol-binary-operators - binary-operators)) - - -;;; Simple operators are those which can be combined with '=' -(define simple-operators - `(+ - * / & ,(symbol #\|) ^ << >> % < > =)) - -;; apparently part of C -;; https://en.cppreference.com/w/cpp/language/operator_alternative -(define wordy-binary-operators - '(bitand and_eq and bitor or_eq or xor_eq xor not_eq)) - -(define symbol-binary-operators - (append (map (lambda (x) (symbol-append x '=)) simple-operators) - `(&& ,(symbol #\| #\|) != ,(symbol #\,) - -> ,(symbol #\.)) - simple-operators)) - -(define binary-operators - (append symbol-binary-operators - wordy-binary-operators)) diff --git a/module/c/parse.scm b/module/c/parse.scm deleted file mode 100644 index 7d11ea17..00000000 --- a/module/c/parse.scm +++ /dev/null @@ -1,426 +0,0 @@ -(define-module (c parse) - :use-module (hnh util) - :use-module (srfi srfi-1) - :use-module (srfi srfi-71) - :use-module (ice-9 match) - :use-module ((rnrs io ports) - :select (string->bytevector make-transcoder utf-8-codec)) - :use-module (rnrs bytevectors) - :export (parse-lexeme-tree)) - -(define (permutations set) - (concatenate - (map (lambda (key) - (map (lambda (o) (cons key o)) - (delete key set))) - set))) - -(define (symbol-concat pair) - (cond [(null? (car pair)) (cdr pair)] - [(null? (cdr pair)) (car pair)] - [else (symbol-append (car pair) (cdr pair))])) - -(define (parse-integer-suffix str) - (define valid-sequences - (delete 'dummy - (lset-union eq? '(dummy) - (map symbol-concat (permutations '(() U L))) - (map symbol-concat (permutations '(() U LL)))))) - - ;; => (LLU ULL LL LU UL L U) - - (aif (memv (string->symbol (string-upcase str)) - valid-sequences) - (case (car it) - [(LLU ULL) '(unsigned long long)] - [(LU UL) '(unsigned long)] - [(LL) '(long long)] - [(L) '(long)] - [(U) '(unsigned)]) - (scm-error 'c-parse-error "parse-integer-suffix" - "Invalid integer suffix ~s" - (list str) #f))) - -(define (parse-float-suffix str) - (case (string->symbol str) - ((f F) '(float)) - ((l L) '(long double)))) - -(define (group-body->type vars) - (concatenate - (map - (match-lambda (('variable var) (list (parse-lexeme-tree `(variable ,var)))) - (('postfix ('variable var) - ('postfix-operator "*")) - (list (parse-lexeme-tree `(variable ,var)) - '*)) - (else (scm-error 'c-parse-error "parse-lexeme-tree" - "Invalid token ~s in typecast form: ~s" - (list else vars) #f))) - vars))) - -;; Takes a list of strings and integers, and merges it all into a single -;; bytevector representing a c string -(define* (string-fragments->c-string - fragments optional: (transcoder (make-transcoder (utf-8-codec)))) - - (define fragments-fixed - (map (lambda (frag) - (if (string? frag) - (string->bytevector frag transcoder) - frag)) - fragments)) - - (define bv-length - (fold (lambda (item sum) (+ sum (if (bytevector? item) - (bytevector-length item) - 1))) - 0 fragments-fixed)) - - (define bv (make-bytevector (1+ bv-length))) - ;; trailing null byte - (bytevector-u8-set! bv bv-length 0) - (fold (lambda (item idx) - (cond ((bytevector? item) - (bytevector-copy! item 0 - bv idx - (bytevector-length item)) - (+ idx (bytevector-length item))) - (else (bytevector-u8-set! bv idx item) - (+ idx 1)))) - 0 - fragments-fixed) - bv) - -(define (parse-float-form float-form) - (let ((float-string - (fold (lambda (arg str) - (string-append - str - (match arg - (('float-integer ('base-10 n)) n) - (('float-decimal ('base-10 n)) (string-append "." n)) - (('exponent "+" ('base-10 n)) (string-append "e" n)) - (('exponent ('base-10 n)) (string-append "e" n)) - (('exponent "-" ('base-10 n)) (string-append "e-" n))))) - "" float-form))) - ;; exact->inexact is a no-op if we already have an inexact number, but - ;; ensures we get an inexact number when we have an exact number (which we - ;; can get from the "1." case). Returning an inexact number here is important - ;; to avoid arithmetic suprises later. - (exact->inexact - (or (string->number float-string) - (scm-error 'c-parse-error "parse-lexeme-tree" - "Couldn't parse expression as float: ~s" - (list `(float ,@args)) #f))))) - - -(define (resolve-escaped-char form) - (match form - (('base-8-char n) (string->number n 8)) - (('base-16-char n) (string->number n 16)) - (c (char->integer - (case (string-ref c 0) - ((#\a) #\alarm) - ((#\b) #\backspace) - ((#\e) #\esc) ;; non-standard - ((#\f) #\page) - ((#\n) #\newline) - ((#\r) #\return) - ((#\t) #\tab) - ((#\v) #\vtab) - ((#\\) #\\) - ;; These are valid in both strings and chars - ((#\') #\') - ((#\") #\")))))) - -;; Takes a list of strings and escaped-char form -;; and returns a list of strings and integers -(define (resolve-string-fragment fragment) - (match fragment - (('escaped-char c) - (resolve-escaped-char c)) - (fargment fragment))) - -(define (parse-lexeme-tree tree) - (match tree - ['() '()] - - ;; Number constants - [('base-10 n) (string->number n 10)] - [('base-8 n) (string->number n 8)] - [('base-16 n) (string->number n 16)] - - [('integer n ('integer-suffix suffix)) - `(as-type - ,(parse-integer-suffix suffix) - ,(parse-lexeme-tree n))] - - [('integer n) - (parse-lexeme-tree n)] - - - [('float args ... ('float-suffix suffix)) - `(as-type ,(parse-float-suffix suffix) - ;; parse rest of float as if it lacked a suffix - ,(parse-lexeme-tree `(float ,@args)))] - - [('float args ...) (parse-float-form args)] - - ;; Character literals, stored as raw integers - ;; so mathematical operations keep working on them. - [('char ('escaped-char c)) (resolve-escaped-char c)] - - [('char c) (char->integer (string-ref c 0))] - - [('variable var) (string->symbol var)] - - ;; normalize some binary operators to their wordy equivalent - ;; (which also happens to match better with scheme) - [('operator "&&") 'and] - [('operator "&=") 'and_eq] - [('operator "&") 'bitand] - [('operator "|") 'bitor] - [('operator "!=") 'not_eq] - [('operator "||") 'or] - [('operator "|=") 'or_eq] - [('operator "^") 'xor] - [('operator "^=") 'xor_eq] - ;; Change these names to something scheme can handle better - [('operator ".") 'object-slot] - [('operator "->") 'dereference-slot] - [('operator op) (string->symbol op)] - - [('prefix-operator op) - (case (string->symbol op) - ((!) 'not) - ((~) 'compl) - ((*) 'dereference) - ((&) 'pointer) - ((++) 'pre-increment) - ((--) 'pre-decrement) - ((-) '-) - (else (scm-error 'c-parse-error "parse-lexeme-tree" - "Unknown prefix operator ~s" - (list op) #f)))] - [('postfix-operator op) - (case (string->symbol op) - [(++) 'post-increment] - [(--) 'post-decrement] - [else (scm-error 'c-parse-error "parse-lexeme-tree" - "Unknown postfix operator ~s" - (list op) #f)])] - - ;; Parenthesis grouping - [('group args ...) - (parse-lexeme-tree args)] - - [('prefix op arg) - `(,(parse-lexeme-tree op) - ,(parse-lexeme-tree arg))] - - [('postfix arg op) - `(,(parse-lexeme-tree op) - ,(parse-lexeme-tree arg))] - - - - - - ;; resolved-operator and ternary are the return "types" - ;; of resolve-order-of-operations - [(('resolved-operator op) args ...) - `(,op ,@(map parse-lexeme-tree args))] - - [('ternary a b c) - `(ternary ,(parse-lexeme-tree a) - ,(parse-lexeme-tree b) - ,(parse-lexeme-tree c))] - - - - - ;; Is it OK for literal strings to be "stored" inline? - ;; Or must they be a pointer? - ['string #vu8(0)] - [('string str ...) - (-> (map resolve-string-fragment str) - string-fragments->c-string)] - - ;; implicit concatenation of string literals - [(('string str ...) ...) - (-> (map resolve-string-fragment (concatenate str)) - string-fragments->c-string)] - - [('infix args ...) - (let ((r (resolve-order-of-operations - (flatten-infix (cons 'infix args))))) - (parse-lexeme-tree r))] - - - [('funcall function ('group arguments)) - `(funcall ,(parse-lexeme-tree function) - ,(parse-lexeme-tree arguments))] - - [(('variable "struct") ('variable value) ..1) - `(struct-type ,@(map string->symbol value)) - ] - - ;; A list of variables. Most likely a type signature - ;; [(('variable value) ..1) - ;; ] - - ;; A typecast with only variables must (?) be a typecast? - [(('group groups ..1) ... value) - (fold-right (lambda (type done) `(as-type ,type ,done)) - (parse-lexeme-tree value) - (map group-body->type groups))] - - ;; Type name resolution? - ;; https://en.wikipedia.org/wiki/C_data_types - ;; base types with spaces: - ;; ======================= - ;; [[un]signed] char - ;; [[un]signed] short [int] - ;; [[un]signed] int - ;; [un]signed [int] - ;; [[un]signed] long [int] - ;; [[un]signed] long long [int] - ;; float - ;; [long] double - - ;; https://en.wikipedia.org/wiki/Type_qualifier - ;; qualifiers - ;; const - ;; volatile - ;; restrict - ;; _Atomic - - - ;; Storage specifiers - ;; auto - ;; register - ;; static - ;; extern - - ;; struct - ;; enum - ;; union - - ;; https://en.wikipedia.org/wiki/C_syntax - ;; int (*ptr_to_array)[100] - - - [(? symbol? bare) - (scm-error 'c-parse-error - "parse-lexeme-tree" - "Naked literal in lex-tree: ~s" - (list bare) - #f)] - - [form - (scm-error 'c-parse-error - "parse-lexeme-tree" - "Unknown form in lex-tree: ~s" - (list form) #f) - ])) - -;; https://en.wikipedia.org/wiki/Operators_in_C_and_C%2B%2B -;; https://en.cppreference.com/w/c/language/operator_precedence - -(define order-of-operations - (reverse - ;; This is only for binary operations - `((-> ,(symbol #\.)) - ;; All unary procedures go here, incnluding typecasts, and sizeof - (* / %) - (+ -) - (<< >>) - (< <= > >=) - (== != not_eq) - (& bitand) - (^ xorg) - (,(symbol #\|) bitor) - (&& and) - (,(symbol #\| #\|) or) - (? :) - (= += -= *= /= %= <<= >>= &= ^= ,(symbol #\| #\=) - and_eq or_eq xor_eq) - (,(symbol #\,)) - ))) - -;; a.b->c.d (. (-> (. a b) c) d) -;; 2 * 3 / 4 * 5 => (* (/ (* 2 3) 4) 5) -;; eller => (* 2 (/ 3 4) 5) - -(define* (resolve-order-of-operations - tree optional: (order order-of-operations)) - - (if (null? order) - (scm-error 'c-parse-error - "resolve-order-of-operations" - "Out of operations to resolve when resolving expression ~s" - (list tree) #f) - (match tree - [('fixed-infix form) form] - [('fixed-infix forms ...) - (match (split-by-one-of forms (car order)) - [(group) - (resolve-order-of-operations (cons 'fixed-infix group) - (cdr order))] - [(a ('? b ...) (': c ...)) - `(ternary ,(resolve-order-of-operations (cons 'fixed-infix a) (cdr order)) - ,(resolve-order-of-operations (cons 'fixed-infix b) (cdr order)) - ,(resolve-order-of-operations (cons 'fixed-infix c) (cdr order)))] - [(first rest ...) - ;; TODO this is only valid for the associative operators (+, ...) - ;; but not some other (<, ...) - (if (apply eq? (map car rest)) - (let ((op (caar rest))) - `((resolved-operator ,op) - ,@(map (lambda (x) (resolve-order-of-operations (cons 'fixed-infix x) - (cdr order))) - (cons first (map cdr rest))))) - (fold (lambda (item done) - (let ((operator args (car+cdr item))) - `((resolved-operator ,operator) - ,done ,(resolve-order-of-operations - (cons 'fixed-infix args) - (cdr order))))) - (resolve-order-of-operations (cons 'fixed-infix first) - (cdr order)) - rest))])]))) - -;; 1 * 2 / 3 * 4 -;; ⇒ ((1) (* 2) (/ 3) (* 4)) -;; (1) -;; (* (1) 2) -;; (/ (* (1) 2) 3) -;; (* (/ (* (1) 2) 3) 4) - -;; Flatens a tree of infix triples. Stops when it should. -;; (parenthesis, function calls, ...) -(define (flatten-infix form) - (cons 'fixed-infix - (let loop ((form form)) - (match form - [('infix left op ('infix right ...)) - (cons* left - (parse-lexeme-tree op) - (loop (cons 'infix right)))] - - [('infix left op right) - (list left - (parse-lexeme-tree op) - right)] - - [('infix form) form] - - [other (scm-error 'c-parse-error - "flatten-infix" - "Not an infix tree ~a" - (list other) - #f)])))) - - - diff --git a/module/c/preprocessor.scm b/module/c/preprocessor.scm deleted file mode 100644 index 71712b17..00000000 --- a/module/c/preprocessor.scm +++ /dev/null @@ -1,370 +0,0 @@ -(define-module (c preprocessor) - :use-module (srfi srfi-1) - :use-module (srfi srfi-9 gnu) - :use-module (ice-9 rdelim) - :use-module (ice-9 regex) - :use-module (hnh util object) - - :use-module (hnh util) - :use-module (hnh util object) - ) - -(define (read-lines port) - (let loop ((done '())) - (let ((line (read-line port))) - (if (eof-object? line) - (reverse done) - (loop (cons line done)))))) - -;; The source line of a give readen line -(define line-number (make-object-property)) -;; The source file of a given readen line -(define line-file (make-object-property)) - - -(define (mark-with-property! items property property-value) - (for-each (lambda (item) (set! (property item) property-value)) - items)) - -(define trigraph-rx (make-regexp "??([=()/'<>!-])")) -(define (expand-trigraphs line) - (regexp-substitute/global - #f trigraph-rx - line - 'pre (lambda (m) (case (string-ref (match:substring m 1) 1) - ((#\=) "#") - ((#\() "[") - ((#\)) "]") - ((#\/) "\\") - ((#\') "^") - ((#\<) "{") - ((#\>) "}") - ((#\!) "|") - ((#\-) "~"))) - 'post)) - -(define (number-lines lines) - (for-each (lambda (line number) - (set! (line-number line) number)) - lines - (iota (length lines) 1)) - lines) - -;; Should this line be merged with the next -(define (line-continued? line) - (case (string-length line) - ((0) #f) - ((1) (string=? "\\" line)) - (else - (let ((len (string-length line))) - ;; TODO can extra backslashes change this? - (and (char=? #\\ (string-ref line (- len 1))) - (not (char=? #\\ (string-ref line (- len 2))))))))) - -(define (transfer-line-number to from) - (set! (line-number to) (line-number from)) - to) - -;; Merge two lines, assuming that upper ends with a backslash -(define (merge-lines upper lower) - (let ((new-string (string-append (string-drop-right upper 1) lower))) - (transfer-line-number new-string upper) - new-string)) - -(define (fold-lines lines) - (fold-right (lambda (line done) - (if (line-continued? line) - (cons (merge-lines line (car done)) (cdr done)) - (cons line done))) - '() - lines)) - - -(define comment-rx (make-regexp "(//|/[*]|[*]/)")) - -(define (strip-comments lines) - (let loop ((in-comment #f) (lines lines) (done '())) - (if (null? lines) - (reverse done) - (let ((line (car lines))) - (cond ((regexp-exec comment-rx line) - => (lambda (m) - (format (current-output-port) "~s ~s substr = ~s~%" in-comment (line-number line) (match:substring m)) - (cond ((and in-comment (string=? "*/" (match:substring m))) - (loop #f (cons (transfer-line-number (match:suffix m) line) - (cdr lines)) - done)) - (in-comment (loop #t (cdr lines) done)) - ((string=? "//" (match:substring m)) - (loop #f (cdr lines) (cons (transfer-line-number (match:prefix m) line) - done))) - ((string=? "/*" (match:substring m)) - (loop #t (cons (transfer-line-number (match:suffix m) line) (cdr lines)) done)) - (else - (scm-error 'cpp-error "strip-comments" - "Unexpected */ in file ~a on line ~a" - (list (line-file line) (line-number line)) - #f))))) - (else (loop in-comment (cdr lines) (cons line done)))))))) - - -(define-immutable-record-type - (make-preprocessor-directive type body) - proprocessor-directive? - (type directive-type) - (body directive-body)) - -(define cpp-directive-rx (make-regexp "\\s*#\\s*((\\w+)(.*))?")) -(define (preprocessor-directive? line) - (cond ((regexp-exec cpp-directive-rx line) - => (lambda (m) - (if (match:substring m 2) - (make-preprocessor-directive - (string->symbol (match:substring m 2)) - (string-trim-both (match:substring m 3) char-set:whitespace)) - 'sort-of))) - (else #f))) - -;; defined - -;; TODO _Pragma - - -(define (expand-function-line-macro environment macro . params) - (expand-macro environment (apply-macro macro (map (lambda (param) (expand-macro environment param)) params)))) - -;; (define (environment-ref )) - -(define (list-of? lst predicate) - (every predicate lst)) - - -;; Parantheses when defining macro -(define (parse-parameter-string string) - (map string-trim-both - (string-split (string-trim-both string (char-set #\( #\))) - #\,))) - - -(define-type (object-macro) - (body type: string?)) - -(define-type (function-macro) - (formals type: (list-of? string?)) - (body type: string?)) - - -;; The interesting part -;; environment, (list string) -> (values (list string) (list strings)) -;; multiple lines since since a function-like macro can extend over multiple lines -;; (define (expand-macros environment strings) -;; ) - - -(define (crash-if-not-if body guilty) - (scm-error 'cpp-error guilty - "else, elif, and endif invalid outside if scope: ~s~%file: ~s line: ~s" - (list body (line-file body) (line-number body)))) - -;; (environment, lines) -> environment x lines -(define (parse-directives environment lines) - (let loop ((environment environment) (lines lines) (done '())) - (let* ((line (car line)) - (directive? (preprocessor-directive? line))) - (case directive? - ((#f) ; regular line - (loop environment (cdr lines) - ;; TODO this doesn't work, since parse-macros works on multiple lines - (cons (parse-macros environment (car lines)) done))) - ((sort-of) ; a no-op directive - (loop environment (cdr lines) done)) - (else ; an actual directive - (case (car (cpp-if-status environment)) - ((outside) - (case (directive-type m) - ((ifndef endif else) - (scm-error 'cpp-error "parse-directives" - "Unexpected directive: ~s" - (list line) #f)) - (else ; inside if, ifelse or else - ;; outside active-if inactive-if - (case (directive-type m) - ;; stack ending directives - ((endif) - (case (car (cpp-if-status environment)) - ((outside) (crash-if-not-if (directive-body m) "endif")) - (else (loop (modify environment cpp-if-status cdr) - (cdr lines) - done)))) - - ;; stack nudging directives - ((else) - (case (car (cpp-if-status environment)) - ((outside) (crash-if-not-if (directive-body m) "else")) - (else (loop (modify environment (lens-compose cpp-if-status car*) - (lambda (old) - (case old - ((active-if) 'inactive-if) - ((inactive-if) 'active-if)))) - (cdr lines) - done)))) - ((elif) - (case (car (cpp-if-status environment)) - ((outside) (crash-if-not-if (directive-body m) "elif")) - (else 'TODO ;; TODO - ) - )) - - ;; stack creating directives - ;; on inactive-if each creates a new frame, which also is inactive - ((ifndef) - (case (car (cpp-if-status environment)) - ((inactive-if) (loop (modify environment cpp-if-status - xcons 'inactive-if) - (cdr lines) - done)) - (else (loop (modify environment cpp-if-status - xcons (if (in-environment? environment (directive-body directive?)) - 'inactive-if 'active-if)) - (cdr lines) - done)))) - - ((ifdef) - (case (car (cpp-if-status environment)) - ((inactive-if) (loop (modify environment cpp-if-status - xcons 'inactive-if) - (cdr lines) - done)) - (else - (loop (modify environment cpp-if-status - xcons (if (in-environment? environment (directive-body directive?)) - 'active-if 'inactive-if)) - (cdr lines) - done)))) - - ((if) - (case (car (cpp-if-status environment)) - ((inactive-if) (loop (modify environment cpp-if-status - xcons 'inactive-if) - (cdr lines) - done)) - (else 'TODO ;; TODO - ))) - - - ;; other directives - ((include) (cond ((string-match "[<\"](.*)" - => (lambda (m) - (let ((fileneme (string-drop-right (directive-body m) 1))) - (case (string-ref (match:substring m 1) 0) - ;; TODO include-path - ((#\<) (handle-file environment filename)) - ((#\") (handle-file environment filename)))))) - (else (scm-error 'cpp-error "parse-directives" - "Invalid include" - '() #f))))) - ((define) - ;; TODO what are valid names? - (cond ((string-match "^(\\w+)([(][^)]*[)])?\\s+(.*)" (directive-body directive?)) - => (lambda (m) - (loop (let ((macro-body (string-trim-both (match:substring m 3)))) - (add-identifier! - environment - (match:substring m 1) - (cond ((match:substring m 2) - => (lambda (parameter-string) - (function-macro - formals: (parse-parameter-string parameter-string) - body: macro-body))) - (else (object-macro body: macro-body))))) - (cdr lines) - done))) - (else (scm-error 'cpp-error "parse-directives" - "Invalid #define line, ~s" - (list (directive-body directive?)) - #f)))) - - ((undef) - (case (car (cpp-if-status environment)) - ((inactive-if) (loop environment (cdr lines) done)) - (else (loop (remove-identifier environment (directive-body directive?)) - (cdr lines) - done)))) - - ((line) - (case (car (cpp-if-status environment)) - ((inactive-if) (loop environment (cdr lines) done)) - ;; TODO add first-run parameter to loop, in case expand-macros still return something invalid - (else (let parse-line-directive ((tokens (string-tokenize (directive-body directive?)))) - (cond ((= 1 (length tokens)) - ;; TODO parse token - (if (integer? (car tokens)) - ;; TODO update current line - (loop environment (cdr lines) done) - (parse-line-directive (expand-macros environment (directive-body directive?))))) - ((= 2 (length tokens)) - ;; TODO parse tokens - (if (and (integer? (car tokens)) - (string-literal? (cadr tokens))) - ;; TODO update current line and file - (loop environment (cdr lines) done) - (parse-line-directive (expand-macros environment (directive-body directive?))))) - (else (parse-line-directive (expand-macros environment (directive-body directive?))))))))) - - ((error) - (throw 'cpp-error-directive - (directive-body directive?))) - - ((warning) - (format (current-error-port) "#warning ~a~%" - (directive-body directive?)) - (loop environment (cdr lines) done)) - - ((pragma) - (format (current-error-port) - "PRAGMA: ~s~%" (directive-body directive?)) - (loop environment (cdr lines) done)) - - ((ident sccs) - (format (current-error-port) - "IDENT: ~s~%" (directive-body directive?)) - (loop environment (cdr lines) done)) - - (else - (scm-error 'cpp-error "parse-directives" - "Unknown pre-processor directive: ~s" - (list line) #f) - ))))))))) - )) - - -(define* (writeln expr optional: (port (current-output-port))) - (write expr port) - (newline port)) - -(define (handle-lines environment lines) - (parse-directive environment - (compose strip-comments fold-lines number-lines))) - - ;; parse-directives environment - -;; Return a number of lines -(define (read-file file-path) - (define raw-lines (call-with-input-file file-path read-lines)) - (mark-with-property! line line-file file-path) - (handle-lines raw-lines)) - - -;; pre defined macros -;; see info manual for cpp 3.7.1 Standard Predefined Macros -;; __FILE__ -;; __LINE__ -;; __DATE__ "Feb 12 1996" -;; __TIME__ "23:59:01" -;; __STDC__ 1 -;; __STDC_VERSION__ 201112L -;; __STDC_HOSTED__ 1 - -;; __cplusplus -;; __OBJC__ -;; __ASSEMBLER__ diff --git a/module/c/util.scm b/module/c/util.scm deleted file mode 100644 index f258d3e3..00000000 --- a/module/c/util.scm +++ /dev/null @@ -1,20 +0,0 @@ -(use-modules (c lex2) - (srfi srfi-1) - (srfi srfi-88) - (c to-token) - (c cpp-types)) - -(define (tok s) - (map preprocessing-token->token (remove whitespace-token? (lex s)))) - -(define* (parse tokens optional: (parser make-parser)) - ((parser) - (build-lexical-analyzer tokens) - error)) - - -(define (mm) - (lalr-parser - (x) - (y (x)) - )) diff --git a/module/c/zipper.scm b/module/c/zipper.scm deleted file mode 100644 index 65cea211..00000000 --- a/module/c/zipper.scm +++ /dev/null @@ -1,60 +0,0 @@ -;;; Commentary: -;; Zipper data structure. Could be moved to (hnh util), but would then need to -;; be at least slightly more thorough. -;;; Code: - -(define-module (c zipper) - :use-module (srfi srfi-88) - :use-module (hnh util object) - :export (list-zipper - list-zipper? - left focused right - zip-left - zip-right - zip-find-right - list->zipper - zipper->list - rezip)) - -(define-type (list-zipper) - (left type: list?) - focused - (right type: list?)) - -;; Move zipper one step to the left -(define (zip-left zipper) - (if (null? (left zipper)) - zipper - (list-zipper left: (cdr (left zipper)) - right: (cons (focused zipper) (right zipper)) - focused: (car (left zipper))))) - -;; Move zipper one step to the right -(define (zip-right zipper) - (if (null? (right zipper)) - zipper - (list-zipper left: (cons (focused zipper) (left zipper)) - right: (cdr (right zipper)) - focused: (car (right zipper))))) - -;; find first element matching predicate, going right -(define (zip-find-right predicate zipper) - (cond ((null? (right zipper)) zipper) - ((predicate (focused zipper)) zipper) - (else (zip-find-right predicate (zip-right zipper))))) - -(define (list->zipper list) - (list-zipper left: '() - focused: (car list) - right: (cdr list))) - - -(define (rezip zipper) - (if (null? (left zipper)) - zipper - (rezip (zip-left zipper)))) - -(define (zipper->list zipper) - (let ((z (rezip zipper))) - (cons (focused z) - (right z)))) diff --git a/module/glob.scm b/module/glob.scm index 64f97690..bd9a6ea5 100644 --- a/module/glob.scm +++ b/module/glob.scm @@ -1,7 +1,7 @@ (define-module (glob) :use-module (system foreign) :use-module (rnrs bytevectors) - :use-module (c cpp) + :use-module (c old cpp) :export (glob)) diff --git a/module/vulgar/termios.scm b/module/vulgar/termios.scm index ddd8920f..5ef86c00 100644 --- a/module/vulgar/termios.scm +++ b/module/vulgar/termios.scm @@ -7,7 +7,7 @@ :use-module (ice-9 rdelim) :use-module (srfi srfi-9) ; records :use-module (srfi srfi-88) - :use-module (c cpp) + :use-module (c old cpp) :use-module (hnh util) :export (make-termios copy-termios diff --git a/tests/test/c-parse.scm b/tests/test/c-parse.scm index c16958de..22aaf92a 100644 --- a/tests/test/c-parse.scm +++ b/tests/test/c-parse.scm @@ -6,11 +6,11 @@ (define-module (test cpp) :use-module (srfi srfi-64) :use-module (srfi srfi-88) - :use-module ((c lex) :select (lex)) - :use-module (c parse)) + :use-module ((c old lex) :select (lex)) + :use-module (c old parse)) -(define flatten-infix (@@ (c parse) flatten-infix)) -(define resolve-order-of-operations (@@ (c parse) resolve-order-of-operations)) +(define flatten-infix (@@ (c old parse) flatten-infix)) +(define resolve-order-of-operations (@@ (c old parse) resolve-order-of-operations)) (test-group "Flatten infix" (test-equal "Simple binary operator" diff --git a/tests/test/cpp.scm b/tests/test/cpp.scm deleted file mode 100644 index 1294bc96..00000000 --- a/tests/test/cpp.scm +++ /dev/null @@ -1,604 +0,0 @@ -;;; Commentary: -;; Tests my parser for a subset of the C programming language. -;;; Code: - -(define-module (test cpp) - :use-module (srfi srfi-1) - :use-module (srfi srfi-64) - :use-module (srfi srfi-88) - :use-module ((c lex) :select (lex)) - :use-module ((c parse) :select (parse-lexeme-tree)) - :use-module ((c eval) :select (c-eval)) - :use-module ((c eval environment) :select (make-environment env-set!)) - :use-module ((rnrs arithmetic bitwise) - :select (bitwise-xor))) - -;; Note that the lexer's output isn't stable. -;; The tests here are more to see where the lexer succeeds but the parser fails. -;; So changing the lexer test cases isn't a problem -;; but don't change the parser test cases - -;; __asm__ always has strings as arguments -(test-skip "__asm__") - -;; Lexer produces garbage when attempted. Fixing this would also fix cast -;; operations. -(test-skip "Float in infix expression") -;; order of operation is wrong, leading to an incorrect result -(test-skip "Cast with operation") - -;; not implemented -(test-skip "Token concatenation") - -;; A string follewed by a macro (which expands to a string) -;; should be concatenated. This is however not yet implemented -(test-skip "Implicit concatenation of string and macro") - -(define run (compose parse-lexeme-tree lex)) - -(define (alist->environment alist) - (fold (lambda (pair env) - (apply env-set! env pair)) - (make-environment) - alist)) - -(define (exec form . base-bindings) - (call-with-values - (lambda () (c-eval (alist->environment base-bindings) - (run form))) - (lambda (env value) value))) - -(define-syntax let-group - (syntax-rules () - ((let ((form name) rest ...) body ...) - (test-group name - (let ((form name) - rest ...) - body ...))))) - -(let-group - ((form "(*C)++ + 3")) - (test-equal '(infix (postfix (group (prefix (prefix-operator "*") - (variable "C"))) - (postfix-operator "++")) - (operator "+") - (integer (base-10 "3"))) - (lex form)) - (test-equal '(+ (post-increment (dereference C)) 3) - (run form))) - -(let-group - ((form "*C++ + 3")) - (test-equal '(infix (postfix (prefix (prefix-operator "*") - (variable "C")) - (postfix-operator "++")) - (operator "+") - (integer (base-10 "3"))) - (lex form)) - (test-equal '(+ (post-increment (dereference C)) 3) - (run form))) - -(let-group - ((form "*C++")) - (test-equal '(postfix (prefix (prefix-operator "*") - (variable "C")) - (postfix-operator "++")) - (lex form)) - (test-equal '(post-increment (dereference C)) - (run form))) - -(let-group - ((form "C++ + C++")) - (test-equal '(infix (postfix (variable "C") - (postfix-operator "++")) - (operator "+") - (postfix (variable "C") - (postfix-operator "++"))) - (lex form)) - (test-equal '(+ (post-increment C) (post-increment C)) - (run form))) - -(let-group - ((form "++C + ++C")) - (test-equal '(infix (prefix (prefix-operator "++") - (variable "C")) - (operator "+") - (prefix (prefix-operator "++") - (variable "C"))) - (lex form)) - (test-equal '(+ (pre-increment C) (pre-increment C)) - (run form))) - -(let-group - ((form "2 + 2 * 2")) - (test-equal '(infix (integer (base-10 "2")) - (operator "+") - (infix (integer (base-10 "2")) - (operator "*") - (integer (base-10 "2")))) - (lex form)) - (test-equal '(+ 2 (* 2 2)) (run form)) - (test-equal 6 (exec form))) - -(let-group - ((form "2 * 2 + 2")) - (test-equal '(infix (integer (base-10 "2")) - (operator "*") - (infix (integer (base-10 "2")) - (operator "+") - (integer (base-10 "2")))) - (lex form)) - (test-equal '(+ (* 2 2) 2) (run form)) - (test-equal 6 (exec form))) - -(let-group - ((form "2+2+2")) - (test-equal '(infix (integer (base-10 "2")) - (operator "+") - (infix (integer (base-10 "2")) - (operator "+") - (integer (base-10 "2")))) (lex form)) - (test-equal '(+ 2 2 2) (run form)) - (test-equal 6 (exec form))) - -(test-group "Unary minus" - (test-group "Without space" - (let ((form "-1")) - (test-equal '(prefix (prefix-operator "-") - (integer (base-10 "1"))) - (lex form)) - (test-equal '(- 1) (run form)) - (test-equal -1 (exec form)))) - - (test-group "With space" - (let ((form "- 1")) - (test-equal '(prefix (prefix-operator "-") - (integer (base-10 "1"))) - (lex form)) - (test-equal '(- 1) (run form)) - (test-equal -1 (exec form)))) - - (test-group "Before variable" - (let ((form "-x")) - (test-equal '(prefix (prefix-operator "-") - (variable "x")) - (lex form)) - (test-equal '(- x) (run form)) - (test-equal -5 (exec form '(x 5))))) - - (test-group "Before infix" - (let ((form "-x+3")) - (test-equal '(infix (prefix (prefix-operator "-") - (variable "x")) - (operator "+") - (integer (base-10 "3"))) - (lex form)) - (test-equal '(+ (- x) 3) (run form)) - (test-equal -2 (exec form '(x 5))))) - - (test-group "Inside infix expression" - (let ((form "x+-3")) - (test-equal '(infix (variable "x") - (operator "+") - (prefix (prefix-operator "-") - (integer (base-10 "3")))) - (lex form)) - (test-equal '(+ x (- 3)) (run form)) - (test-equal 2 (exec form '(x 5))))) - ) - - - - -;; Hand picked forms from output of `cpp -dM /usr/include/termios.h` on -;; FreeBSD 13.1-RELEASE releng/13.1-n250148-fc952ac2212 GENERIC amd64 -;; 2022-06-28 - -(let ((form "00000200")) - (test-equal '(integer (base-8 "0000200")) (lex form)) - (test-equal 128 (run form))) - -(let ((form "0")) - (test-equal '(integer (base-10 "0")) (lex form)) - (test-equal 0 (run form))) - -(let ((form "1000000U")) - (test-equal '(integer (base-10 "1000000") (integer-suffix "U")) (lex form)) - (test-equal '(as-type (unsigned) 1000000) (run form)) - (test-equal 1000000 (exec form))) - - -(let ((form "0x10c")) - (test-equal '(integer (base-16 "10c")) (lex form)) - (test-equal 268 (run form))) - -;; Lexer keeps original case, handled later by parser -(let ((form "0X10C")) - (test-equal '(integer (base-16 "10C")) (lex form)) - (test-equal 268 (run form))) - -(let ((form "a != b")) - (test-equal '(infix (variable "a") - (operator "!=") - (variable "b")) - (lex form)) - (test-equal '(not_eq a b) (run form)) - (test-equal 1 (exec form '(a 1) '(b 2))) - (test-equal 0 (exec form '(a 1) '(b 1))) - ) - -(let ((form "((c) == (val) && (val) != _POSIX_VDISABLE)")) - ;; (test-equal '() (lex form)) - (test-equal '(and (== c val) - (not_eq val _POSIX_VDISABLE)) - (run form)) - (test-equal 0 (exec form '(c 1) '(val 2) '(_POSIX_VDISABLE 3))) - ) - -(let ((form "CTRL('O')")) - (test-equal '(funcall (variable "CTRL") (group (char "O"))) (lex form)) - (test-equal '(funcall CTRL 79) (run form)) - (test-equal (bitwise-xor #x40 (char->integer #\O)) - (exec form - ;; Definition copied from our parsers output of - ;; preprocessing output as defined above - '(CTRL (lambda (x) - (ternary (and (>= x 97) (<= x 122)) - (+ (- x 97) 1) - (bitand (+ (- x 65) 1) 127))))))) - -(let ((form "CREPRINT")) - (test-equal '(variable "CREPRINT") (lex form)) - (test-equal 'CREPRINT (run form))) - -(let ((form "(CCTS_OFLOW | CRTS_IFLOW)")) - (test-equal '(group (infix (variable "CCTS_OFLOW") - (operator "|") - (variable "CRTS_IFLOW"))) - (lex form)) - (test-equal '(bitor CCTS_OFLOW CRTS_IFLOW) (run form))) - -;; ((x) >= 'a' && (x) <= 'z' -;; ? ((x) - 'a' + 1) -;; : (((x) - 'a' + 1) & 0x7f)) -(let ((form "((x) >= 'a' && (x) <= 'z' ? ((x) - 'a' + 1) : (((x) - 'a' + 1) & 0x7f))")) - ;; (test-equal '() (lex form)) - (test-equal '(ternary - (and (>= x #x61) - (<= x #x7A)) - (+ (- x #x61) 1) - (bitand (+ (- x #x61) 1) 127)) - (run form))) - -(let ((form "((x) & ~(IOCPARM_MASK << 16))")) - ;; (test-equal '() (lex form)) - (test-equal '(bitand x (compl (<< IOCPARM_MASK 16))) (run form))) - -(let ((form "(((x) >> 8) & 0xff)")) - ;; (test-equal '() (lex form)) - (test-equal '(bitand (>> x 8) 255) (run form))) - -(let ((form "(((x) >> 16) & IOCPARM_MASK)")) - ;; (test-equal '() (lex form)) - (test-equal '(bitand (>> x 16) IOCPARM_MASK) (run form))) - -(let ((form "((1 << IOCPARM_SHIFT) - 1)")) - ;; (test-equal '() (lex form)) - (test-equal '(- (<< 1 IOCPARM_SHIFT) 1) (run form))) - -(let ((form "_IO('t', 120)")) - (test-equal '(funcall - (variable "_IO") - (group (infix (char "t") - (operator ",") - (integer (base-10 "120"))))) - (lex form)) - (test-equal '(funcall _IO (#{,}# 116 120)) (run form))) - -;; note the lone type -(let ((form "_IOW('t', 98, int)")) - ;; (test-equal '() (lex form)) - (test-equal '(funcall _IOW (#{,}# 116 98 int)) - (run form))) - -;; note the multi-word type -(let ((form "_IOR('t', 19, struct termios)")) - ;; (test-equal '() (lex form)) - (test-equal '(funcall _IOR (#{,}# 116 19 (struct-type termios))) (run form))) - - -;; TODO concatenation rules -;; #define __CONCAT(x,y) __CONCAT1(x,y) -;; #define __CONCAT1(x,y) x ## y -;; #define __CONSTANT_CFSTRINGS__ 1 -;; #define __COPYRIGHT(s) __IDSTRING(__CONCAT(__copyright_,__LINE__),s) - -(test-group "Token concatenation" - (let ((form "x ## y")) - (test-equal '() (lex form)) - (test-equal '0 (run form)))) - -(test-group "Floating point numbers" - - (test-group "Diffent forms" - (test-group "No decimal point, exponent, no suffix" - (let ((form "10e10")) - (test-equal '(float (float-integer (base-10 "10")) - (exponent (base-10 "10"))) - (lex form)) - (test-equal 10e10 (run form)))) - - (test-group "No decimal point, negative exponent" - (let ((form "10e-10")) - (test-equal '(float (float-integer (base-10 "10")) - (exponent "-" (base-10 "10"))) - (lex form)) - (test-equal 10e-10 (run form)))) - - (test-group "No decimal point, exponent and suffix" - (let ((form "10e10L")) - (test-equal '(float (float-integer (base-10 "10")) - (exponent (base-10 "10")) - (float-suffix "L")) - (lex form)) - (test-equal '(as-type (long double) 10e10) - (run form)))) - - (test-group "Leading period, no exponent or suffix" - (let ((form ".1")) - (test-equal '(float (float-decimal (base-10 "1"))) (lex form)) - (test-equal 0.1 (run form)))) - - (test-group "Trailing period, no exponent or suffix" - (let ((form "1.")) - (test-equal '(float (float-integer (base-10 "1"))) (lex form)) - (test-equal 1.0 (run form))))) - - - (test-group "Negative float" - (let ((form "-1.0")) - (test-equal '(prefix (prefix-operator "-") - (float (float-integer (base-10 "1")) - (float-decimal (base-10 "0")))) - (lex form)) - (test-equal '(- 1.0) (run form)))) - - - - (test-group "Real world examples" - (let ((form "4.9406564584124654e-324")) - (test-equal '(float (float-integer (base-10 "4")) - (float-decimal (base-10 "9406564584124654")) - (exponent "-" (base-10 "324"))) - (lex form)) - (test-equal 4.9406564584124654e-324 (run form))) - - (let ((form "1.7976931348623157e+308")) - (test-equal '(float (float-integer (base-10 "1")) - (float-decimal (base-10 "7976931348623157")) - (exponent "+" (base-10 "308"))) - (lex form)) - (test-equal 1.7976931348623157e+308 (run form)))) - - (test-group "Float in infix expression" - (test-group "Simple case" - (let ((form "1. + .1")) - (test-equal '(infix (float (float-integer (base-10 "1"))) - (operator "+") - (float (float-decimal (base-10 "1")))) - (lex form)) - (test-equal '(+ 1.0 0.1) (run form)))) - ;; (test-group "Complicated case") - )) - -(test-group "Typecasts" - - (let ((form "(unsigned) 5")) - (test-equal '((group (variable "unsigned")) - (integer (base-10 "5"))) - (lex form)) - (test-equal '(as-type (unsigned) 5) - (run form))) - - (let ((form "(unsigned integer) 5")) - (test-equal '((group (variable "unsigned") - (variable "integer")) - (integer (base-10 "5"))) - (lex form)) - (test-equal '(as-type (unsigned integer) 5) (run form))) - - (test-group "Pointer with space" - (let ((form "(int *) 5")) - (test-equal '((group (postfix (variable "int") - (postfix-operator "*"))) - (integer (base-10 "5"))) - (lex form)) - (test-equal '(as-type (int *) 5) - (run form)))) - - (test-group "Pointer without space" - (let ((form "(int*) 5")) - (test-equal '((group (postfix (variable "int") - (postfix-operator "*"))) - (integer (base-10 "5"))) - (lex form)) - (test-equal '(as-type (int *) 5) - (run form)))) - - (test-group "Multi word type pointer" - (let ((form "(unsigned int*) 5")) - (test-equal '((group (variable "unsigned") - (postfix (variable "int") - (postfix-operator "*"))) - (integer (base-10 "5"))) - (lex form)) - (test-equal '(as-type (unsigned int *) 5) - (run form)))) - - (test-group "Double cast" - (let ((form "(int)(unsigned) 5")) - (test-equal '((group (variable "int")) - (group (variable "unsigned")) - (integer (base-10 "5"))) (lex form)) - (test-equal '(as-type (int) (as-type (unsigned) 5)) - (run form)))) - - (test-group "Cast with operation" - (let ((form "(int) 5 + 7")) - (test-equal '((group (variable "int")) - (infix (integer (base-10 "5")) - (operator "+") - (integer (base-10 "7")))) - (lex form)) - - (test-equal '(+ (as-type (int) 5) 7) - (run form)))) - - - - (test-group "Tripple cast, with value inside paranthesis" - (let ((form "(type)(__uintptr_t)(const void *)(var)")) - (test-equal '((group (variable "type")) - (group (variable "__uintptr_t")) - (group (variable "const") - (postfix (variable "void") - (postfix-operator "*"))) - (group (variable "var"))) - (lex form)) - (test-equal '(as-type (type) - (as-type (__uintptr_t) - (as-type (const void *) - var))) - (run form)))) - - (test-group "Same as above, but whole thing inside parenthesis" - (let ((form "((type)(__uintptr_t)(const void *)(var))")) - (test-equal '(group (group (variable "type")) - (group (variable "__uintptr_t")) - (group (variable "const") - (postfix (variable "void") - (postfix-operator "*"))) - (group (variable "var"))) - (lex form)) - (test-equal '(as-type (type) - (as-type (__uintptr_t) - (as-type (const void *) - var))) - (run form)))) - - (let ((form "((type)(__uintptr_t)(const volatile void *)(var))")) - ;; (test-equal '() (lex form)) - (test-equal '(as-type (type) - (as-type (__uintptr_t) - (as-type (const volatile void *) - var))) - (run form))) - - (let ((form "((unsigned long) ((inout) | (((len) & IOCPARM_MASK) << 16) | ((group) << 8) | (num)))")) - (test-equal '(group (group (variable "unsigned") (variable "long")) - (group (infix (group (variable "inout")) - (operator "|") - (infix (group (infix (group (infix (group (variable "len")) - (operator "&") - (variable "IOCPARM_MASK"))) - (operator "<<") - (integer (base-10 "16")))) - (operator "|") - (infix (group (infix (group (variable "group")) - (operator "<<") - (integer (base-10 "8")))) - (operator "|") - (group (variable "num"))))))) - (lex form)) - (test-equal '(as-type (unsigned long) - (bitor inout - (<< (bitand len IOCPARM_MASK) 16) - (<< group 8) - num)) - (run form)))) - -(test-group "Characters" - (let ((form "'c'")) - (test-equal '(char "c") (lex form)) - (test-equal #x63 (run form))) - - (let ((form "'\\n'")) - (test-equal '(char (escaped-char "n")) (lex form)) - (test-equal (char->integer #\newline) (run form)))) - -(test-group "Strings" - (test-group "Empty string" - (let ((form "\"\"")) - (test-equal 'string (lex form)) - (test-equal #vu8(0) (run form)))) - - (test-group "Simple string" - (let ((form "\"li\"")) - (test-equal '(string "li") (lex form)) - (test-equal #vu8(#x6C #x69 0) (run form)))) - - (test-group "Implicit concatenation of strings" - (let ((form "\"a\" \"b\"")) - (test-equal '((string "a") - (string "b")) - (lex form)) - (test-equal #vu8(#x61 #x62 0) - (run form)))) - - (test-group "Implicit concatenation of string and macro" - (let ((form "\"a\" MACRO")) - (test-equal '((string "a") (variable "MACRO")) (lex form)) - (test-equal '() (run form)))) - - (test-group "String with only escape" - (let ((form (string #\" #\\ #\" #\"))) - (test-equal `(string (escaped-char "\"")) (lex form)) - (test-equal #vu8(34 0) (run form)))) - - (test-group "String with escape at start" - (let ((form (string #\" #\\ #\" #\a #\"))) - (test-equal `(string (escaped-char "\"") "a") (lex form)) - (test-equal #vu8(34 #x61 0) (run form)))) - - (test-group "String with escape at end" - (let ((form (string #\" #\a #\\ #\" #\"))) - (test-equal `(string "a" (escaped-char "\"")) (lex form)) - (test-equal #vu8(#x61 34 0) (run form)))) - - (test-group "String with escape in middle" - (let ((form (string #\" #\a #\\ #\" #\b #\"))) - (test-equal `(string "a" (escaped-char "\"") "b") (lex form)) - (test-equal #vu8(#x61 34 #x62 0) (run form)))) - - ;; \e is semi non-standard - (test-group "String with bakslash-e esacpe" - (let ((form "\"\\e\"")) - (test-equal '(string (escaped-char "e")) (lex form)) - (test-equal #vu8(#x1b 0) (run form)))) - - (test-group "String with byte secquence escape" - (let ((form "\"\\xf0\\x9f\\x92\\xa9\"")) - (test-equal '(string (escaped-char (base-16-char "f0")) - (escaped-char (base-16-char "9f")) - (escaped-char (base-16-char "92")) - (escaped-char (base-16-char "a9"))) - (lex form)) - (test-equal #vu8(#xf0 #x9f #x92 #xa9 0) (run form))))) - -(test-group "__asm__" - (let ((form "__asm__(\".globl \" __XSTRING(sym))")) - (test-equal '() (lex form)) - ;; TODO implicit string concatenation - (test-equal '(funcall __asm__ - (string ".globl ") - (funcall __XSTRING sym)) (run form)))) - -(let ((form "__attribute__((__aligned__(x)))")) - (test-equal '(funcall (variable "__attribute__") - (group (group (funcall (variable "__aligned__") - (group (variable "x")))))) - (lex form)) - ;; This drops the extra set of parenthesis. Do we care? - (test-equal '(funcall __attribute__ - (funcall __aligned__ x)) - (run form))) -- cgit v1.2.3