From 0e3df321ab2fce795bdc6b9aeb92724733cf8ee0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20H=C3=B6rnquist?= Date: Thu, 21 Jul 2022 16:04:56 +0200 Subject: Major work on parser. --- module/c/ast.scm | 25 ++ module/c/compiler.scm | 84 ++---- module/c/cpp-types.scm | 3 +- module/c/eval-basic.scm | 63 ++++ module/c/flatten-begin.scm | 71 +++++ module/c/parse2.scm | 614 +++++++++++++++++++++------------------ module/c/preprocessor2.scm | 188 ++++++++---- module/c/util.scm | 20 ++ tests/test/cpp/parse2.scm | 245 ++++++++++++++++ tests/test/cpp/preprocessor2.scm | 48 +-- 10 files changed, 945 insertions(+), 416 deletions(-) create mode 100644 module/c/ast.scm create mode 100644 module/c/eval-basic.scm create mode 100644 module/c/flatten-begin.scm create mode 100644 module/c/util.scm create mode 100644 tests/test/cpp/parse2.scm diff --git a/module/c/ast.scm b/module/c/ast.scm new file mode 100644 index 00000000..bf5ad630 --- /dev/null +++ b/module/c/ast.scm @@ -0,0 +1,25 @@ +(define-module (c ast) + :use-module ((c to-token) :select (preprocessing-token->token)) + :use-module ((c parse2) + :select (build-lexical-analyzer + make-parser + error-procedure)) + :use-module ((hnh util) :select (->>)) + :use-module ((c flatten-begin) + :select (flatten-begin + remove-invalid-struct-like-declarations)) + :export (build-ast)) + +(define (parse% lexical-analyzer) + ((make-parser) lexical-analyzer error-procedure)) + +(define (build-ast cpp-tokens) + (->> cpp-tokens +;;; 7. Whitespace tokens are discarded, each preprocessing token is converted into a token + (map preprocessing-token->token) +;;; 8. All external objects and functions are resolved + build-lexical-analyzer + parse% + flatten-begin + remove-invalid-struct-like-declarations + )) diff --git a/module/c/compiler.scm b/module/c/compiler.scm index 09d49578..c1563a0b 100644 --- a/module/c/compiler.scm +++ b/module/c/compiler.scm @@ -1,64 +1,36 @@ (define-module (c compiler) - :use-module ((c lex2) :select (lex)) - :use-module ((c trigraph) :select (replace-trigraphs)) - :use-module ((c line-fold) :select (fold-lines)) - :use-module (c cpp-environment object-like-macro) - :use-module ((c cpp-environment) - :select (make-environment - extend-environment - enter-file)) - :use-module (hnh util) - ;; TODO importort - ;; handle-preprocessing-tokens - ;; load-and-tokenize-file - :export (run-compiler)) + :use-module ((srfi srfi-1) :select (remove)) + :use-module (srfi srfi-71) + :use-module ((c cpp-environment) :select (enter-file)) + :use-module ((hnh util) :select (-> ->>)) + :use-module ((hnh util io) :select (read-file)) + :use-module ((c preprocessor2) + :select (preprocess-string + make-default-environment)) + :use-module ((hnh util values) :select (abort* on-fst)) + :use-module ((c ast) :select (build-ast)) + :export (run-compiler + compile-string + compile-string* + the-environment + )) -" -#define __STDC__ 1 -#define __STDC_HOSTED__ 1 -#define __STDC_VERSION__ 201112L -" - -(define now (localtime (current-time))) -(define default-macros - (list - ;; 6.10.8 - (object-like-macro - identifier: "__STDC__" - body: (lex "1")) - (object-like-macro - identifier: "__STDC_HOSTED__" - body: (lex "1")) - (object-like-macro - identifier: "__STDC_VERSION__" - body: (lex "201112L")) - (object-like-macro - identifier: "__DATE__" - ;; TODO format should always be in - ;; english, and not tranlated - body: (lex (strftime "\"%b %_d %Y\"" now))) - (object-like-macro - identifier: "__TIME__" - body: (lex (strftime "\"%H:%M:%S\"" now))))) - -(define environment - (-> (make-environment) - (extend-environment default-macros))) +;;; 5.1.11.2 Translation phases +(define* (run-compiler path key: (environment (make-default-environment))) + (-> path read-file (compile-string (enter-file environment path)))) -;;; 5.1.11.2 Translation phases +(define* (compile-string str optional: (environment (make-default-environment))) + (on-fst (build-ast (abort* (preprocess-string str environment))))) + +(define the-environment (make-parameter (make-default-environment))) -(define (run-compiler path) - (define environment (enter-file (make-environment) path)) - (-> (load-and-tokenize-file path) - (handle-preprocessing-tokens environment)) -;;; 5. (something with character sets) -;;; 6. concatenation of string literals -;;; 7. Whitespace tokens are discarded, each preprocessing token is converted into a token - ;; 6.4 paragraph 2 - ;; Each preprocessing toket thas is converted to a token shall have the lexcal form of a keyword, an identifier, a constant, a string literal, or a puncturtor -;;; 8. All external objects and functions are resolved - ) +(define* (compile-string* str) + (let ((result cpp-env (compile-string str))) + (if (null? result) + (compile-string (string-append str ";") + (the-environment)) + result))) diff --git a/module/c/cpp-types.scm b/module/c/cpp-types.scm index 6dad061e..bf51d009 100644 --- a/module/c/cpp-types.scm +++ b/module/c/cpp-types.scm @@ -86,7 +86,8 @@ ;; NOTE q-string tokens are never produced by the lexer, ;; since they instead are treated as regular strings (define (q-string-token? token) - (string-token? token)) + (cond ((string-token? token) + (lambda (a . _) a) => (lambda (_ . a) (apply values a))))) (define (make-string-literal parts) (typecheck parts (list-of (or string? list?))) diff --git a/module/c/eval-basic.scm b/module/c/eval-basic.scm new file mode 100644 index 00000000..9a16a095 --- /dev/null +++ b/module/c/eval-basic.scm @@ -0,0 +1,63 @@ +(define-module (c eval-basic) + :use-module (ice-9 match) + :use-module (c eval2) + :export (eval-basic-c)) + +(define operators + `((bitwise-ior . ,(@ (srfi srfi-60) bitwise-ior)) + (bitwise-xor . ,(@ (srfi srfi-60) bitwise-xor)) + (bitwise-and . ,(@ (srfi srfi-60) bitwise-and)) + (bitwise-not . ,(@ (srfi srfi-60) bitwise-not)) + (== . ,(compose boolean->c-boolean =)) + (!= . ,(compose boolean->c-boolean not =)) + (<= . ,(compose boolean->c-boolean <=)) + (>= . ,(compose boolean->c-boolean <=)) + (< . ,(compose boolean->c-boolean <)) + (> . ,(compose boolean->c-boolean >)) + (not . ,c-not) + (<< . ,(lambda (n c) (ash n c))) + (>> . ,(lambda (n c) (ash (- n) c))) + (+ . ,+) + (- . ,-) + (* . ,*) + (/ . ,floor-quotient) + (% . ,floor-remainder) + (unary+ . ,+) + (unary- . ,-))) + +(define (eval-basic-c ast) + (define (err fmt . args) + (scm-error 'cpp-error "eval-basic-c" + fmt args #f)) + (match ast + (`((constexpr ,body)) + (let loop ((ast body)) + (match ast + (('begin forms ...) + (err "begin should be impossible here: ~s" forms)) + (('constant value) + (if (exact-integer? value) + value + (err "Only exact integers supported, got: ~s" value))) + (('string-literal value) + (err "String literals not supported: ~s" value)) + (('ternary expr true false) + (if (c-boolean->boolean (loop expr)) + (loop true) + (loop false))) + (('and a b) + (let ((a* (loop a))) + (if (c-boolean->boolean a*) + (loop b) + a*))) + (('or a b) + (let ((a* (loop a))) + (if (c-boolean->boolean a*) + a* + (loop b)))) + ((f args ...) + (cond ((assoc-ref operators f) + => (lambda (op) + (apply op (map loop args)))) + (else + (err "Unknown operator ~s" f))))))))) diff --git a/module/c/flatten-begin.scm b/module/c/flatten-begin.scm new file mode 100644 index 00000000..7543a5ac --- /dev/null +++ b/module/c/flatten-begin.scm @@ -0,0 +1,71 @@ +(define-module (c flatten-begin) + :use-module (srfi srfi-1) + :use-module (ice-9 match) + :use-module (ice-9 control) + :export (flatten-begin remove-invalid-struct-like-declarations)) + +(define (flatten-begin-1 forms) + (append-map + (lambda (form) + (match form + (('begin x ...) x) + (x (list x)))) + forms)) + +(define (flatten-begin form) + (match form + (`(begin ,x) (flatten-begin x)) + (`(let () ,('let '() x ...)) `(let () ,(flatten-begin x))) + (('begin forms ...) + `(begin ,@(map flatten-begin + (flatten-begin-1 forms)))) + (('let '() forms ...) + `(let () ,@(map flatten-begin + (flatten-begin-1 forms)))) + ;; switch already has to traverse subforms in order to find its labels. + ;; See (for example) Duff's device + ;; (`(switch ,x (begin ,('let '() forms ...))) + ;; `(let () (switch ,(flatten-begin x) + ;; ,@(map flatten-begin (flatten-begin-1 forms))))) + ((x ...) (map flatten-begin x)) + (x x))) + + + +;; [A] +;; ((type (struct +;; (named s) +;; (struct-declaration-list +;; (struct-declarator-list +;; (named x +;; (specifier-qualifier-list +;; (type int)))))))))) + + +(define (tree-valid? tree abandon) + (match tree + (`(struct-like-declaration ((type ,('struct body ...)))) + `(struct-like-declaration ((type (struct ,@(tree-valid? body abandon)))))) + (`(struct-like-declaration ((type ,('union body ...)))) + `(struct-like-declaration ((type (union ,@(tree-valid? body abandon)))))) + (('struct-like-declaration body ...) + (abandon #f)) + (('struct-declaration-list body ...) + ;; Filters out case [A] + (for-each (match-lambda + (`(specifier-qualifier-list (type ,((or 'struct 'union) body ...))) + 'noop) + (('specifier-qualifier-list body ...) + (abandon #f)) + (_ 'noop)) + body) + `(struct-declaration-list ,@(tree-valid? body abandon))) + ((a b ...) + (cons (tree-valid? a abandon) + (tree-valid? b abandon))) + (x x))) + +(define (remove-invalid-struct-like-declarations lst) + (filter-map (lambda (tree) + (call/ec (lambda (abandon) (tree-valid? tree abandon)))) + lst)) diff --git a/module/c/parse2.scm b/module/c/parse2.scm index fad2ffd8..34c1730f 100644 --- a/module/c/parse2.scm +++ b/module/c/parse2.scm @@ -1,12 +1,16 @@ (define-module (c parse2) :use-module (hnh util) - :use-module (system base lalr)) + :use-module (system base lalr) + :export (make-parser + build-lexical-analyzer + error-procedure)) + (define (make-parser) (lalr-parser (#{out-table:}# "/tmp/c-parser.txt") (#{output:}# c-parser "/tmp/c-parser.scm") - ;; (#{driver:}# glr) + (#{driver:}# glr) ( @@ -47,23 +51,27 @@ ;; punctuator - already translated ) - ;; Primitives + (top-level + (translation-unit) : $1 + (statement) : $1 + (constant-expression) : $1) - ;; (identifier) : $1 - ;; (constant) : $1 - ;; (string-literal) : $1 + ;; compounds + ;; 6.9 + (translation-unit + (external-declaration) : (list 'translation-unit $1) + (translation-unit external-declaration) : (append $1 (list $2))) - ;; compounds (primary-expression - ;; 6.5.1 - (identifier) - (constant) - (string-literal) - (lparen expression rparen) - (generic-selection)) + (identifier) : $1 + (constant) : `(constant ,$1) + (string-literal) : `(string-constant ,$1) + ;; output parenthesis skipped, since all forms come with their own + (lparen expression rparen) : $2 + (generic-selection) : $1) (enumeration-constant (identifier)) @@ -71,470 +79,524 @@ ;; 6.5.1.1 (generic-selection - (_Generic lparen assignment-expression comma generic-assoc-list)) + (_Generic lparen assignment-expression comma generic-assoc-list rparen) : `(generic ,$3 ,@$5)) (generic-assoc-list - (generic-association) - (generic-assoc-list comma generic-association)) + (generic-assoc-list comma generic-association) : (append $1 (list $3)) + (generic-association) : (list $1)) (generic-association - (type-name : assignment-expression) - (default : assignment-expression)) + (type-name : assignment-expression) : (cons $1 $3) + (default : assignment-expression) : (cons $1 $3)) ;; 6.5.2 (postfix-expression - (primary-expression) - (postfix-expression lbrack expression rbrack) - (postfix-expression lparen rparen) - (postfix-expression lparen argument-expression-list rparen) - (postfix-expression dot identifier) - (postfix-expression -> identifier) - (postfix-expression ++) - (postfix-expression --) + (postfix-expression lbrack expression rbrack) : `(idx ,$1 ,$3) + (postfix-expression lparen rparen) : `(,$1) + (postfix-expression lparen argument-expression-list rparen) : `(,$1 ,@$3) + (postfix-expression dot identifier) : `(dot-access ,$1 ,$3) + (postfix-expression -> identifier) : `(ptr-access ,$1 ,$3) + (postfix-expression ++) : `(postfix++ ,$1) + (postfix-expression --) : `(postfix-- ,$1) (lparen type-name rparen lbrace initializer-list rbrace) - (lparen type-name rparen lbrace initializer-list comma rbrace)) + (lparen type-name rparen lbrace initializer-list comma rbrace) + (primary-expression) : $1 + ) (argument-expression-list - (assignment-expression) - (argument-expression-list comma assignment-expression)) + (argument-expression-list comma assignment-expression) : (append $1 (list $3)) + (assignment-expression) : (list $1)) ;; 6.5.3 (unary-expression - (postfix-expression) - (++ unary-expression) - (-- unary-expression) - (unary-operator cast-expression) - (sizeof unary-expression) - (sizeof lparen type-name rparen) - (_Alignof lparen type-name rparen)) + (++ unary-expression) : `(prefix++ ,$2) + (-- unary-expression) : `(prefix-- ,$2) + (unary-operator cast-expression) : `(,$1 ,$2) + (sizeof unary-expression) : `(sizeof (typeof ,$2)) + (sizeof lparen type-name rparen) : `(sizeof ,$3) + (_Alignof lparen type-name rparen) : `(alignof ,$3) + (postfix-expression) : $1) (unary-operator - (&) - (*) - (+) - (-) - (~) - (!)) + (&) : 'pointer-to + (*) : 'dereference + (+) : 'unary+ ; separate from + and - for + (-) : 'unary- ; easier eval procedure later + (~) : 'bitwise-not + (!) : 'not) ;; 6.5.4 (cast-expression - (unary-expression) - (lparen type-name rparen cast-expression)) + (lparen type-name rparen cast-expression) : `(as-type ,$2 ,$4) + (unary-expression) : $1) ;; 6.5.5 (multiplicative-expression - (cast-expression) - (multiplicative-expression * cast-expression) - (multiplicative-expression / cast-expression) - (multiplicative-expression % cast-expression)) + (multiplicative-expression * cast-expression) : `(* ,$1 ,$3) + (multiplicative-expression / cast-expression) : `(/ ,$1 ,$3) + (multiplicative-expression % cast-expression) : `(% ,$1 ,$3) + (cast-expression) : $1) ;; 6.5.6 (additive-expression - (multiplicative-expression) - (additive-expression + multiplicative-expression) - (additive-expression - multiplicative-expression)) + (additive-expression + multiplicative-expression) : `(+ ,$1 ,$3) + (additive-expression - multiplicative-expression) : `(- ,$1 ,$3) + (multiplicative-expression) : $1) ;; 6.5.7 (shift-expression - (additive-expression) - (shift-expression << additive-expression) - (shift-expression >> additive-expression)) + (shift-expression << additive-expression) : `(<< ,$1 ,$3) + (shift-expression >> additive-expression) : `(>> ,$1 ,$3) + (additive-expression) : $1) ;; 6.5.8 (relational-expression - (shift-expression) - (relational-expression < shift-expression) - (relational-expression > shift-expression) - (relational-expression <= shift-expression) - (relational-expression >= shift-expression)) + (relational-expression < shift-expression) : `(< ,$1 ,$3) +(relational-expression > shift-expression) : `(> ,$1 ,$3) + (relational-expression <= shift-expression) : `(<= ,$1 ,$3) + (relational-expression >= shift-expression) : `(>= ,$1 ,$3) + (shift-expression) : $1) ;; 6.5.9 (equality-expression - (relational-expression) - (equality-expression == relational-expression) - (equality-expression != relational-expression)) + (equality-expression == relational-expression) : `(== ,$1 ,$3) + (equality-expression != relational-expression) : `(!= ,$1 ,$3) + (relational-expression) : $1) ;; 6.5.10 (AND-expression - (equality-expression) - (AND-expression & equality-expression)) + (AND-expression & equality-expression) : `(bitwise-and ,$1 ,$3) + (equality-expression) : $1) ;; 6.5.11 (exclusive-OR-expression - (AND-expression) - (exclusive-OR-expression ^ AND-expression)) + (exclusive-OR-expression ^ AND-expression) : `(bitwise-xor ,$1 ,$3) + (AND-expression) : $1) ;; 6.5.12 (inclusive-OR-expression - (exclusive-OR-expression) - (inclusive-OR-expression pipe exclusive-OR-expression)) + (inclusive-OR-expression pipe exclusive-OR-expression) : `(bitwise-ior ,$1 ,$3) + (exclusive-OR-expression) : $1) ;; 6.5.13 (logical-AND-expression - (inclusive-OR-expression) - (logical-AND-expression && inclusive-OR-expression)) + (logical-AND-expression && inclusive-OR-expression) : `(and ,$1 ,$3) + (inclusive-OR-expression) : $1) ;; 6.5.14 (logical-OR-expression - (logical-AND-expression) - (logical-OR-expression pipe2 logical-AND-expression)) + (logical-OR-expression pipe2 logical-AND-expression) : `(or ,$1 ,$3) + (logical-AND-expression) : $1) ;; 6.5.15 (conditional-expression - (logical-OR-expression) - (logical-OR-expression ? expression : conditional-expression)) + (logical-OR-expression ? expression : conditional-expression) : `(ternary ,$1 ,$3 ,$5) + (logical-OR-expression) : $1) ;; 6.5.16 (assignment-expression - (conditional-expression) - (unary-expression assignment-operator assignment-expression)) + (unary-expression assignment-operator assignment-expression) : `(,$2 ,$1 ,$3) + (conditional-expression) : $1) (assignment-operator - (=) - (*=) - (/=) - (%=) - (+=) - (-=) - (<<=) - (>>=) - (&=) - (^=) - (pipe=)) + (=) : $1 + (*=) : $1 + (/=) : $1 + (%=) : $1 + (+=) : $1 + (-=) : $1 + (<<=) : $1 + (>>=) : $1 + (&=) : $1 + (^=) : $1 + (pipe=) : $1) ;; 6.5.17 (expression - (assignment-expression) - (expression comma assignment-expression)) + (assignment-expression) : `(begin ,$1) + ;; (This is the comma operator) + (expression comma assignment-expression) : (append $1 (list $3))) ;; 6.6 constant expression (constant-expression - (expression)) + (expression) : `(constexpr ,$1)) ;; 6.7 (declaration - (declaration-specifiers semicolon) - (declaration-specifiers init-declarator-list semicolon) + (declaration-specifiers init-declarator-list semicolon) : ($2 $1) + ;; TODO when is declare-specifiers without init-declarator-list case relevant? + ;; It when enabled, it thinks just about everything is this: + ;; (compile-string* "int x;") + ;; ⇒ (translation-unit (define (named x ((type int))) )) + ;; ⇒ (translation-unit (declare1 ((type int) (type (typedef x))))) + ;; NOTE this case is for structs + ;; "struct s;" + (declaration-specifiers semicolon) : `(struct-like-declaration ,$1) + (static_assert-declaration)) (declaration-specifiers - (storage-class-specifier) - (storage-class-specifier declaration-specifiers) + (storage-class-specifier declaration-specifiers) : (cons $1 $2) + (storage-class-specifier) : (list $1) + + (type-specifier declaration-specifiers) : (cons $1 $2) + (type-specifier) : (list $1) + + (type-qualifier declaration-specifiers) : (cons $1 $2) + (type-qualifier) : (list $1) - (type-specifier) - (type-specifier declaration-specifiers) + (function-specifier declaration-specifiers) : (cons $1 $2) + (function-specifier) : (list $1) - (type-qualifier) - (type-qualifier declaration-specifiers) + (alignment-specifier declaration-specifiers) : (cons $1 $2) + (alignment-specifier) : (list $1)) - (function-specifier) - (function-specifier declaration-specifiers) - (alignment-specifier) - (alignment-specifier declaration-specifiers)) (init-declarator-list - (init-declarator) - (init-declarator-list comma init-declarator)) + (init-declarator-list comma init-declarator) : (lambda (type) (append ($1 type) (list ($3 type)))) + (init-declarator) : (lambda (type) `(begin ,($1 type)))) (init-declarator - (declarator) - (declarator = initializer)) + (declarator = initializer) : (lambda (type) `(define ,($1 type) ,$3)) + (declarator) : (lambda (type) `(define ,($1 type) ))) ;; 6.7.1 (storage-class-specifier - (typedef) - (extern) - (static) - (_Thread_local) - (auto) - (register)) + (typedef) : '(storage typedef) + (extern) : '(storage extern) + (static) : '(storage static) + (_Thread_local) : '(storage thread-local) + (auto) : '(storage auto) + (register) : '(storage register)) - ;; 6.7.2 - (type-specifier - (void) - (char) - (short) - (int) - (long) - (float) - (double) - (signed) - (unsigned) - (_Bool) - (_Complex) - (atomic-type-specifier) - (struct-or-union-specifier) - (enum-specifier) - (typedef-name)) ;; 6.7.2.1 (struct-or-union-specifier - (struct-or-union lbrace struct-declaration-list rbrace) - (struct-or-union identifier lbrace struct-declaration-list rbrace) - (struct-or-union identifier)) + (struct-or-union lbrace struct-declaration-list rbrace) : `(,$1 ,$3) + (struct-or-union identifier lbrace struct-declaration-list rbrace) : `(,$1 (named ,$2) ,$4) + (struct-or-union identifier) : `(,$1 (named ,$2))) (struct-or-union - (struct) - (union)) + (struct) : 'struct + (union) : 'union) (struct-declaration-list - (struct-declaration) - (struct-declaration-list struct-declaration)) + (struct-declaration-list struct-declaration) : (append $1 (list $2)) + (struct-declaration) : (list 'struct-declaration-list $1) + ) (struct-declaration - (specifier-qualifier-list semicolon) - (specifier-qualifier-list struct-declarator-list semicolon) + (specifier-qualifier-list semicolon) : $1 + (specifier-qualifier-list struct-declarator-list semicolon) : ($2 $1) (static_assert-declaration)) (specifier-qualifier-list - (type-specifier) - (type-specifier specifier-qualifier-list) + (type-specifier specifier-qualifier-list) : `(specifier-qualifier-list ,$1 ,@(cdr $2)) + (type-specifier) : `(specifier-qualifier-list ,$1) - (type-qualifier) - (type-qualifier specifier-qualifier-list)) + (type-qualifier specifier-qualifier-list) : `(specifier-qualifier-list ,$1 ,@(cdr $2)) + (type-qualifier) : `(specifier-qualifier-list ,$1)) (struct-declarator-list - (struct-declarator) - (struct-declarator-list comma struct-declarator)) + (struct-declarator-list comma struct-declarator) : (lambda (type) (append ($1 type) (list ($3 type)))) + (struct-declarator) : (lambda (type) (list 'struct-declarator-list ($1 type))) + ) (struct-declarator - (declarator) - (: constant-expression) - (declarator : constant-expression)) + (: constant-expression) : (lambda (type) `(of-width *nothing* ,$2)) + (declarator : constant-expression) : (lambda (type) `(of-width ,($1 type) ,$3)) + (declarator) : $1) ;; 6.7.2.2 (enum-specifier - (enum identifier lbrace enumerator-list rbrace) - (enum lbrace enumerator-list rbrace) + (enum identifier lbrace enumerator-list rbrace) : `(enum (named ,$2) ,$4) + (enum lbrace enumerator-list rbrace) : `(enum ,$3) - (enum identifier lbrace enumerator-list comma rbrace) - (enum lbrace enumerator-list comma rbrace) + (enum identifier lbrace enumerator-list comma rbrace) : `(enum (named ,$2) ,$4) + (enum lbrace enumerator-list comma rbrace) : `(enum ,$3) - (enum identifier)) + (enum identifier) : `(enum (named ,$2))) (enumerator-list - (enumerator) - (enumerator-list comma enumerator)) + (enumerator-list comma enumerator) : (append $1 (list $3)) + (enumerator) : (list $1) + ) (enumerator - (enumeration-constant) - (enumeration-constant = constant-expression)) + (enumeration-constant = constant-expression) : (list $1 $3) + (enumeration-constant) : $1 + ) ;; 6.7.2.4 (atomic-type-specifier - (_Atomic lparen type-name rparen)) + (_Atomic lparen type-name rparen) : `(atomic ,$3)) ;; 6.7.3 (type-qualifier - (const) - (restrict) - (volatile) - (_Atomic)) + (const) : `(qualifier const) + (restrict) : `(qualifier restrict) + (volatile) : `(qualifier volatile) + (_Atomic) : `(qualifier atomic) + ) ;; 6.7.4 (function-specifier - (inline) - (_Noreturn)) + (inline) : 'inline + (_Noreturn) : 'noterun) ;; 6.7.5 (alignment-specifier - (_Alignas lparen type-name rparen) - (_Alignas lparen constant-expression rparen)) + (_Alignas lparen type-name rparen) : `(aligned-as ,$3) + (_Alignas lparen constant-expression rparen) : `(aligned-as ,$3)) ;; 6.7.6 (declarator - (pointer direct-declarator) - (direct-declarator)) + (pointer direct-declarator) : (lambda (type) ($2 ($1 type))) + (direct-declarator) : (lambda (type) ($1 type))) (direct-declarator - (identifier) - ( lparen declarator rparen ) - - (direct-declarator lbrack type-qualifier-list assignment-expression rbrack ) - (direct-declarator lbrack assignment-expression rbrack ) - (direct-declarator lbrack type-qualifier-list rbrack ) - (direct-declarator lbrack rbrack ) - (direct-declarator lbrack static type-qualifier-list assignment-expression rbrack) - (direct-declarator lbrack static assignment-expression rbrack) - - (direct-declarator lbrack type-qualifier-list static assignment-expression rbrack) - - (direct-declarator lbrack type-qualifier-list * rbrack) - (direct-declarator lbrack * rbrack) - - (direct-declarator lparen parameter-type-list rparen ) - (direct-declarator lparen identifier-list rparen ) - (direct-declarator lparen rparen )) + (direct-declarator lbrack type-qualifier-list assignment-expression rbrack ) : (lambda (type) ($1 `(array (of-length (,$3 ,$4)) + (containing ,type)))) + (direct-declarator lbrack assignment-expression rbrack ) : (lambda (type) ($1 `(array (of-length ,$3) + (containing ,type)))) + (direct-declarator lbrack type-qualifier-list rbrack ) : (lambda (type) ($1 `(array (of-length (,$3)) + (containing ,type)))) + (direct-declarator lbrack rbrack ) : (lambda (type) ($1 `(array (of-indeterminate-length) + (containing ,type)))) + + (direct-declarator lbrack static type-qualifier-list assignment-expression rbrack) : (lambda (type) `(array (static) + (containing ,type) + (of-length (,$3 ,$4)))) + (direct-declarator lbrack static assignment-expression rbrack) : (lambda (type) `(array (static) + (containing ,type) + (of-length ,$4))) + + ;; TODO static position + (direct-declarator lbrack type-qualifier-list static assignment-expression rbrack) : (lambda (type) `(array (static) + (containing ,type) + (of-length (,$3 ,$4)))) + + (direct-declarator lbrack type-qualifier-list * rbrack) : (lambda (type) ($1 '(array (containing ,type) (of-variable-length ,$3)))) + (direct-declarator lbrack * rbrack) : (lambda (type) ($1 '(array (containing ,type) (of-variable-length)))) + + (direct-declarator lparen parameter-type-list rparen ) : (lambda (returning) ($1 `(procedure (returning ,returning) (taking ,@(cdr $3))))) + (direct-declarator lparen identifier-list rparen ) : (lambda (returning) ($1 `(procedure (returning ,returning) (taking ,@(cdr $3))))) + (direct-declarator lparen rparen ) : (lambda (returning) ($1 `(procedure (returning ,returning) (taking *any*)))) + + ( lparen declarator rparen ) : (lambda (type) (list ($2 type))) + (identifier) : (lambda (type) `(named ,$1 ,type)) + ) (pointer - (* type-qualifier-list) - (*) - (* type-qualifier-list pointer) - (* pointer)) + (* type-qualifier-list) : (lambda (to) `(,$2 (pointer-to ,to))) + (* type-qualifier-list pointer) : (lambda (to) `(,$2 (pointer-to ,($3 to)))) + (* pointer) : (lambda (to) `(pointer-to ,($2 to))) + (*) : (lambda (to) `(pointer-to ,to))) (type-qualifier-list - (type-qualifier) - (type-qualifier-list type-qualifier)) + (type-qualifier-list type-qualifier) : (append $1 (list $2)) + (type-qualifier) : (list 'type-qualifier-list $1) + ) (parameter-type-list - (parameter-list) - (parameter-list comma ...)) + (parameter-list comma ...) : `(parameter-list ,$1 ...) + (parameter-list) : `(parameter-list ,$1) + ) (parameter-list - (parameter-declaration) - (parameter-list comma parameter-declaration)) + (parameter-list comma parameter-declaration) : (append $1 (list $3)) + (parameter-declaration) : (list $1) + ) (parameter-declaration - (declaration-specifiers declarator) - (declaration-specifiers abstract-declarator) - (declaration-specifiers)) + (declaration-specifiers declarator) : ($2 $1) + (declaration-specifiers abstract-declarator) : ($2 $1) + (declaration-specifiers) : $1) (identifier-list - (identifier) - (identifier-list comma identifier)) + (identifier-list comma identifier) : (append $1 (list $3)) + (identifier) : (list 'identifier-list $1)) ;; 6.7.7 (type-name - (specifier-qualifier-list) - (specifier-qualifier-list abstract-declarator)) + (specifier-qualifier-list abstract-declarator) : ($2 $1) + (specifier-qualifier-list) : $1 + ) (abstract-declarator - (pointer) - (pointer direct-abstract-declarator) - ( direct-abstract-declarator)) + (pointer direct-abstract-declarator) : (compose $1 $2) + (pointer) : $1 + ( direct-abstract-declarator) : $1 + ) (direct-abstract-declarator - ( lparen abstract-declarator rparen ) - (direct-abstract-declarator lbrack type-qualifier-list assignment-expression rbrack ) - (direct-abstract-declarator lbrack type-qualifier-list rbrack ) - (direct-abstract-declarator lbrack assignment-expression rbrack ) - ( lbrack rbrack ) - ( lbrack type-qualifier-list assignment-expression rbrack ) - ( lbrack type-qualifier-list rbrack ) - (direct-abstract-declarator lbrack * rbrack) - ( lbrack * rbrack) - (direct-abstract-declarator lparen parameter-type-list rparen ) - (direct-abstract-declarator lparen rparen ) - ( lparen parameter-type-list rparen ) - ( lparen rparen )) + ( lparen abstract-declarator rparen ) : (lambda (type) (list ($2 type))) + (direct-abstract-declarator lbrack type-qualifier-list assignment-expression rbrack ) : (lambda (type) ($1 `(array (of-length (,$3 ,$4)) + (containing ,type)))) + (direct-abstract-declarator lbrack type-qualifier-list rbrack ) : (lambda (type) ($1 `(array (of-length (,$3)) + (containing ,type)))) + (direct-abstract-declarator lbrack assignment-expression rbrack ) : (lambda (type) ($1 `(array (of-length ,$3) + (containing ,type)))) + ( lbrack rbrack ) : (lambda (type) `(array (of-indeterminate-length) + (containing ,type))) + ( lbrack type-qualifier-list assignment-expression rbrack ) : (lambda (type) `(array (containing ,type) + (of-length (,$3 ,$4)))) + ( lbrack type-qualifier-list rbrack ) : (lambda (type) `(array (containing ,type) + (of-length (,$3)))) + + (direct-abstract-declarator lbrack static type-qualifier-list assignment-expression rbrack ) : (lambda (type) ($1 `(array (static) + (of-length (,$4 ,$5)) + (containing ,type)))) + (direct-abstract-declarator lbrack static assignment-expression rbrack ) : (lambda (type) ($1 `(array (static) (of-length ,$4) (containing ,type)))) + ( lbrack static type-qualifier-list assignment-expression rbrack ) : (lambda (type) `(array (static) (of-length ,($4 $5)) (containing ,type))) + ( lbrack static assignment-expression rbrack ) : (lambda (type) `(array (static) (of-length ,$3) (containing ,type))) + + ;; TODO static position + (direct-abstract-declarator lbrack type-qualifier-list static assignment-expression rbrack ) : (lambda (type) `(array (static) (of-length (,$3 ,$4)) (containing ,type))) + ( lbrack type-qualifier-list static assignment-expression rbrack ) : (lambda (type) `(array (static) (of-length (,$2 ,$3)) (containing ,type))) + + (direct-abstract-declarator lbrack * rbrack) : (lambda (type) ($1 `(array (of-variable-length) (containing ,type)))) + ( lbrack * rbrack) : (lambda (type) `(array (of-variable-length) (containing ,type))) + (direct-abstract-declarator lparen parameter-type-list rparen ) : (lambda (returning) ($1 `(procedure (returning ,returning) (taking ,$3)))) + (direct-abstract-declarator lparen rparen ) : (lambda (returning) ($1 `(procedure (returning ,returning) (taking *any*)))) + ( lparen parameter-type-list rparen ) : (lambda (returning) `(procedure (returning ,returning) (taking ,$2))) + ( lparen rparen ) : (lambda (returning) `(procedure (returning ,returning) (taking *any*)))) ;; 6.7.8 (typedef-name - (identifier)) + (identifier) : `(typedef ,$1)) ;; 6.7.9 (initializer - (assignment-expression) - (lbrace initializer-list rbrace) - (lbrace initializer-list comma rbrace)) + (lbrace initializer-list rbrace) : $2 + (lbrace initializer-list comma rbrace) : $2 + (assignment-expression) : $1) (initializer-list - (designation initializer) - (initializer) - (initializer-list comma designation initializer) - (initializer-list comma initializer)) + (initializer-list comma designation initializer) : (append $1 (list `(designate ,$3 ,$4))) + (initializer-list comma initializer) : (append $1 (list $3)) + (designation initializer) : `(initializer-list (designate ,$1 ,$2)) + (initializer) : `(initializer-list ,$1)) (designation - (designator-list =)) + (designator-list =) : $1) (designator-list - (designator) - (designator-list designator)) + (designator-list designator) : (append $1 (list $2)) + (designator) : (list 'designators $1)) (designator - (lbrack constant-expression rbrack) - (dot identifier)) + (lbrack constant-expression rbrack) : `(idx ,$2) + (dot identifier) : `(slot ,$2)) ;; 6.7.10 (static_assert-declaration - (_Static_assert lparen constant-expression comma string-literal rparen semicolon)) + (_Static_assert lparen constant-expression comma string-literal rparen semicolon) + : `(static-assert ,$3 ,$5)) ;; 6.8 (statement - (labeled-statement) - (compound-statement) - (expression-statement) - (selection-statement) - (iteration-statement) - (jump-statement)) + (labeled-statement) : $1 + (compound-statement) : $1 + (expression-statement) : $1 + (selection-statement) : $1 + (iteration-statement) : $1 + (jump-statement) : $1) ;; 6.8.1 (labeled-statement - (identifier : statement) - (case constant-expression : statement) - (default : statement)) + (identifier : statement) : `(labeled ,$1 ,$3) + (case constant-expression : statement) : `(case ,$2 ,$4) + (default : statement) : `(case-default ,$3)) ;; 6.8.2 (compound-statement - (lbrace block-item-list rbrace) - (lbrace rbrace)) + (lbrace block-item-list rbrace) : `(begin ,$2) + (lbrace rbrace) : '(begin)) (block-item-list - (block-item) - (block-item-list block-item)) + (block-item-list block-item) : (append $1 (list $2)) + (block-item) : `(let () ,$1) + ) (block-item - (declaration) - (statement)) + (declaration) : $1 + (statement) : $1) ;; 6.8.3 (expression-statement - (expression semicolon) - (semicolon)) + (expression semicolon) : $1 + (semicolon) : '(noop)) (selection-statement - (if lparen expression rparen statement) - (if lparen expression rparen statement else statement) - (switch lparen expression rparen statement)) + (if lparen expression rparen statement else statement) : `(if ,$3 ,$5 ,$7) + (if lparen expression rparen statement) : `(when ,$3 ,$5) + (switch lparen expression rparen statement) : `(switch ,$3 ,$5)) ;; 6.8.5 (iteration-statement - (while lparen expression rparen statement) - (do statement while lparen expression rparen semicolon) - (for lparen expression semicolon expression semicolon expression rparen statement) - (for lparen expression semicolon expression semicolon rparen statement) - (for lparen expression semicolon semicolon expression rparen statement) - (for lparen semicolon semicolon rparen statement) - (for lparen semicolon expression semicolon expression rparen statement) - (for lparen semicolon expression semicolon rparen statement) - (for lparen declaration expression semicolon expression rparen statement) + (while lparen expression rparen statement) : `(while ,$3 ,$5) + (do statement while lparen expression rparen semicolon) : `(do-while ,$5 ,$2) + (for lparen expression semicolon expression semicolon expression rparen statement) : `(for (init ,$3) (cond ,$5) (step ,$7) ,$9) + (for lparen expression semicolon expression semicolon rparen statement) : `(for (init ,$3) (cond ,$5) (step ) ,$8) + (for lparen expression semicolon semicolon expression rparen statement) : `(for (init ,$3) (cond ) (step ,$6) ,$8) + (for lparen semicolon semicolon rparen statement) : `(for (init ) (cond ) (step ) ,$6) + (for lparen semicolon expression semicolon expression rparen statement) : `(for (init ) (cond ,$4) (step ,$6) ,$8) + (for lparen semicolon expression semicolon rparen statement) : `(for (init ) (cond ,$4) (step ) ,$7) + (for lparen declaration expression semicolon expression rparen statement) ; TODO (for lparen declaration expression semicolon rparen statement) (for lparen declaration semicolon expression rparen statement) (for lparen declaration semicolon rparen statement)) ;; 6.8.6 (jump-statement - (goto identifier semicolon) - (continue semicolon) - (break semicolon) - (return expression semicolon) - (return semicolon)) - + (goto identifier semicolon) : `(goto ,$2) + (continue semicolon) : `(continue) + (break semicolon) : `(break) + (return expression semicolon) : `(return ,$2) + (return semicolon) : `(return)) ;; 6.9 - (translation-unit - (external-declaration) - (translation-unit external-declaration)) - (external-declaration - (function-definition) - (declaration)) + (function-definition) : $1 + (declaration) : $1) ;; 6.9.1 (function-definition (declaration-specifiers declarator declaration-list compound-statement) - (declaration-specifiers declarator compound-statement)) + (declaration-specifiers declarator compound-statement) : `(define ,($2 $1) ,$3)) (declaration-list - (declaration) - (declaration-list declaration)))) + (declaration-list declaration) : (append $1 (list $2)) + (declaration) : `(declaration-list ,$1)) + + + ;; 6.7.2 + ;; Placed AFTER init-declarator to handle "int x = 5;" case. Otherwise it's + ;; only treated as a (really badly formed) typedef + (type-specifier + (void) : `(type ,$1) + (char) : `(type ,$1) + (short) : `(type ,$1) + (int) : `(type ,$1) + (long) : `(type ,$1) + (float) : `(type ,$1) + (double) : `(type ,$1) + (signed) : `(type ,$1) + (unsigned) : `(type ,$1) + (_Bool) : `(type bool) + (_Complex) : `(type complex) + (atomic-type-specifier) : `(type ,$1) + (struct-or-union-specifier) : `(type ,$1) + (enum-specifier) : `(type ,$1) + (typedef-name) : `(type ,$1)))) (define (build-lexical-analyzer tokens) diff --git a/module/c/preprocessor2.scm b/module/c/preprocessor2.scm index 3f9552c5..b9b11d0a 100644 --- a/module/c/preprocessor2.scm +++ b/module/c/preprocessor2.scm @@ -4,17 +4,20 @@ :use-module (srfi srfi-88) :use-module (c cpp-environment) - :use-module (c eval2) + :use-module ((c eval2) :select (c-boolean->boolean)) + :use-module ((c eval-basic) :select (eval-basic-c)) :use-module ((c cpp-environment function-like-macro) - :select (function-like-macro variadic? identifier-list)) - :use-module ((c cpp-environment object-like-macro) :select (object-like-macro object-like-macro?)) + :select (function-like-macro variadic?)) + :use-module ((c cpp-environment object-like-macro) + :select (object-like-macro object-like-macro?)) :use-module ((c cpp-environment internal-macro) :select (internal-macro)) - :use-module ((hnh util) :select (-> ->> intersperse aif swap unless unval break/all)) + :use-module ((hnh util) :select (-> ->> intersperse swap unless unval break/all)) :use-module ((hnh util lens) :select (set modify cdr*)) :use-module (hnh util path) :use-module (hnh util type) :use-module (hnh util object) :use-module ((hnh util values) :select (abort* on-fst on-snd apply/values)) + :use-module ((hnh util io) :select (read-file)) :use-module ((c lex2) :select (lex placemaker @@ -27,15 +30,16 @@ :use-module (c unlex) :use-module (c cpp-types) :use-module (c cpp-util) + :use-module ((c ast) :select (build-ast)) :export (_Pragma-macro - defined-macro + ;; defined-macro c-search-path - handle-preprocessing-tokens)) + handle-preprocessing-tokens + preprocess-string + make-default-environment + )) -(define (read-file path) - (call-with-input-file path (@ (ice-9 rdelim) read-string))) - (define-syntax-rule (alist-of variable key-type value-type) @@ -55,12 +59,6 @@ (define (ellipsis-token? token) (equal? "..." (punctuator-token? token))) -;; TODO -;; > #if defined X -;; is equivalent to -;; > #if defined(X) - - ;; parameters is a lexeme list, as returned by parse-parameter-list (define (build-parameter-map macro parameters) (typecheck macro cpp-macro?) @@ -201,15 +199,6 @@ expand##)))) - -;; Expand object-like macro - -;; #define VALUE 10 -;; #define str(x) #x -;; #define OTHER str(VALUE) -;; OTHER -;; ⇒ "VALUE" - ;; remaining-tokens should be the token stream just after the name of the macro (define (expand-macro environment macro noexpand-list remaining-tokens) (typecheck environment cpp-environment?) @@ -378,19 +367,6 @@ body: (lex (number->string (current-line environment))))))) -(define defined-macro - (internal-macro - identifier: "defined" - body: (lambda (environment arguments) - (typecheck arguments (and (list-of (list-of lexeme?)) - (not null?))) - (aif (identifier-token? (car (list-ref arguments 0))) - (let ((in-env (boolean->c-boolean (in-environment? environment it)))) - (values environment (lex (number->string in-env)))) - (scm-error 'cpp-error "defined" - "Invalid parameter list to `defined': ~s" - (list arguments) #f))))) - (define _Pragma-macro (internal-macro identifier: "_Pragma" @@ -436,21 +412,65 @@ environment)))) -;; 6.10.1 p. 4 -(define (resolve-constant-expression cpp-tokens) - ;; (typecheck tokens (list-of lexeme?)) +;; (next-token-or-group (lex " x y") +;; => (car (lex "x")) +;; => (lex " y") +;; next-token-or-group (lex " (x) y") +;; => (lex "(x)") +;; => (lex " y") +(define (next-token-or-group tokens) + (let loop ((tokens (drop-whitespace tokens))) + (cond ((null? tokens) + ;; TODO error here? + '()) + ((left-parenthesis-token? (car tokens)) + (parse-group tokens)) + ((preprocessing-token? (car tokens)) + (car+cdr tokens)) + (else + (loop (cdr tokens)))))) + + + +(define (parse-if-line environment cpp-tokens) (define zero (car (lex "0"))) - #; - (define tokens - (map preprocessing-token->token - (map (lambda (token) - (cond ((identifier-token? token) zero) - (else token))) - (remove whitespace-token? tokens)))) + (define one (car (lex "1"))) + + (define (drop-identifiers tokens) + (map (lambda (x) (if (identifier-token? x) + zero x)) + tokens)) + + (drop-identifiers + (let ((environment (join-file-line environment))) + (let loop ((tokens cpp-tokens)) + (cond ((null? tokens) '()) + ((identifier-token? (car tokens)) + (lambda (s) (and s (string=? s "defined"))) + => (lambda _ + (let ((next rest (next-token-or-group (cdr tokens)))) + (cons (if (and=> (identifier-token? (if (parenthesis-group? next) + ;; TODO empty group + (car (drop-whitespace (parenthesis-group-tokens next))) + next)) + (lambda (it) (in-environment? environment it))) + one zero) + (loop rest))))) + + ((and (identifier-token? (car tokens)) + (not (marked-noexpand? (car tokens)))) + (let ((_ tokens + (maybe-extend-identifier environment + (identifier-token? (car tokens)) + (lexeme-noexpand (car tokens)) + (cdr tokens)))) + (loop tokens))) + + (else (cons (car tokens) + (loop (cdr tokens))))))))) + + - 'TODO - ;; eval as per 6.6 - ) @@ -498,13 +518,16 @@ (typecheck environment cpp-environment?) ;; (typecheck tokens (list-of lexeme?)) - (-> (extend-environment environment (list defined-macro)) - ;; no newlines in #if line - ((unval resolve-token-stream 1) tokens) - resolve-constant-expression - c-boolean->boolean - (if (enter-active-if environment) - (enter-inactive-if environment)))) + (if (->> tokens + (parse-if-line environment) + (remove whitespace-token?) + merge-string-literals + build-ast + ;; 6.10.1 p. 4 + eval-basic-c + c-boolean->boolean) + (enter-active-if environment) + (enter-inactive-if environment))) ;; environment, string, (list token) → environment, (list token) (define (maybe-extend-identifier environment identifier noexpand-list remaining-tokens) @@ -719,11 +742,10 @@ (unless (in-conditional? env) (err "#endif outside conditional")) (leave-if env))) - ((else) (lambda (env _) - (unless (in-conditional? env) - (err "#else outside conditional")) - (flip-flop-if env))) - ;; ((elif) (lambda )) + ((else elif) (lambda (env _) + (unless (in-conditional? env) + (err "#else outside conditional")) + (flip-flop-if env))) ((define) resolve-define) ((undef) (lambda (env body) (remove-identifier env (identifier-token? (car body))))) ((line) handle-line-directive) @@ -750,3 +772,45 @@ (else (err "Unexpected middle of line, (near ~s)" (unlex tokens)))))) + + + + +(define* (make-default-environment key: (now (localtime (current-time)))) + (call-with-values + (lambda () + (preprocess-string + (format + #f + " +#define __STDC__ 1 +#define __STDC_HOSTED__ 1 +#define __STDC_VERSION__ 201112L +#define __DATE__ \"~a\" +#define __TIME__ \"~a\" +" + ;; TODO format should always be in + ;; english, and not tranlated + (strftime "\"%b %_d %Y\"" now) + (strftime "\"%H:%M:%S\"" now)) + (make-environment))) + (lambda (env _) env))) + + +(define* (preprocess-string str optional: (environment (make-default-environment))) + (on-snd + (->> + (abort* + (->> str +;;; Phase 1-3 + tokenize +;;; 4. Execution of preprocessing directives, all preprocessing directives are then deleted + (handle-preprocessing-tokens environment))) + +;;; 5. (something with character sets) +;;; 7. Whitespace tokens are discarded, each preprocessing token is converted into a token + (remove whitespace-token?) +;;; 6. concatenation of string literals +;;; Should be done before removal of whitespace, but I don't understand why + merge-string-literals + ))) diff --git a/module/c/util.scm b/module/c/util.scm new file mode 100644 index 00000000..f258d3e3 --- /dev/null +++ b/module/c/util.scm @@ -0,0 +1,20 @@ +(use-modules (c lex2) + (srfi srfi-1) + (srfi srfi-88) + (c to-token) + (c cpp-types)) + +(define (tok s) + (map preprocessing-token->token (remove whitespace-token? (lex s)))) + +(define* (parse tokens optional: (parser make-parser)) + ((parser) + (build-lexical-analyzer tokens) + error)) + + +(define (mm) + (lalr-parser + (x) + (y (x)) + )) diff --git a/tests/test/cpp/parse2.scm b/tests/test/cpp/parse2.scm new file mode 100644 index 00000000..41404480 --- /dev/null +++ b/tests/test/cpp/parse2.scm @@ -0,0 +1,245 @@ +(define-module (test cpp parse2) + :use-module (srfi srfi-64) + :use-module (srfi srfi-88) + :use-module ((c ast) :select (build-ast)) + :use-module ((c preprocessor2) + :select (preprocess-string + make-default-environment))) + + +(define (run str) + (call-with-values (lambda () (preprocess-string str (make-default-environment))) + (lambda (_ tokens) + (build-ast tokens)))) + + + +(test-group "primitives" + (test-equal "Simple integer" + '((constexpr (constant 1))) + (run "1")) + + (test-equal "Complex integer" + '((constexpr (constant 16))) + (run "0x10l")) + + (test-equal "Simple character" + '((constexpr (constant #x41))) + (run "'A'")) + + (test-equal "String literal" + '((constexpr (string-constant #vu8(#x48 #x65 #x6c #x6c #x6f 0)))) + (run "\"Hello\""))) + + + +(test-equal "_Generic" + '((constexpr + (generic X + ((specifier-qualifier-list (type long) (type double)) + . cbrtl) + (default . cbrt) + ((specifier-qualifier-list (type float)) . cbrtf)))) + (run "_Generic(X, long double: cbrtl, default: cbrt, float: cbrtf)")) + +(test-group "postfix expression" + (test-equal "array index" + '((constexpr (idx arr i))) + (run "arr[i]")) + + (test-equal "Funcall" + '((constexpr (f))) + (run "f()")) + + (test-equal "Funcall with args" + '((constexpr (f a b c))) + (run "f(a,b,c)")) + + (test-equal "Chained function calls" + '((constexpr ((f a) b))) + (run "f(a)(b)")) + + (test-equal "dot-access" + '((constexpr (dot-access a b))) + (run "a.b")) + + (test-equal "chained dotaccess" + '((constexpr (dot-access (dot-access a b) c))) + (run "a.b.c")) + + (test-equal "ptr-access" + '((constexpr (ptr-access a b))) + (run "a->b"))) + +;; unary expressions + + +;; cast expresions + +(test-equal "Chained casts" + '((constexpr + (as-type (specifier-qualifier-list (type short)) + (as-type (specifier-qualifier-list (type int)) + x)))) + (run "(short) (int) x")) + + +(test-equal "Ternary" + '((constexpr (ternary (constant 1) + (constant 2) + (constant 3)))) + (run "1 ? 2 : 3")) + + +(test-equal "Comma operator" + '((constexpr (begin (= x (constant 10)) + (= y (constant 20))))) + (run "x = 10, y = 20")) + + + +(test-group "Declarations" + (test-equal "Simple" + '((translation-unit + (define (named x ((type int))) + ))) + (run "int x;")) + + (test-equal "Simple with value" + '((translation-unit + (define (named x ((type int))) + (constant 1)))) + (run "int x = 1;")) + + + (test-equal "Multiple at same time" + '((translation-unit + (begin + (define (named x ((type long) (type int))) + (constant 1)) + (define (named y (pointer-to ((type long) (type int)))) + )))) + (run "long int x = 1, *y;")) + + ;; TODO static_assert-declaration + + (test-group "structs" + (test-equal "declaration" + '((translation-unit + (struct-like-declaration ((type (struct (named s))))))) + (run "struct s;")) + + (test-equal "definition" + '((translation-unit + (struct-like-declaration + ((type (struct (named s) + (struct-declaration-list + (struct-declarator-list + (named x (specifier-qualifier-list (type int))))))))))) + (run "struct s { int x; };")) + + (test-equal "Definition with multiple fields" + '((translation-unit + (struct-like-declaration + ((type (struct (named p) + (struct-declaration-list + (struct-declarator-list + (named x (specifier-qualifier-list (type int)))) + (struct-declarator-list + (named y (specifier-qualifier-list (type int))))))))))) + (run "struct p { int x; int y; };")) + + (test-equal "Anonymous definition" + '((translation-unit + (struct-like-declaration + ((type (struct + (struct-declaration-list + (struct-declarator-list + (named x (specifier-qualifier-list (type int))))))))))) + (run "struct { int x; };")) + + + (test-equal "struct with inner named struct" + '((translation-unit + (struct-like-declaration + ((type (struct (named p) + (struct-declaration-list + (struct-declarator-list + (named a (specifier-qualifier-list (type int)))) + (specifier-qualifier-list + (type (struct (named inner) + (struct-declaration-list + (struct-declarator-list + (named x (specifier-qualifier-list (type int))))))))))))))) + (run "struct p { int a; struct inner { int x; }; };")) + + (test-equal "struct with inner anonymous struct" + '((translation-unit + (struct-like-declaration + ((type (struct (named p) + (struct-declaration-list + (struct-declarator-list + (named a (specifier-qualifier-list (type int)))) + (specifier-qualifier-list + (type (struct + (struct-declaration-list + (struct-declarator-list + (named x (specifier-qualifier-list (type int))))))))))))))) + (run "struct p { int a; struct { int x; }; };")) + + (run "struct p { struct s; };") + + ) + + (test-group "Unions" + (test-equal + '((translation-unit + (struct-like-declaration + ((type (union (named X))))))) + (run "union X;")) + + ;; (run "union p { struct s; };") + + (test-equal + '((translation-unit + (struct-like-declaration + ((type (union (named int_or_char) + (struct-declaration-list + (struct-declarator-list + (named i (specifier-qualifier-list (type int)))) + (struct-declarator-list + (named s (specifier-qualifier-list (type char))))))))))) + (run "union int_or_char { int i; char s; };"))) + + (test-group "Typedef" + (test-equal "Simple" + '((translation-unit + (define (named uint + ((storage typedef) + (type unsigned) + (type int))) + ))) + (run "typedef unsigned int uint;")) + + ;; Interesting since the star "binds" to the right + (test-equal "with ptr" + '((translation-unit + (define (named int_ptr + (pointer-to + ((storage typedef) + (type int)))) + ))) + (run "typedef int *int_ptr;")) + + (test-equal "Function pointer" + '((translation-unit + (define ((named func_ptr + (pointer-to + (procedure + (returning (pointer-to ((storage typedef) + (type void)))) + (taking ((pointer-to ((type void))))))))) + ))) + (run "typedef void*(*func_ptr)(void*);"))) + + ) diff --git a/tests/test/cpp/preprocessor2.scm b/tests/test/cpp/preprocessor2.scm index 1df1a621..7fcaaccb 100644 --- a/tests/test/cpp/preprocessor2.scm +++ b/tests/test/cpp/preprocessor2.scm @@ -6,6 +6,7 @@ :use-module (srfi srfi-88) :use-module ((hnh util) :select (-> unval)) :use-module ((hnh util lens) :select (set)) + :use-module ((hnh util io) :select (call-with-tmpfile)) :use-module (c preprocessor2) :use-module ((c cpp-environment) :select (extend-environment @@ -47,7 +48,7 @@ "Example 3")) ;; TODO # if (and # elif) aren't yet implemented -(test-skip (test-match-group "Conditionals" "if")) +;; (test-skip (test-match-group "Conditionals" "if")) (define apply-macro (@@ (c preprocessor2) apply-macro)) (define build-parameter-map (@@ (c preprocessor2) build-parameter-map)) @@ -82,13 +83,12 @@ (drop-whitespace-both (remove-noexpand tokens)))) (define (call-with-tmp-header string proc) - (let* ((filename (string-copy "/tmp/headerfile-XXXXXXX")) - (port (mkstemp! filename))) - (with-output-to-port port - (lambda () (display string) - )) - (close-port port) - (proc filename))) + (proc + (call-with-tmpfile + (lambda (port filename) + (display string port) + filename) + tmpl: "/tmp/headerfile-XXXXXXX"))) @@ -554,19 +554,6 @@ body: (lex "x * 2")))) (lex "f(10, 20) + 30")))))) -(let ((e (extend-environment - (make-environment) - (list (@ (c preprocessor2) defined-macro))))) - (test-group "defined() macro" - (test-equal "defined(NOT_DEFINED)" - (lex "0") (remove-noexpand ((unval resolve-token-stream 1) e (lex "defined(X)")))) - (test-equal "defined(DEFINED)" - (lex "1") (remove-noexpand ((unval resolve-token-stream 1) - (extend-environment - e (list (object-like-macro identifier: "X" - body: (lex "10")))) - (lex "defined(X)")))))) - (let ((env (resolve-define (make-environment) (lex "f(x) x+1")))) @@ -1243,5 +1230,24 @@ a b #endif")) + + (test-group "defined without parenthesis" + (test-equal "negative" + (lex "b") + (run "#if defined X +a +#else +b +#endif")) + + (test-equal "positive" + (lex "a") + (run "#define X +#if defined X +a +#else +b +#endif"))) + ;; TODO test advanced constant expression )) -- cgit v1.2.3