From 1393ce3878e5d14214631fb83d58c819a7849b18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20H=C3=B6rnquist?= Date: Thu, 7 Jul 2022 18:40:27 +0200 Subject: work. --- module/c/compiler.scm | 21 +- module/c/cpp-environment.scm | 22 ++- module/c/lex2.scm | 13 +- module/c/preprocessor.scm | 34 +--- module/c/preprocessor2.scm | 302 +++++++++++++++++++---------- tests/test/cpp/cpp-environment.scm | 44 +++++ tests/test/cpp/lex2.scm | 24 ++- tests/test/cpp/preprocessor2.scm | 383 +++++++++++++++++++++++++++++++++++-- 8 files changed, 672 insertions(+), 171 deletions(-) create mode 100644 tests/test/cpp/cpp-environment.scm diff --git a/module/c/compiler.scm b/module/c/compiler.scm index 121e6c07..801c3752 100644 --- a/module/c/compiler.scm +++ b/module/c/compiler.scm @@ -2,14 +2,17 @@ :use-module ((c lex2) :select (lex)) :use-module ((c trigraph) :select (replace-trigraphs)) :use-module ((c line-fold) :select (fold-lines)) + :use-module (c cpp-environment object-like-macro) + :use-module ((c cpp-environment) + :select (make-environment + extend-environment + enter-file)) :use-module (hnh util) + ;; TODO importort + ;; handle-preprocessing-tokens + ;; load-and-tokenize-file :export (run-compiler)) -(define (comment->whitespace expr) - (match expr - (('comment _) '(whitespace " ")) - (other other))) - " #define __STDC__ 1 #define __STDC_HOSTED__ 1 @@ -36,17 +39,15 @@ body: `(preprocessing-token (string-literal ,(strftime "%b %_d %Y" now)))) (object-like-macro identifier: "__TIME__" - body: (preprocessing-token - (string-literal - ,(strftime "%H:%M:%S" now)))))) + body: `(preprocessing-token + (string-literal + ,(strftime "%H:%M:%S" now)))))) (define environment (-> (make-environment) (extend-environment default-macros))) -(define (read-file path) - (call-with-input-file path read-string)) ;;; 5.1.11.2 Translation phases diff --git a/module/c/cpp-environment.scm b/module/c/cpp-environment.scm index 20589b8e..3ce754df 100644 --- a/module/c/cpp-environment.scm +++ b/module/c/cpp-environment.scm @@ -43,7 +43,10 @@ (define identifier (cond ((obj:object-like-macro? x) obj:identifier) ((fun:function-like-macro? x) fun:identifier) - ((int:internal-macro? x) int:identifier))) + ((int:internal-macro? x) int:identifier) + (else (scm-error 'wrong-type-arg "macro-identifier" + "Not a macro: ~s" + (list x) #f)))) (identifier x)) @@ -51,7 +54,10 @@ (define body-proc (cond ((obj:object-like-macro? macro) obj:body) ((fun:function-like-macro? macro) fun:body) - ((int:internal-macro? macro) int:body))) + ((int:internal-macro? macro) int:body) + (else (scm-error 'wrong-type-arg "macro-body" + "Not a macro: ~s" + (list macro) #f)))) (body-proc macro)) (define macro-identifier-list fun:identifier-list) @@ -69,9 +75,10 @@ (define-type (cpp-environment) (cpp-if-status type: (list-of (memv '(outside active-if inactive-if))) default: '(outside)) - (cpp-variabes type: hash-table? default: (make-hash-table)) - (cpp-file-stack type: list? - default: '())) + (cpp-variables type: hash-table? default: (make-hash-table)) + (cpp-file-stack type: (and ((negate null?)) + (list-of (pair-of string? exact-integer?))) + default: '(("*outside*" . 1)))) @@ -116,7 +123,7 @@ (scm-error 'wrong-type-arg "add-identifier!" "Key must be a string, got: ~s" (list key) #f)) - (unless (macro? key) + (unless (macro? value) (scm-error 'wrong-type-arg "add-identifier!" "Value must be a macro, got: ~s" (list value) #f)) @@ -131,7 +138,6 @@ (define (extend-environment environment macros) (let ((env (modify environment cpp-variables clone-hash-table))) - (fold (lambda (pair m) - (add-identifier! env (macro-identifier m) m )) + (fold (lambda (m env) (add-identifier! env (macro-identifier m) m)) env macros))) diff --git a/module/c/lex2.scm b/module/c/lex2.scm index 23fa9da4..6083190f 100644 --- a/module/c/lex2.scm +++ b/module/c/lex2.scm @@ -144,7 +144,7 @@ hexadecimal-floating-constant)) ;; (6.4.4.2) -(define-peg-pattern floating-constant all +(define-peg-pattern decimal-floating-constant all (or (and fractional-constant (? exponent-part) (? floating-suffix)) (and digit-sequence exponent-part (? floating-suffix)))) @@ -253,10 +253,12 @@ ;; (6.4.6) (define-peg-pattern punctuator all - (or "[" "]" "(" ")" "{" "}" "." "->" + (or "[" "]" "(" ")" "{" "}" + "..." ; Moved to be before "." + "." "->" "++" "--" "&" "*" "+" "-" "~" "!" "/" "%" "<<" ">>" "<" ">" "<=" ">=" "==" "!=" "^" "|" "&&" "||" - "?" ":" ";" "..." + "?" ":" ";" "=" "*=" "/=" "%=" "+=" "-=" "<<=" ">>=" "&=" "^=" "|=" "," "#" "##" "<:" ":>" "<%" "%>" "%:" "%:%:")) @@ -313,11 +315,12 @@ (define-peg-pattern comment all (or line-comment block-comment)) -(define-peg-pattern preprocessing-tokens body +(define-peg-pattern preprocessing-tokens all (* (or whitespace comment preprocessing-token))) +;; returns a list of lexemes (define (lex string) - (peg:tree (match-pattern preprocessing-tokens string))) + (cdr (peg:tree (match-pattern preprocessing-tokens string)))) diff --git a/module/c/preprocessor.scm b/module/c/preprocessor.scm index 49ecfa27..71712b17 100644 --- a/module/c/preprocessor.scm +++ b/module/c/preprocessor.scm @@ -139,27 +139,6 @@ (every predicate lst)) -(define-type (cpp-environment) - (cpp-if-status type: (list-of? (lambda (x) (memv x '(outside active-if inactive-if)))) - ;; type: (list-of? (memv '(outside active-if inactive-if))) - default: '(outside)) - (cpp-variabes type: hash-table? default: (make-hash-table))) - -(define (make-environment) (cpp-environment)) - -(define (in-envirnoment? environment key) - (hash-get-handle (cpp-variables environment) key)) - -(define (remove-identifier! environment key) - (hash-remove! (cpp-variables environment) key) - environment) - -(define (add-identifier! environment key value) - (assert (string? key)) - (assert (macro? value)) - (hash-set! (cpp-variables environment) key value) - environment) - ;; Parantheses when defining macro (define (parse-parameter-string string) (map string-trim-both @@ -174,15 +153,12 @@ (formals type: (list-of? string?)) (body type: string?)) -(define (macro? x) - (or (object-macro? x) - (function-macro? x))) ;; The interesting part ;; environment, (list string) -> (values (list string) (list strings)) ;; multiple lines since since a function-like macro can extend over multiple lines -(define (expand-macros environment strings) - ) +;; (define (expand-macros environment strings) +;; ) (define (crash-if-not-if body guilty) @@ -235,7 +211,7 @@ ((elif) (case (car (cpp-if-status environment)) ((outside) (crash-if-not-if (directive-body m) "elif")) - (else ;; TODO + (else 'TODO ;; TODO ) )) @@ -272,7 +248,7 @@ xcons 'inactive-if) (cdr lines) done)) - (else ;; TODO + (else 'TODO ;; TODO ))) @@ -286,7 +262,7 @@ ((#\") (handle-file environment filename)))))) (else (scm-error 'cpp-error "parse-directives" "Invalid include" - '() #f)))) + '() #f))))) ((define) ;; TODO what are valid names? (cond ((string-match "^(\\w+)([(][^)]*[)])?\\s+(.*)" (directive-body directive?)) diff --git a/module/c/preprocessor2.scm b/module/c/preprocessor2.scm index 19daabfb..e99b1049 100644 --- a/module/c/preprocessor2.scm +++ b/module/c/preprocessor2.scm @@ -16,82 +16,120 @@ :use-module ((c line-fold) :select (fold-lines)) :export ()) +;; Returns two values: +;; - tokens until a newline token is met +;; - (potentially the newline token) and the remaining tokens (define (tokens-until-eol tokens) (break (lambda (token) (equal? token '(whitespace "\n"))) tokens)) +;; match in predicates so non-lists fail properly (define (whitespace-token? token) - (eq? 'whitespace (car token))) + (match token + (`(whitespace ,_) #t) + (_ #f))) + +(define (unwrap-preprocessing-token token) + (match token + (`(preprocessing-token ,x) x) + (_ (scm-error 'wrong-type-arg "unwrap-preprocessing-token" + "Not a preprocessing token: ~s" (list token) + #f)))) (define (preprocessing-token? token) - (eq? 'preprocessing-token token)) + (catch 'wrong-type-arg + (lambda () (unwrap-preprocessing-token token)) + (const #f))) + +;; Replace all whitespace with single spaces. (define (squeeze-whitespace tokens) (match tokens ('() '()) - (`((whitespace ,_) (whitespace ,_) ,rest ...) + ((`(whitespace ,_) `(whitespace ,_) rest ...) (squeeze-whitespace (cons '(whitespace " ") rest))) - (`((whitespace ,_) ,rest ...) + ((`(whitespace ,_) rest ...) (cons '(whitespace " ") (squeeze-whitespace rest))) ((token rest ...) (cons token (squeeze-whitespace rest))))) -(define (stringify-token token) - ;; TODO propperly implement this - `(preprocessing-token - (string-literal ,(with-output-to-string (lambda () (display token)))))) +;; Returns the "source" of the token, as a preprocessing string literal token +(define (stringify-token unwrapped-preprocessing-token) + (match unwrapped-preprocessing-token + (`(string-literal ,s) + (format #f "~s" s)) + (`(header-name (q-string ,s)) + (format #f "~s" s)) + (`(header-name (h-string ,s)) + (format #f "<~a>" s)) + (`(identifier ,id) id) + (`(pp-number ,n) n) + (`(character-constant ,c) + (format #f "'~a'" c)) + (`(punctuator ,p) p))) (define (stringify-tokens tokens) - (with-output-to-string - (lambda () - (for-each (compose display stringify-token) - (squeeze-whitespace tokens))))) + `(preprocessing-token + (string-literal + ,(string-concatenate + (map (match-lambda (`(preprocessing-token ,body) (stringify-token body)) + (`(whitespace ,_) " ")) + (squeeze-whitespace tokens)))))) ;; Expand ## tokens ;; TODO (define (expand-join macro tokens) tokens) -;; expand function like macro -(define (apply-macro environment macro parameters) - (define parameter-map - (if (macro-variadic? macro) - (let ((head rest (split-at parameters (length (macro-identifier-list macro))))) - ;; TODO commas (,) should be interleaved with rest - (cons (cons "__VA_ARGS__" rest) - (map cons (macro-identifier-list macro) head))) - (map cons - (macro-identifier-list macro) - parameters))) - - ;; resolve strigify operators - (define stringify-resolved - (let loop ((tokens (macro-body macro))) - (match tokens - (`((preprocessing-token (punctuator "#")) - (whitespace ,_) ... - (preprocessing-token (identifier ,x)) - ,rest ...) - (unless (member x (macro-identifier-list macro)) - (scm-error 'macro-expand-error "apply-macro" - "'#' is not followed by a macro parameter: ~s" - (list x) #f) +;; parameters is a lexeme list, as returned by parse-parameter-list +(define (build-parameter-map macro parameters) + (if (macro-variadic? macro) + (let ((head rest (split-at parameters (length (macro-identifier-list macro))))) + ;; TODO commas (,) should be interleaved with rest + (cons (cons "__VA_ARGS__" rest) + (map cons (macro-identifier-list macro) head))) + (map cons + (macro-identifier-list macro) + parameters))) + +;; Drop leading whitespace tokens +(define (drop-whitespace tokens) + (drop-while whitespace-token? tokens)) + +(define (drop-whitespace-right tokens) + (-> tokens reverse drop-whitespace reverse)) + +(define (drop-whitespace-both tokens) + (-> tokens + drop-whitespace + drop-whitespace-right)) + +(define (expand-stringifiers macro parameter-map) + (let loop ((tokens (macro-body macro))) + (match tokens + (('(preprocessing-token (punctuator "#")) + rest ...) + (match (drop-whitespace rest) + ((`(preprocessing-token (identifier ,x)) rest ...) + (unless (member x (macro-identifier-list macro)) + (scm-error 'macro-expand-error "expand-stringifiers" + "'#' is not followed by a macro parameter: ~s" + (list x) #f)) (cons (stringify-tokens (assoc-ref parameter-map x)) - (loop rest)))) - ('() '()) - ((token rest ...) - (cons token (loop rest)))))) - - ;; TODO - ;; - resolve ## - (define resulting-body - (expand-join macro stringify-resolved)) + (loop rest))))) + ('() '()) + ((token rest ...) + (cons token (loop rest)))))) - ;; - subtitute parameters - ;; TODO what am I doing here? - (expand-macro (-> environment - (extend-environment parameter-map)) - resulting-body)) +;; expand function like macro +(define (apply-macro environment macro parameters) + (define parameter-map (build-parameter-map macro parameters)) + (define stringify-resolved (expand-stringifiers macro parameter-map)) + ;; TODO resolve ## + (define resulting-body stringify-resolved #; (expand-join macro stringify-resolved) + ) + (resolve-token-stream (extend-environment environment parameter-map) + resulting-body)) @@ -103,8 +141,10 @@ ;; OTHER ;; ⇒ "VALUE" +;; token should be the token stream just after the name of the macro (define (expand-macro environment macro tokens) (cond ((object-macro? macro) + ;; Shouldn't we expand the macro body here? (values environment (append (macro-body macro) tokens))) ((function-macro? macro) @@ -118,7 +158,12 @@ (let ((containing remaining newlines (parse-parameter-list tokens))) (values (bump-line environment newlines) (append ((macro-body macro) environment containing) - remaining)))))) + remaining)))) + + (else + (scm-error 'wrong-type-arg "expand-macro" + "Macro isn't a macro: ~s" + (list macro) #f)))) ;; Takes a list of preprocessing tokens, and returns two values ;; if the last token was '...' @@ -130,16 +175,19 @@ (match tokens ('() (values #f (reverse done))) - ((`(preprocessing-token (punctuation "..."))) - (values #t (reverse done))) - ((`(preprocessing-token (identifier ,id)) rest ...) (loop rest (cons id done))) - ((`(preprocessing-token (punctuation "...")) rest ...) + ((`(preprocessing-token (punctuator "..."))) + (values #t (reverse done))) + + ((`(preprocessing-token (punctuator "...")) rest ...) (scm-error 'cpp-error "parse-identifier-list" - "'...' only allowed as last argument in identifier list" - '() #f)) + "'...' only allowed as last argument in identifier list. Rest: ~s" + (list rest) #f)) + + ((`(preprocessing-token (punctuator ",")) rest ...) + (loop rest done)) ((`(preprocessing-token ,other) rest ...) (scm-error 'cpp-error "parse-identifier-list" @@ -147,10 +195,23 @@ (list other) #f))))) + +;; helper procedure to parse-parameter-list. +;; If a parameter is complex then whitespace is kept, but squeezed to single spaces. Surounding whitespace is removed. +;; Example: +;; #define str(x, y) #y +;; str(x, ( 2, 4 ) ) +;; expands to: +;; "( 2, 4 )" +;; 6.10.3.2 p 2 +(define (cleanup-whitespace tokens) + (-> tokens drop-whitespace-both squeeze-whitespace)) + ;; returns three values: ;; - a list of tokens where each is a parameter to the function like macro ;; - the remaining tokenstream ;; - how many newlines were encountered +;; The standard might call these "replacement lists" (define (parse-parameter-list tokens) (let %loop ((depth 0) (newlines 0) (current '()) (parameters '()) (tokens tokens) (%first-iteration? #t)) @@ -162,26 +223,45 @@ current (cons (car tokens) current)))) (match tokens - (`((whitespace "\n") ,rest ...) + (('(whitespace "\n") rest ...) (loop rest newlines: (1+ newlines) current: current*)) - (`((whitespace ,_) ,rest ...) + ((`(whitespace ,_) rest ...) (loop rest current: current*)) - (`((preprocessing-token (punctuator "(")) ,rest ...) + (('(preprocessing-token (punctuator "(")) rest ...) (loop rest depth: (1+ depth) current: current*)) - (`((preprocessing-token (punctuator ")")) ,rest ...) + (('(preprocessing-token (punctuator ")")) rest ...) (if (= 1 depth) - (values (reverse (cons (reverse current) parameters)) + ;; return value + (values + (if (null? parameters) + (cond ((null? current) '()) + ((every whitespace-token? current) '()) + (else (reverse + (cons (cleanup-whitespace (reverse current)) + parameters)))) + (reverse + (cond ((null? current) parameters) + ((every whitespace-token? current) parameters) + (else (cons (cleanup-whitespace (reverse current)) + parameters))))) + rest newlines) (loop rest depth: (1- depth) current: current*))) - (`((preprocessing-token (punctuator ",")) ,rest ...) + (('(preprocessing-token (punctuator ",")) rest ...) (if (= 1 depth) (loop rest current: '() - parameters: (cons (reverse current) parameters)) - (loop rest current: current*))))))) + parameters: + (cons (cond ((null? current) '()) + ((every whitespace-token? current) '()) + (else (cleanup-whitespace (reverse current)))) + parameters)) + (loop rest current: current*))) + ((_ rest ...) + (loop rest current: current*)))))) (define (join-file-line environment) @@ -190,8 +270,13 @@ (extend-environment environment ;; 6.10.8 - `(("__FILE__" . (preprocessing-token (string-literal ,file))) - ("__LINE__" . (preprocessing-token (pp-number ,(number->string line))))))) + (list + (object-like-macro + identifier: "__FILE__" + body: `((preprocessing-token (string-literal ,file)))) + (object-like-macro + identifier: "__LINE__" + body: `((preprocessing-token (pp-number ,(number->string line)))))))) (define (c-search-path) (make-parameter (list "." "/usr/include"))) @@ -219,8 +304,8 @@ identifier: "defined" body: (lambda (environment tokens) (match tokens - (`((preprocessor-token (identifier ,id))) - `(preprocessor-token (pp-number ,(boolean->c-boolean (in-environment? environment id))))) + (`((preprocessing-token (identifier ,id))) + `(preprocessing-token (pp-number ,(boolean->c-boolean (in-environment? environment id))))) (_ (scm-error 'cpp-error "defined" "Invalid parameter list to `defined': ~s" (list tokens) #f)))))) @@ -252,14 +337,15 @@ 'TODO ) +;; Expands a token-stream which contains no pre-processing directives (#if:s, ...) (define (resolve-token-stream environment tokens) (let loop ((tokens tokens)) (match tokens ('() '()) - (`((preprocessing-token (identifier ,id)) ,rest ...) + ((`(preprocessing-token (identifier ,id)) rest ...) (call-with-values (lambda () (maybe-extend-identifier environment id rest)) (lambda (_ tokens) (loop tokens)))) - (`((whitespace ,_) ,rest ...) + ((`(whitespace ,_) rest ...) (loop rest)) ((token rest ...) (cons token (loop rest)))))) @@ -278,13 +364,16 @@ ;; environment, string, (list token) → environment, (list token) (define (maybe-extend-identifier environment identifier remaining-tokens) (cond ((get-identifier environment identifier) - => (lambda (value) (expand-macro (join-file-line environment) value remaining-tokens))) + => (lambda (value) (expand-macro (join-file-line environment) + value + remaining-tokens))) (else ; It wasn't an identifier, leave it as is + ;; TODO shouldn't we include the identifier in the remaining tokens stream? (values environment remaining-tokens)))) (define (resolve-and-include-header environment tokens) (let loop ((%first-time #t) (tokens tokens)) - (match (drop-while whitespace-token? tokens) + (match (drop-whitespace tokens) ((`(header-name (h-string ,str)) rest ...) (cond ((remove whitespace-token? rest) (negate null?) @@ -341,27 +430,28 @@ ;; environment, tokens → environment (define (resolve-define environment tokens) (match tokens - (`((preprocessing-token (identifier ,identifier)) tail ...) + ((`(preprocessing-token (identifier ,identifier)) tail ...) (-> environment bump-line (add-identifier! identifier - (if (equal? '(preprocessing-token (punctuator "(")) (car tail)) - ;; function like macro - (call-with-values (lambda () (break (lambda (token) (equal? token '(preprocessing-token (punctuator ")")))) - (cdr tail))) - (lambda (identifier-list replacement-list) - (let ((variadic? identifiers (parse-identifier-list identifier-list))) - (function-like-macro - identifier: identifier - variadic?: variadic? - identifier-list: identifiers - ;; NOTE 6.10.3 states that there needs to be at least on whitespace here - body: (cdr replacement-list))))) - - (object-like-macro - identifier: identifier - body: tail))))))) + (match tail + (('(preprocessing-token (punctuator "(")) rest ...) + ;; function like macro + (call-with-values (lambda () (break (lambda (token) (equal? token '(preprocessing-token (punctuator ")")))) + rest)) + (lambda (identifier-list replacement-list) + (let ((variadic? identifiers (parse-identifier-list identifier-list))) + + (function-like-macro + identifier: identifier + variadic?: variadic? + identifier-list: identifiers + ;; NOTE 6.10.3 states that there needs to be at least on whitespace here + body: (cdr replacement-list)))))) + (_ (object-like-macro + identifier: identifier + body: tail)))))))) @@ -376,14 +466,18 @@ args) #f)) + ;; TODO all of this needs to be surounded with a conditional for + ;; environmentns if status. However, ensure that each directive + ;; starts at start of line + (match tokens ('() '()) - (`((whitespace "\n") (whitespace ,_) ... (preprocessing-token (puntuator "#")) ,rest ...) + ((`(whitespace "\n") `(whitespace ,_) '... '(preprocessing-token (puntuator "#")) rest ...) ;; Line tokens are those in this line, ;; while remaining tokens are the newline, follewed by the rest of the files tokens (let ((line-tokens remaining-tokens (tokens-until-eol rest))) ;; Actual tokens just removes all whitespace between "#" and "define" - (let ((actual-tokens (drop-while whitespace-token? line-tokens))) + (let ((actual-tokens (drop-whitespace line-tokens))) (if (null? actual-tokens) (loop (bump-line environment) remaining-tokens) (match (car actual-tokens) @@ -393,7 +487,7 @@ (`(preprocessing-token (identifier "ifdef")) (match actual-tokens - (`((preprocessing-token (identifier ,id)) ,_ ...) + ((`(preprocessing-token (identifier ,id)) _ ...) (loop ((if (in-environment? environment id) enter-active-if enter-inactive-if) @@ -403,7 +497,7 @@ (`(preprocessing-token (identifier "ifndef")) (match actual-tokens - (`((preprocessing-token (identifier ,id)) ,_ ...) + ((`(preprocessing-token (identifier ,id)) _ ...) (loop ((if (in-environment? environment id) enter-inactive-if enter-active-if) @@ -411,49 +505,49 @@ remaining-tokens)) (_ (err "Non identifier in ifndef: ~s" actual-tokens)))) - (`(preprocessing-token (identifier "else")) + ('(preprocessing-token (identifier "else")) ;; TODO 'TODO ) - (`(preprocessing-token (identifier "elif")) + ('(preprocessing-token (identifier "elif")) (-> environment leave-if (resolve-for-if actual-tokens) (loop remaining-tokens))) - (`(preprocessing-token (identifier "endif")) + ('(preprocessing-token (identifier "endif")) (loop (leave-if environment) remaining-tokens)) - (`(preprocessing-token (identifier "include")) + ('(preprocessing-token (identifier "include")) (call-with-values (lambda () (resolve-and-include-header environment (cdr actual-tokens))) (lambda (environment tokens) (loop environment (append tokens remaining-tokens))))) - (`(preprocessing-token (identifier "define")) + ('(preprocessing-token (identifier "define")) (let ((env (resolve-define environment (cdr actual-tokens)))) (loop env remaining-tokens)) ) - (`(preprocessing-token (identifier "undef")) + ('(preprocessing-token (identifier "undef")) (loop (match actual-tokens (`((preprocessing-token (identifier ,id))) (-> environment bump-line (remove-identifier! id)))) remaining-tokens)) - (`(preprocessing-token (identifier "line")) + ('(preprocessing-token (identifier "line")) (loop (handle-line-directive environment actual-tokens) remaining-tokens)) - (`(preprocessing-token (identifier "error")) + ('(preprocessing-token (identifier "error")) ;; NOTE this is an "expected" error (throw 'cpp-error actual-tokens)) - (`(preprocessing-token (identifier "pragma")) + ('(preprocessing-token (identifier "pragma")) (loop (handle-pragma environment actual-tokens) remaining-tokens))))))) - (`((preprocessing-token (identifier ,id)) ,rest ...) + ((`(preprocessing-token (identifier ,id)) rest ...) (call-with-values (lambda () (maybe-extend-identifier environment id rest)) loop)) diff --git a/tests/test/cpp/cpp-environment.scm b/tests/test/cpp/cpp-environment.scm new file mode 100644 index 00000000..8600c731 --- /dev/null +++ b/tests/test/cpp/cpp-environment.scm @@ -0,0 +1,44 @@ +(define-module (test cpp cpp-environmunt) + :use-module (srfi srfi-64) + :use-module (srfi srfi-88) + :use-module (c cpp-environment) + :use-module (c cpp-environment object-like-macro) + ) + +(let ((e (make-environment))) + (test-equal '(outside) (cpp-if-status e)) + (let ((e* (enter-active-if e))) + (test-equal "Enter works" '(active-if outside) (cpp-if-status e*)) + (test-equal "Original object remainins unmodified" + '(outside) (cpp-if-status e)))) + +(define cpp-file-stack (@@ (c cpp-environment) cpp-file-stack)) + +(let ((e (make-environment))) + (test-equal "Default file stack" '(("*outside*" . 1)) (cpp-file-stack e)) + (let ((e* (enter-file e "test.c"))) + (test-equal "File stack after entering file" + '(("test.c" . 1) ("*outside*" . 1)) (cpp-file-stack e*)) + (let ((e** (bump-line e*))) + (test-equal 2 (current-line e**))))) + + + +(let ((e (make-environment))) + (let ((e* (add-identifier! + e "key" + (object-like-macro + identifier: "key" + body: '((preprocessing-token (identifier "value"))))))) + (let ((result (get-identifier e* "key"))) + (test-assert (macro? result)) + (test-equal '((preprocessing-token (identifier "value"))) + (macro-body result)))) + ;; (get-identifier e "key") here is undefined + ) + +(let ((e (make-environment))) + (let ((result (get-identifier e "key"))) + (test-assert "Missing identifier returns #f" + (not result))) + ) diff --git a/tests/test/cpp/lex2.scm b/tests/test/cpp/lex2.scm index 0342e25c..762ff176 100644 --- a/tests/test/cpp/lex2.scm +++ b/tests/test/cpp/lex2.scm @@ -6,11 +6,11 @@ (test-equal "Integer literal" - '(preprocessing-token (pp-number "10")) + '((preprocessing-token (pp-number "10"))) (lex "10")) (test-equal "String literal" - '(preprocessing-token (string-literal "Hello")) + '((preprocessing-token (string-literal "Hello"))) (lex "\"Hello\"")) @@ -21,13 +21,13 @@ (lex " 10 ")) (test-equal "Char literal" - '(preprocessing-token (character-constant "a")) + '((preprocessing-token (character-constant "a"))) (lex "'a'")) (test-equal "Comment inside string" - '(preprocessing-token (string-literal "Hel/*lo")) + '((preprocessing-token (string-literal "Hel/*lo"))) (lex "\"Hel/*lo\"")) (test-equal "#define line" @@ -62,3 +62,19 @@ (preprocessing-token (punctuator ")"))) (lex "f(1, (2, 3), 4)")) + + +;; Generating a single lexeme +;; (whitespace " ") +;; would also be ok +(test-equal "Grouped whitespace" + '((whitespace " ") + (whitespace " ")) + (lex " ")) + +(test-equal "Newlines get sepparate whitespace tokens" + '((whitespace " ") + (whitespace " ") + (whitespace "\n") + (whitespace " ")) + (lex " \n ")) diff --git a/tests/test/cpp/preprocessor2.scm b/tests/test/cpp/preprocessor2.scm index 117b7e49..3d62e224 100644 --- a/tests/test/cpp/preprocessor2.scm +++ b/tests/test/cpp/preprocessor2.scm @@ -1,22 +1,38 @@ (define-module (test cpp preprocessor2) :use-module (srfi srfi-64) - :use-module (srfi srfi-88)) + :use-module (srfi srfi-64 util) + :use-module (srfi srfi-64 test-error) + :use-module (srfi srfi-71) + :use-module (srfi srfi-88) + :use-module ((hnh util) :select (unval)) + :use-module (c preprocessor2) + :use-module (c cpp-environment) + :use-module (c cpp-environment function-like-macro) + :use-module (c cpp-environment object-like-macro) + :use-module (c lex2)) +;; TODO Not yet implemented +;; (test-expect-fail (test-match-group "Stringify")) +;; (test-expect-fail +;; (test-match-all (test-match-group "Expand stringifiers") +;; (test-match-name "Correct stringification of one param"))) +(define tokens-until-eol (@@ (c preprocessor2) tokens-until-eol)) (test-group "Tokens until End Of Line" - (call-with-values - (lambda () - (tokens-until-eol - '(before (whitespace "\n") after))) - (lambda (bef aft) - (test-equal '(before) bef) - (test-equal '((whitespace "\n") after) aft)))) + (call-with-values + (lambda () + (tokens-until-eol + '(before (whitespace "\n") after))) + (lambda (bef aft) + (test-equal '(before) bef) + (test-equal '((whitespace "\n") after) aft)))) +(define squeeze-whitespace (@@ (c preprocessor2) squeeze-whitespace)) (test-equal "Squeeze whitespace" - '(bef (whitespace " ") aft) + '(bef (whitespace " ") aft) (squeeze-whitespace '(bef (whitespace a) @@ -25,5 +41,350 @@ -(test-equal "(" - (stringify-token '(preprocessor-token (operator "(")))) +(define stringify-token (@@ (c preprocessor2) stringify-token)) +(define stringify-tokens (@@ (c preprocessor2) stringify-tokens)) + +(test-group "Stringify" + (test-equal "(" + (stringify-token '(punctuator "("))) + ;; TODO more cases + + (test-equal (car (lex "\"(a, b)\"")) + (stringify-tokens (lex "(a, b)"))) + ) + + +(define parse-identifier-list (@@ (c preprocessor2) parse-identifier-list)) + +(test-group "Parse identifier list" + (test-group "Single argument" + (let ((rest args (parse-identifier-list (lex "x")))) + (test-assert (not rest)) + (test-equal '("x") args))) + + (test-group "Multiple parameters" + (let ((rest args (parse-identifier-list (lex "x, y")))) + (test-assert (not rest)) + (test-equal '("x" "y") args))) + + + (test-group "Rest args after regular" + (let ((rest args (parse-identifier-list (lex "x, ...")))) + (test-assert rest) + (test-equal '("x") args))) + + (test-group "Only rest args" + (let ((rest args (parse-identifier-list (lex "...")))) + (test-assert rest) + (test-equal '() args))) + + (test-group "Errors" + (test-error "Compound forms are invalid" + 'cpp-error (parse-identifier-list (lex "(y)"))) + + (test-error "Non-identifier atoms are invalid" + 'cpp-error (parse-identifier-list (lex "1"))) + + (test-error "Rest args not at end is invalid" + 'cpp-error (parse-identifier-list (lex "..., y"))))) + + + +(define expand-stringifiers (@@ (c preprocessor2) expand-stringifiers)) +(define build-parameter-map (@@ (c preprocessor2) build-parameter-map)) +(define parse-parameter-list (@@ (c preprocessor2) parse-parameter-list)) +(define cleanup-whitespace (@@ (c preprocessor2) cleanup-whitespace)) + +(test-equal "Clean up whitespace" + (lex "( 2 , 4 )") + (cleanup-whitespace (lex " \n ( 2 , \n 4 ) \t "))) + + +;; Parameter lists (the callsite arguments to the macro) +(test-group "Parameter list" + (test-group "Empty parameter list" + (let ((containing remaining nls (parse-parameter-list (lex "()")))) + (test-equal '() containing) + (test-equal '() remaining) + (test-equal 0 nls))) + + (test-group "Single value in parameter list" + (let ((containing remaining nls (parse-parameter-list (lex "(x)")))) + (test-equal (list (lex "x")) containing) + (test-equal '() remaining) + (test-equal 0 nls))) + + (test-group "Two values in parameter list" + (let ((containing remaining nls (parse-parameter-list (lex "(x, y)")))) + (test-equal (list (lex "x") + (lex "y")) + containing) + (test-equal '() remaining) + (test-equal 0 nls))) + + (test-group "Three values in parameter list" + (let ((containing remaining nls (parse-parameter-list (lex "(x, y, z)")))) + (test-equal (list (lex "x") + (lex "y") + (lex "z")) + containing) + (test-equal '() remaining) + (test-equal 0 nls))) + + (test-group "Numeric parameter" + (let ((containing remaining nls (parse-parameter-list (lex "(1)")))) + (test-equal (list (lex "1")) containing) + (test-equal '() remaining) + (test-equal 0 nls)) + ) + + (test-group "Two values, one of which is a paretheseed pair" + (let ((containing remaining nls + (parse-parameter-list (lex "(x, (y, z))")))) + (test-equal (list (lex "x") (lex "(y, z)")) + containing) + (test-equal '() remaining) + (test-equal 0 nls)))) + +(test-group "Build parameter map" + (test-equal "Simplest case, zero arguments" + '() + (let ((m (function-like-macro + identifier: "str" + identifier-list: '() + body: (lex "#x")))) + (build-parameter-map + m '() #; (list (lex "x")) + ))) + + (test-equal "Single (simple) argument" + `(("x" . ,(lex "x"))) + (let ((m (function-like-macro + identifier: "str" + identifier-list: '("x") + body: '()))) + (build-parameter-map + m + (list (lex "x"))))) + + (test-equal "Single advanced argument" + `(("x" . ,(lex "(x)"))) + (let ((m (function-like-macro + identifier: "str" + identifier-list: '("x") + body: '()))) + (build-parameter-map + m (list (lex "(x)"))))) + + (test-group "Rest arguments" + (test-equal "Single simple" + `(("__VA_ARGS__" . ,(list (lex "x")))) + (let ((m (function-like-macro + identifier: "str" + identifier-list: '() + variadic?: #t + body: '()))) + (build-parameter-map + m (list (lex "x"))))) + + #; + (test-equal "Two simple" + '() + (let ((m (function-like-macro + identifier: "str" + identifier-list: '() + variadic?: #t + body: '()))) + (build-parameter-map + m (list (lex "x"))))) + )) + + + +(test-group "Expand stringifiers" + (let ((m (function-like-macro + identifier: "str" + identifier-list: '("x") + body: (lex "#x")))) + (test-equal "Correct stringification of one param" + (lex "\"10\"") + (expand-stringifiers + m (build-parameter-map + m (list (lex "10")))))) + + (let ((m (function-like-macro + identifier: "str" + identifier-list: '() + body: (lex "#x")))) + (test-error "Stringification fails for non-parameters" + 'macro-expand-error + (expand-stringifiers + m (build-parameter-map + m (list (lex "x"))))))) + +;; TODO expand-join +;; token ## token2 + +(define join-file-line (@@ (c preprocessor2) join-file-line)) + +(let ((e (join-file-line (make-environment)))) + (test-equal (object-like-macro identifier: "__FILE__" + body: '((preprocessing-token (string-literal "*outside*")))) + (get-identifier e "__FILE__")) + (test-equal (object-like-macro identifier: "__LINE__" + body: '((preprocessing-token (pp-number "1")))) + (get-identifier e "__LINE__"))) + +(define resolve-token-stream (@@ (c preprocessor2) resolve-token-stream)) + +(test-group "Token streams" + (test-group "Non-expanding" + (test-equal "Null stream" + '() (resolve-token-stream (make-environment) '())) + (test-equal "Constant resolve to themselves" + (lex "1") (resolve-token-stream (make-environment) (lex "1"))) + (test-equal "Identifier-likes not in environment stay put" + (lex "x") (resolve-token-stream (make-environment) (lex "x"))) + (test-equal "Identifier-likes with stuff after keep stuff after" + (lex "x 1") (resolve-token-stream (make-environment) (lex "x 1")))) + + (test-group "Object likes" + (test-equal "Expansion of single token" + (lex "10") (resolve-token-stream (extend-environment (make-environment) + (list (object-like-macro + identifier: "x" + body: (lex "10")))) + (lex "x"))) + + (test-equal "Expansion keeps stuff after" + (lex "10 1") (resolve-token-stream (extend-environment (make-environment) + (list (object-like-macro + identifier: "x" + body: (lex "10")))) + (lex "x 1"))) + + (test-equal "Multiple object like macros in one stream" + (lex "10 20") + (resolve-token-stream (extend-environment (make-environment) + (list (object-like-macro + identifier: "x" + body: (lex "10")) + (object-like-macro + identifier: "y" + body: (lex "20")))) + (lex "x y"))) + ) + + ;; TODO + + ;; (test-group "Function likes") + + ;; (test-group "Mix of object and function likes") + + ) + +(define expand-macro (@@ (c preprocessor2) expand-macro)) +(define resolve-define (@@ (c preprocessor2) resolve-define)) +(define apply-macro (@@ (c preprocessor2) apply-macro)) +(define maybe-extend-identifier (@@ (c preprocessor2) maybe-extend-identifier)) + +(test-group "Macro expansion" + (test-group "Expand macro part 1" + ;; Expand object like macros + ;; apply-macro depends on this, but expand macro with function like macros + ;; depend on apply-macro, thereby the two parter + (test-group "Object like macros" + (call-with-values + (lambda () (expand-macro (make-environment) + (object-like-macro + identifier: "x" body: (lex "1 + 2")) + '())) + (lambda (_ tokens) (test-equal "Simplest case" (lex "1 + 2") tokens))) + + (call-with-values + (lambda () (expand-macro (make-environment) + (object-like-macro + identifier: "x" body: (lex "1+2")) + (cdr (lex "x something else")))) + (lambda (_ tokens) (test-equal "Expansion with stuff after" + (lex "1+2 something else") tokens))) + + ;; (call-with-values (expand-macro (make-environment))) + + )) + + +(test-group "Maybe extend identifier" + (test-equal "Non-identifier returns remaining" + '() ((unval maybe-extend-identifier 1) + (make-environment) + "x" + '())) + + (test-equal "Non-identifiers remaining tokens are returned verbatim" + '(remaining) ((unval maybe-extend-identifier 1) + (make-environment) + "x" + '(remaining))) + + (test-equal "Object like identifier expands" + (lex "1 + 2") + ((unval maybe-extend-identifier 1) + (extend-environment (make-environment) + (list + (object-like-macro + identifier: "x" + body: (lex "1 + 2")))) + "x" + '())) + + (test-equal "Object like macro still returns remaining verbatim" + (append (lex "1 + 2") '(remaining)) + ((unval maybe-extend-identifier 1) + (extend-environment (make-environment) + (list + (object-like-macro + identifier: "x" + body: (lex "1 + 2")))) + "x" + '(remaining))) + + ) + + (test-group "Apply macro" + (test-equal "zero arg macro on nothing" + (lex "1") + (apply-macro + (make-environment) + (function-like-macro identifier: "f" + identifier-list: '() + body: (lex "1")) + '())) + + (test-equal "Single arg macro" + (lex "10") + (apply-macro + (make-environment) + (function-like-macro identifier: "f" + identifier-list: '("x") + body: (lex "x")) + (lex "10")))) + + (test-group "Expand macro part 2" + (test-group "Function like macros" + (let ((e (make-environment)) + (m (function-like-macro + identifier: "f" + identifier-list: '() + body: (lex "1")))) + (call-with-values (lambda () (expand-macro e m (lex "()"))) + (lambda (_ tokens*) (test-equal (lex "1") tokens*))) + ;; TODO this should raise an arity error + (call-with-values (lambda () (expand-macro e m (lex "(10)"))) + (lambda (_ tokens*) (test-equal '() tokens*))))))) + +(define apply-macro (@@ (c preprocessor2) apply-macro)) + + +;; (resolve-define (make-environment) +;; (lex "f(x) x+1")) -- cgit v1.2.3