From 1393ce3878e5d14214631fb83d58c819a7849b18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20H=C3=B6rnquist?= Date: Thu, 7 Jul 2022 18:40:27 +0200 Subject: work. --- module/c/preprocessor2.scm | 302 +++++++++++++++++++++++++++++---------------- 1 file changed, 198 insertions(+), 104 deletions(-) (limited to 'module/c/preprocessor2.scm') diff --git a/module/c/preprocessor2.scm b/module/c/preprocessor2.scm index 19daabfb..e99b1049 100644 --- a/module/c/preprocessor2.scm +++ b/module/c/preprocessor2.scm @@ -16,82 +16,120 @@ :use-module ((c line-fold) :select (fold-lines)) :export ()) +;; Returns two values: +;; - tokens until a newline token is met +;; - (potentially the newline token) and the remaining tokens (define (tokens-until-eol tokens) (break (lambda (token) (equal? token '(whitespace "\n"))) tokens)) +;; match in predicates so non-lists fail properly (define (whitespace-token? token) - (eq? 'whitespace (car token))) + (match token + (`(whitespace ,_) #t) + (_ #f))) + +(define (unwrap-preprocessing-token token) + (match token + (`(preprocessing-token ,x) x) + (_ (scm-error 'wrong-type-arg "unwrap-preprocessing-token" + "Not a preprocessing token: ~s" (list token) + #f)))) (define (preprocessing-token? token) - (eq? 'preprocessing-token token)) + (catch 'wrong-type-arg + (lambda () (unwrap-preprocessing-token token)) + (const #f))) + +;; Replace all whitespace with single spaces. (define (squeeze-whitespace tokens) (match tokens ('() '()) - (`((whitespace ,_) (whitespace ,_) ,rest ...) + ((`(whitespace ,_) `(whitespace ,_) rest ...) (squeeze-whitespace (cons '(whitespace " ") rest))) - (`((whitespace ,_) ,rest ...) + ((`(whitespace ,_) rest ...) (cons '(whitespace " ") (squeeze-whitespace rest))) ((token rest ...) (cons token (squeeze-whitespace rest))))) -(define (stringify-token token) - ;; TODO propperly implement this - `(preprocessing-token - (string-literal ,(with-output-to-string (lambda () (display token)))))) +;; Returns the "source" of the token, as a preprocessing string literal token +(define (stringify-token unwrapped-preprocessing-token) + (match unwrapped-preprocessing-token + (`(string-literal ,s) + (format #f "~s" s)) + (`(header-name (q-string ,s)) + (format #f "~s" s)) + (`(header-name (h-string ,s)) + (format #f "<~a>" s)) + (`(identifier ,id) id) + (`(pp-number ,n) n) + (`(character-constant ,c) + (format #f "'~a'" c)) + (`(punctuator ,p) p))) (define (stringify-tokens tokens) - (with-output-to-string - (lambda () - (for-each (compose display stringify-token) - (squeeze-whitespace tokens))))) + `(preprocessing-token + (string-literal + ,(string-concatenate + (map (match-lambda (`(preprocessing-token ,body) (stringify-token body)) + (`(whitespace ,_) " ")) + (squeeze-whitespace tokens)))))) ;; Expand ## tokens ;; TODO (define (expand-join macro tokens) tokens) -;; expand function like macro -(define (apply-macro environment macro parameters) - (define parameter-map - (if (macro-variadic? macro) - (let ((head rest (split-at parameters (length (macro-identifier-list macro))))) - ;; TODO commas (,) should be interleaved with rest - (cons (cons "__VA_ARGS__" rest) - (map cons (macro-identifier-list macro) head))) - (map cons - (macro-identifier-list macro) - parameters))) - - ;; resolve strigify operators - (define stringify-resolved - (let loop ((tokens (macro-body macro))) - (match tokens - (`((preprocessing-token (punctuator "#")) - (whitespace ,_) ... - (preprocessing-token (identifier ,x)) - ,rest ...) - (unless (member x (macro-identifier-list macro)) - (scm-error 'macro-expand-error "apply-macro" - "'#' is not followed by a macro parameter: ~s" - (list x) #f) +;; parameters is a lexeme list, as returned by parse-parameter-list +(define (build-parameter-map macro parameters) + (if (macro-variadic? macro) + (let ((head rest (split-at parameters (length (macro-identifier-list macro))))) + ;; TODO commas (,) should be interleaved with rest + (cons (cons "__VA_ARGS__" rest) + (map cons (macro-identifier-list macro) head))) + (map cons + (macro-identifier-list macro) + parameters))) + +;; Drop leading whitespace tokens +(define (drop-whitespace tokens) + (drop-while whitespace-token? tokens)) + +(define (drop-whitespace-right tokens) + (-> tokens reverse drop-whitespace reverse)) + +(define (drop-whitespace-both tokens) + (-> tokens + drop-whitespace + drop-whitespace-right)) + +(define (expand-stringifiers macro parameter-map) + (let loop ((tokens (macro-body macro))) + (match tokens + (('(preprocessing-token (punctuator "#")) + rest ...) + (match (drop-whitespace rest) + ((`(preprocessing-token (identifier ,x)) rest ...) + (unless (member x (macro-identifier-list macro)) + (scm-error 'macro-expand-error "expand-stringifiers" + "'#' is not followed by a macro parameter: ~s" + (list x) #f)) (cons (stringify-tokens (assoc-ref parameter-map x)) - (loop rest)))) - ('() '()) - ((token rest ...) - (cons token (loop rest)))))) - - ;; TODO - ;; - resolve ## - (define resulting-body - (expand-join macro stringify-resolved)) + (loop rest))))) + ('() '()) + ((token rest ...) + (cons token (loop rest)))))) - ;; - subtitute parameters - ;; TODO what am I doing here? - (expand-macro (-> environment - (extend-environment parameter-map)) - resulting-body)) +;; expand function like macro +(define (apply-macro environment macro parameters) + (define parameter-map (build-parameter-map macro parameters)) + (define stringify-resolved (expand-stringifiers macro parameter-map)) + ;; TODO resolve ## + (define resulting-body stringify-resolved #; (expand-join macro stringify-resolved) + ) + (resolve-token-stream (extend-environment environment parameter-map) + resulting-body)) @@ -103,8 +141,10 @@ ;; OTHER ;; ⇒ "VALUE" +;; token should be the token stream just after the name of the macro (define (expand-macro environment macro tokens) (cond ((object-macro? macro) + ;; Shouldn't we expand the macro body here? (values environment (append (macro-body macro) tokens))) ((function-macro? macro) @@ -118,7 +158,12 @@ (let ((containing remaining newlines (parse-parameter-list tokens))) (values (bump-line environment newlines) (append ((macro-body macro) environment containing) - remaining)))))) + remaining)))) + + (else + (scm-error 'wrong-type-arg "expand-macro" + "Macro isn't a macro: ~s" + (list macro) #f)))) ;; Takes a list of preprocessing tokens, and returns two values ;; if the last token was '...' @@ -130,16 +175,19 @@ (match tokens ('() (values #f (reverse done))) - ((`(preprocessing-token (punctuation "..."))) - (values #t (reverse done))) - ((`(preprocessing-token (identifier ,id)) rest ...) (loop rest (cons id done))) - ((`(preprocessing-token (punctuation "...")) rest ...) + ((`(preprocessing-token (punctuator "..."))) + (values #t (reverse done))) + + ((`(preprocessing-token (punctuator "...")) rest ...) (scm-error 'cpp-error "parse-identifier-list" - "'...' only allowed as last argument in identifier list" - '() #f)) + "'...' only allowed as last argument in identifier list. Rest: ~s" + (list rest) #f)) + + ((`(preprocessing-token (punctuator ",")) rest ...) + (loop rest done)) ((`(preprocessing-token ,other) rest ...) (scm-error 'cpp-error "parse-identifier-list" @@ -147,10 +195,23 @@ (list other) #f))))) + +;; helper procedure to parse-parameter-list. +;; If a parameter is complex then whitespace is kept, but squeezed to single spaces. Surounding whitespace is removed. +;; Example: +;; #define str(x, y) #y +;; str(x, ( 2, 4 ) ) +;; expands to: +;; "( 2, 4 )" +;; 6.10.3.2 p 2 +(define (cleanup-whitespace tokens) + (-> tokens drop-whitespace-both squeeze-whitespace)) + ;; returns three values: ;; - a list of tokens where each is a parameter to the function like macro ;; - the remaining tokenstream ;; - how many newlines were encountered +;; The standard might call these "replacement lists" (define (parse-parameter-list tokens) (let %loop ((depth 0) (newlines 0) (current '()) (parameters '()) (tokens tokens) (%first-iteration? #t)) @@ -162,26 +223,45 @@ current (cons (car tokens) current)))) (match tokens - (`((whitespace "\n") ,rest ...) + (('(whitespace "\n") rest ...) (loop rest newlines: (1+ newlines) current: current*)) - (`((whitespace ,_) ,rest ...) + ((`(whitespace ,_) rest ...) (loop rest current: current*)) - (`((preprocessing-token (punctuator "(")) ,rest ...) + (('(preprocessing-token (punctuator "(")) rest ...) (loop rest depth: (1+ depth) current: current*)) - (`((preprocessing-token (punctuator ")")) ,rest ...) + (('(preprocessing-token (punctuator ")")) rest ...) (if (= 1 depth) - (values (reverse (cons (reverse current) parameters)) + ;; return value + (values + (if (null? parameters) + (cond ((null? current) '()) + ((every whitespace-token? current) '()) + (else (reverse + (cons (cleanup-whitespace (reverse current)) + parameters)))) + (reverse + (cond ((null? current) parameters) + ((every whitespace-token? current) parameters) + (else (cons (cleanup-whitespace (reverse current)) + parameters))))) + rest newlines) (loop rest depth: (1- depth) current: current*))) - (`((preprocessing-token (punctuator ",")) ,rest ...) + (('(preprocessing-token (punctuator ",")) rest ...) (if (= 1 depth) (loop rest current: '() - parameters: (cons (reverse current) parameters)) - (loop rest current: current*))))))) + parameters: + (cons (cond ((null? current) '()) + ((every whitespace-token? current) '()) + (else (cleanup-whitespace (reverse current)))) + parameters)) + (loop rest current: current*))) + ((_ rest ...) + (loop rest current: current*)))))) (define (join-file-line environment) @@ -190,8 +270,13 @@ (extend-environment environment ;; 6.10.8 - `(("__FILE__" . (preprocessing-token (string-literal ,file))) - ("__LINE__" . (preprocessing-token (pp-number ,(number->string line))))))) + (list + (object-like-macro + identifier: "__FILE__" + body: `((preprocessing-token (string-literal ,file)))) + (object-like-macro + identifier: "__LINE__" + body: `((preprocessing-token (pp-number ,(number->string line)))))))) (define (c-search-path) (make-parameter (list "." "/usr/include"))) @@ -219,8 +304,8 @@ identifier: "defined" body: (lambda (environment tokens) (match tokens - (`((preprocessor-token (identifier ,id))) - `(preprocessor-token (pp-number ,(boolean->c-boolean (in-environment? environment id))))) + (`((preprocessing-token (identifier ,id))) + `(preprocessing-token (pp-number ,(boolean->c-boolean (in-environment? environment id))))) (_ (scm-error 'cpp-error "defined" "Invalid parameter list to `defined': ~s" (list tokens) #f)))))) @@ -252,14 +337,15 @@ 'TODO ) +;; Expands a token-stream which contains no pre-processing directives (#if:s, ...) (define (resolve-token-stream environment tokens) (let loop ((tokens tokens)) (match tokens ('() '()) - (`((preprocessing-token (identifier ,id)) ,rest ...) + ((`(preprocessing-token (identifier ,id)) rest ...) (call-with-values (lambda () (maybe-extend-identifier environment id rest)) (lambda (_ tokens) (loop tokens)))) - (`((whitespace ,_) ,rest ...) + ((`(whitespace ,_) rest ...) (loop rest)) ((token rest ...) (cons token (loop rest)))))) @@ -278,13 +364,16 @@ ;; environment, string, (list token) → environment, (list token) (define (maybe-extend-identifier environment identifier remaining-tokens) (cond ((get-identifier environment identifier) - => (lambda (value) (expand-macro (join-file-line environment) value remaining-tokens))) + => (lambda (value) (expand-macro (join-file-line environment) + value + remaining-tokens))) (else ; It wasn't an identifier, leave it as is + ;; TODO shouldn't we include the identifier in the remaining tokens stream? (values environment remaining-tokens)))) (define (resolve-and-include-header environment tokens) (let loop ((%first-time #t) (tokens tokens)) - (match (drop-while whitespace-token? tokens) + (match (drop-whitespace tokens) ((`(header-name (h-string ,str)) rest ...) (cond ((remove whitespace-token? rest) (negate null?) @@ -341,27 +430,28 @@ ;; environment, tokens → environment (define (resolve-define environment tokens) (match tokens - (`((preprocessing-token (identifier ,identifier)) tail ...) + ((`(preprocessing-token (identifier ,identifier)) tail ...) (-> environment bump-line (add-identifier! identifier - (if (equal? '(preprocessing-token (punctuator "(")) (car tail)) - ;; function like macro - (call-with-values (lambda () (break (lambda (token) (equal? token '(preprocessing-token (punctuator ")")))) - (cdr tail))) - (lambda (identifier-list replacement-list) - (let ((variadic? identifiers (parse-identifier-list identifier-list))) - (function-like-macro - identifier: identifier - variadic?: variadic? - identifier-list: identifiers - ;; NOTE 6.10.3 states that there needs to be at least on whitespace here - body: (cdr replacement-list))))) - - (object-like-macro - identifier: identifier - body: tail))))))) + (match tail + (('(preprocessing-token (punctuator "(")) rest ...) + ;; function like macro + (call-with-values (lambda () (break (lambda (token) (equal? token '(preprocessing-token (punctuator ")")))) + rest)) + (lambda (identifier-list replacement-list) + (let ((variadic? identifiers (parse-identifier-list identifier-list))) + + (function-like-macro + identifier: identifier + variadic?: variadic? + identifier-list: identifiers + ;; NOTE 6.10.3 states that there needs to be at least on whitespace here + body: (cdr replacement-list)))))) + (_ (object-like-macro + identifier: identifier + body: tail)))))))) @@ -376,14 +466,18 @@ args) #f)) + ;; TODO all of this needs to be surounded with a conditional for + ;; environmentns if status. However, ensure that each directive + ;; starts at start of line + (match tokens ('() '()) - (`((whitespace "\n") (whitespace ,_) ... (preprocessing-token (puntuator "#")) ,rest ...) + ((`(whitespace "\n") `(whitespace ,_) '... '(preprocessing-token (puntuator "#")) rest ...) ;; Line tokens are those in this line, ;; while remaining tokens are the newline, follewed by the rest of the files tokens (let ((line-tokens remaining-tokens (tokens-until-eol rest))) ;; Actual tokens just removes all whitespace between "#" and "define" - (let ((actual-tokens (drop-while whitespace-token? line-tokens))) + (let ((actual-tokens (drop-whitespace line-tokens))) (if (null? actual-tokens) (loop (bump-line environment) remaining-tokens) (match (car actual-tokens) @@ -393,7 +487,7 @@ (`(preprocessing-token (identifier "ifdef")) (match actual-tokens - (`((preprocessing-token (identifier ,id)) ,_ ...) + ((`(preprocessing-token (identifier ,id)) _ ...) (loop ((if (in-environment? environment id) enter-active-if enter-inactive-if) @@ -403,7 +497,7 @@ (`(preprocessing-token (identifier "ifndef")) (match actual-tokens - (`((preprocessing-token (identifier ,id)) ,_ ...) + ((`(preprocessing-token (identifier ,id)) _ ...) (loop ((if (in-environment? environment id) enter-inactive-if enter-active-if) @@ -411,49 +505,49 @@ remaining-tokens)) (_ (err "Non identifier in ifndef: ~s" actual-tokens)))) - (`(preprocessing-token (identifier "else")) + ('(preprocessing-token (identifier "else")) ;; TODO 'TODO ) - (`(preprocessing-token (identifier "elif")) + ('(preprocessing-token (identifier "elif")) (-> environment leave-if (resolve-for-if actual-tokens) (loop remaining-tokens))) - (`(preprocessing-token (identifier "endif")) + ('(preprocessing-token (identifier "endif")) (loop (leave-if environment) remaining-tokens)) - (`(preprocessing-token (identifier "include")) + ('(preprocessing-token (identifier "include")) (call-with-values (lambda () (resolve-and-include-header environment (cdr actual-tokens))) (lambda (environment tokens) (loop environment (append tokens remaining-tokens))))) - (`(preprocessing-token (identifier "define")) + ('(preprocessing-token (identifier "define")) (let ((env (resolve-define environment (cdr actual-tokens)))) (loop env remaining-tokens)) ) - (`(preprocessing-token (identifier "undef")) + ('(preprocessing-token (identifier "undef")) (loop (match actual-tokens (`((preprocessing-token (identifier ,id))) (-> environment bump-line (remove-identifier! id)))) remaining-tokens)) - (`(preprocessing-token (identifier "line")) + ('(preprocessing-token (identifier "line")) (loop (handle-line-directive environment actual-tokens) remaining-tokens)) - (`(preprocessing-token (identifier "error")) + ('(preprocessing-token (identifier "error")) ;; NOTE this is an "expected" error (throw 'cpp-error actual-tokens)) - (`(preprocessing-token (identifier "pragma")) + ('(preprocessing-token (identifier "pragma")) (loop (handle-pragma environment actual-tokens) remaining-tokens))))))) - (`((preprocessing-token (identifier ,id)) ,rest ...) + ((`(preprocessing-token (identifier ,id)) rest ...) (call-with-values (lambda () (maybe-extend-identifier environment id rest)) loop)) -- cgit v1.2.3