aboutsummaryrefslogtreecommitdiff
path: root/module/c/preprocessor2.scm
diff options
context:
space:
mode:
Diffstat (limited to 'module/c/preprocessor2.scm')
-rw-r--r--module/c/preprocessor2.scm752
1 files changed, 752 insertions, 0 deletions
diff --git a/module/c/preprocessor2.scm b/module/c/preprocessor2.scm
new file mode 100644
index 00000000..3f9552c5
--- /dev/null
+++ b/module/c/preprocessor2.scm
@@ -0,0 +1,752 @@
+(define-module (c preprocessor2)
+ :use-module (srfi srfi-1)
+ :use-module (srfi srfi-71)
+ :use-module (srfi srfi-88)
+
+ :use-module (c cpp-environment)
+ :use-module (c eval2)
+ :use-module ((c cpp-environment function-like-macro)
+ :select (function-like-macro variadic? identifier-list))
+ :use-module ((c cpp-environment object-like-macro) :select (object-like-macro object-like-macro?))
+ :use-module ((c cpp-environment internal-macro) :select (internal-macro))
+ :use-module ((hnh util) :select (-> ->> intersperse aif swap unless unval break/all))
+ :use-module ((hnh util lens) :select (set modify cdr*))
+ :use-module (hnh util path)
+ :use-module (hnh util type)
+ :use-module (hnh util object)
+ :use-module ((hnh util values) :select (abort* on-fst on-snd apply/values))
+ :use-module ((c lex2)
+ :select (lex
+ placemaker
+ lexeme?
+ lexeme-body
+ lexeme-noexpand
+
+ tokenize
+ ))
+ :use-module (c unlex)
+ :use-module (c cpp-types)
+ :use-module (c cpp-util)
+ :export (_Pragma-macro
+ defined-macro
+ c-search-path
+ handle-preprocessing-tokens))
+
+
+(define (read-file path)
+ (call-with-input-file path (@ (ice-9 rdelim) read-string)))
+
+
+
+(define-syntax-rule (alist-of variable key-type value-type)
+ (build-validator-body variable (list-of (pair-of key-type value-type))))
+
+(define (list-of-length lst n)
+ (= n (length lst)))
+
+(define parameter-map? (of-type? (alist-of string? (list-of lexeme?))))
+
+(define (concat-token? token) (and (equal? "##" (punctuator-token? token))
+ (not (member "##" (lexeme-noexpand token)))))
+(define (stringify-token? token) (equal? "#" (punctuator-token? token)))
+(define (left-parenthesis-token? token) (equal? "(" (punctuator-token? token)))
+(define (right-parenthesis-token? token) (equal? ")" (punctuator-token? token)))
+(define (comma-token? token) (equal? "," (punctuator-token? token)))
+(define (ellipsis-token? token) (equal? "..." (punctuator-token? token)))
+
+
+;; TODO
+;; > #if defined X
+;; is equivalent to
+;; > #if defined(X)
+
+
+;; parameters is a lexeme list, as returned by parse-parameter-list
+(define (build-parameter-map macro parameters)
+ (typecheck macro cpp-macro?)
+ (typecheck parameters (list-of (list-of lexeme?)))
+ (map (lambda (pair) (modify pair cdr* drop-whitespace-both))
+ (if (macro-variadic? macro)
+ (let ((head rest (split-at parameters (length (macro-identifier-list macro)))))
+ (cons (cons "__VA_ARGS__" (concatenate (intersperse
+ (lex ",")
+ rest)))
+ (map cons (macro-identifier-list macro) head)))
+ (map cons
+ (macro-identifier-list macro)
+ parameters))))
+
+(define (expand# macro parameter-map)
+ (typecheck macro cpp-macro?)
+ (typecheck parameter-map parameter-map?)
+ (let loop ((tokens (macro-body macro)))
+ (cond ((null? tokens) '())
+ ((stringify-token? (car tokens))
+ (let* ((head rest (car+cdr (drop-whitespace (cdr tokens))))
+ (x (identifier-token? head)))
+ (cond ((assoc-ref parameter-map x)
+ => (lambda (tokens)
+ (cons (stringify-tokens tokens)
+ (loop rest))))
+ (else
+ (scm-error 'macro-expand-error "expand#"
+ "'#' is not followed by a macro parameter: ~s"
+ (list x) #f)))))
+ (else (cons (car tokens)
+ (loop (cdr tokens)))))))
+
+
+;; 6.10.3.3
+(define (expand## tokens)
+ ;; (typecheck tokens (list-of lexeme?))
+
+ (let loop ((left '())
+ (right tokens))
+ (cond ((null? right)
+ (reverse left))
+ ((concat-token? (car right))
+ (let ((l (drop-whitespace left))
+ (r (drop-whitespace (cdr right))))
+ (cond ((or (null? l) (null? r))
+ (scm-error 'cpp-error "expand##"
+ "## can't be first or last token: ~s"
+ (list (unlex tokens)) #f))
+ ((and (placemaker-token? (car l))
+ (placemaker-token? (car r)))
+ (loop (cdr l) (cons (placemaker) (cdr r))))
+ ((placemaker-token? (car l))
+ (loop (cdr l) r))
+ ((placemaker-token? (car r))
+ (loop (cdr l) (cons (car l) (cdr r))))
+ (else
+ ;; 6.10.3.3 p. 3
+ ;; I believe that ## is the only special case where the
+ ;; result of concatenation is differente from the token directly.
+ (let ((token (concatenate-tokens (car l) (car r))))
+ (let ((token (if (concat-token? token)
+ (modify token lexeme-noexpand xcons "##")
+ token)))
+ (loop (cdr l) (cons token (cdr r)))))))))
+ (else
+ (let ((pre post (break concat-token? right)))
+ (loop (append left (reverse pre)) post))))))
+
+
+(define (check-arity macro parameters)
+ (if (variadic? macro)
+ (unless (>= (length parameters)
+ (length (macro-identifier-list macro)))
+ (scm-error 'cpp-arity-error "apply-macro"
+ "Too few arguments to variadic macro ~s, expected at least ~s, got ~s"
+ (list (macro-identifier macro)
+ (length (macro-identifier-list macro))
+ (length parameters))
+ (list macro)))
+ (unless (or (and (= 0 (length (macro-identifier-list macro)))
+ (= 1 (length parameters))
+ (null? (car parameters)))
+ (= (length (macro-identifier-list macro))
+ (length parameters)))
+ (scm-error 'cpp-arity-error "apply-macro"
+ "Wrong number of arguments to macro ~s, expected ~s, got ~s"
+ (list (macro-identifier macro)
+ (length (macro-identifier-list macro))
+ (length parameters))
+ (list macro)))))
+
+;; expand function like macro
+;; parameter is a list of lexeme-lists, each "top level" element matching one
+;; argument to the macro
+(define (apply-macro environment macro parameters)
+ (typecheck environment cpp-environment?)
+ ;; Each element should be the lexeme list for that argument
+ (typecheck parameters (list-of (list-of lexeme?)))
+ (typecheck macro cpp-macro?)
+ (check-arity macro parameters)
+
+ (let ()
+
+ (define (resolve-cpp-variables tokens parameter-map)
+ (define (bound-identifier? id)
+ (assoc-ref parameter-map id))
+
+ ;; expand parameters, and place placemaker tokens
+ (let loop ((tokens tokens) (last #f))
+ (cond ((null? tokens) '())
+ ((identifier-token? (car tokens))
+ bound-identifier?
+ => (lambda (id)
+ (let ((replacement (assoc-ref parameter-map id)))
+ (if (null? replacement)
+ (cons (placemaker) (loop (cdr tokens) #f))
+ ;; macroexpand replacement here! But only if the token isn't used with ## (or #)
+ (append
+ (if (or (concat-token? last)
+ (next-token-matches? concat-token? tokens))
+ replacement
+ ;; resolve-token-stream only modifies environment by updating current line
+ ;; that can't happen in a macro body
+ ((unval resolve-token-stream 1) environment replacement once?: #t))
+ (loop (cdr tokens) #f))))))
+ ((whitespace-token? (car tokens))
+ (cons (car tokens) (loop (cdr tokens) last)))
+ (else (cons (car tokens) (loop (cdr tokens) (car tokens)))))))
+
+
+ (define parameter-map (build-parameter-map macro parameters))
+ (remove placemaker-token?
+ (-> macro
+ (expand# parameter-map)
+ (resolve-cpp-variables parameter-map)
+ expand##))))
+
+
+
+;; Expand object-like macro
+
+;; #define VALUE 10
+;; #define str(x) #x
+;; #define OTHER str(VALUE)
+;; OTHER
+;; ⇒ "VALUE"
+
+;; remaining-tokens should be the token stream just after the name of the macro
+(define (expand-macro environment macro noexpand-list remaining-tokens)
+ (typecheck environment cpp-environment?)
+ (typecheck macro cpp-macro?)
+ ;; (typecheck remaining-tokens (list-of lexeme?))
+ (typecheck noexpand-list (list-of string?))
+
+ (let ((name (macro-identifier macro)))
+ (cond ((object-macro? macro)
+ (values environment (append (fold (swap mark-noexpand)
+ (expand## (macro-body macro))
+ (cons name noexpand-list))
+ remaining-tokens)))
+
+ ((function-macro? macro)
+ (if (next-token-matches? left-parenthesis-token? remaining-tokens)
+ (let ((containing remaining newlines (parse-parameter-list remaining-tokens)))
+ (values (bump-line environment newlines)
+ (append (fold (swap mark-noexpand)
+ (apply-macro environment macro containing)
+ (cons name noexpand-list))
+ remaining)))
+ (values environment
+ ;; TODO#1 the token shouldn't be expanded here, but it should neither be marked no-expand?
+ ;; Consider the case
+ ;; #define m(a) a(0,1)
+ ;; #define f(a) f(2 * (a))
+ ;; m(f)
+ (append (mark-noexpand (lex (macro-identifier macro)) (macro-identifier macro))
+ remaining-tokens))))
+
+ ((internal-macro? macro)
+ (if (next-token-matches? left-parenthesis-token? remaining-tokens)
+ (let ((containing remaining newlines (parse-parameter-list remaining-tokens)))
+ (let ((env* tokens* ((macro-body macro) environment containing)))
+ (values (bump-line env* newlines)
+ (append (fold (swap mark-noexpand)
+ tokens*
+ (cons name noexpand-list))
+ remaining))))
+ (values environment
+ (append (mark-noexpand (lex (macro-identifier macro)) (macro-identifier macro))
+ remaining-tokens))))
+
+ (else
+ (scm-error 'wrong-type-arg "expand-macro"
+ "Macro isn't a macro: ~s"
+ (list macro) #f)))))
+
+
+
+(define-type (parenthesis-group)
+ (parenthesis-group-tokens
+ type: (list-of (or lexeme? parenthesis-group?))))
+
+(define (make-parenthesis-group tokens)
+ (parenthesis-group parenthesis-group-tokens: tokens))
+
+
+(define (flatten-group tokens)
+ (cond ((null? tokens) '())
+ ((lexeme? (car tokens))
+ (cons (car tokens) (flatten-group (cdr tokens))))
+ ((parenthesis-group? (car tokens))
+ (append (lex "(")
+ (flatten-group (parenthesis-group-tokens (car tokens)))
+ (lex ")")
+ (flatten-group (cdr tokens))))))
+
+
+;; Takes a list of preprocessing tokens, and returns three values
+;; - if the last token was '...'
+;; - a list of strings of all token names
+;; - the remaining tokens
+;; Note that this is ONLY #define f(x) forms
+;; not usage forms
+(define (parse-identifier-list tokens)
+ ;; (typecheck tokens (list-of lexeme?))
+ (let* ((group remaining (parse-group (drop-whitespace tokens)))
+ (groups (reverse (map drop-whitespace-both
+ (break/all comma-token? (parenthesis-group-tokens group))))))
+ ;; Checks that there where no nested parenthesis
+ (cond ((equal? '(()) groups)
+ (values #f '() remaining))
+ (else
+ (typecheck groups (list-of (and (list-of-length 1)
+ (list-of lexeme?))))
+
+ (let ((variadic? groups (if (ellipsis-token? (caar groups))
+ (values #t (cdr groups))
+ (values #f groups))))
+ (values
+ variadic?
+ (map (lambda (x) (or (identifier-token? x)
+ (scm-error 'cpp-error "parse-identifier-list"
+ "Unexpected preprocessing-token in identifier list: ~s"
+ (list x) #f)))
+ (map car (reverse groups)))
+ remaining))))))
+
+
+
+(define (newline-count group)
+ (let loop ((tokens (parenthesis-group-tokens group)))
+ (fold (lambda (item nls)
+ (+ nls
+ (cond ((newline-token? item) 1)
+ ((parenthesis-group? item) (newline-count item))
+ (else 0))))
+ 0 tokens)))
+
+;; tokens ⇒ parenthesis-group, remaining-tokens
+(define (parse-group tokens)
+ (typecheck tokens (not null?))
+ (typecheck (car tokens) left-parenthesis-token?)
+
+ (let loop ((stack '()) (remaining tokens))
+ (cond ((and (not (null? stack))
+ (null? (cdr stack))
+ (car stack))
+ parenthesis-group?
+ => (lambda (item) (values item remaining)))
+ ((null? remaining)
+ (scm-error 'misc-error "parse-group"
+ "Ran out of tokens while parsing: ~s (stack: ~s)"
+ (list (unlex tokens) stack) #f))
+ (else
+ (let ((token remaining (car+cdr remaining)))
+ (loop (cond ((right-parenthesis-token? token)
+ (let ((group rest (break left-parenthesis-token? stack)))
+ (cons (make-parenthesis-group (reverse group))
+ ;; Remove left-parenthesis
+ (cdr rest))))
+ (else (cons token stack)))
+ remaining))))))
+
+
+;; returns three values:
+;; - a list of tokens where each is a parameter to the function like macro
+;; - the remaining tokenstream
+;; - how many newlines were encountered
+;; The standard might call these "replacement lists"
+;; Note that each returned token-list might have padding whitespace which should be trimmed.
+;; It's kept to allow __VA_ARGS__ to "remember" its whitespace
+(define (parse-parameter-list tokens)
+ (let ((group remaining (parse-group (drop-whitespace tokens))))
+ ;; Checks that no inner groups where here
+ ;; (typecheck tokens (list-of lexeme?))
+ (values (map flatten-group
+ (break/all comma-token? (parenthesis-group-tokens group)))
+ remaining
+ (newline-count group))))
+
+
+;; Add __FILE__ and __LINE__ object macros to the environment
+(define (join-file-line environment)
+ (extend-environment
+ environment
+ ;; 6.10.8
+ (list
+ (object-like-macro
+ identifier: "__FILE__"
+ body: (lex (format #f "~s" (current-file environment))))
+ (object-like-macro
+ identifier: "__LINE__"
+ body: (lex (number->string (current-line environment)))))))
+
+
+(define defined-macro
+ (internal-macro
+ identifier: "defined"
+ body: (lambda (environment arguments)
+ (typecheck arguments (and (list-of (list-of lexeme?))
+ (not null?)))
+ (aif (identifier-token? (car (list-ref arguments 0)))
+ (let ((in-env (boolean->c-boolean (in-environment? environment it))))
+ (values environment (lex (number->string in-env))))
+ (scm-error 'cpp-error "defined"
+ "Invalid parameter list to `defined': ~s"
+ (list arguments) #f)))))
+
+(define _Pragma-macro
+ (internal-macro
+ identifier: "_Pragma"
+ body: (lambda (environment arguments)
+ (typecheck arguments (and (list-of (list-of lexeme?))
+ (not null?)))
+ (cond ((string-token? (caar arguments))
+ (lambda (a . _) a)
+ ;; TODO handle rest
+ => (lambda (encoding it . rest)
+ (values (handle-pragma environment (lex it))
+ '())))
+ (else (scm-error 'cpp-pragma-error "_Pragma"
+ "Invalid argument to _Pragma: ~s"
+ (list (car arguments)) #f))))))
+
+
+
+;; environment, tokens → environment
+(define (handle-pragma environment tokens)
+ (typecheck environment cpp-environment?)
+ ;; (typecheck tokens (list-of lexeme?))
+
+ (let ((err (lambda ()
+ (scm-error 'cpp-pragma-error "handle-pragma"
+ "Invalid pragma directive: ~a"
+ (list (unlex tokens)) #f))))
+
+ (cond ((null? tokens) (err))
+ ((equal? "STDC" (identifier-token? (car tokens)))
+ (call-with-values (lambda () (apply values (filter identifier-token? (cdr tokens))))
+ (case-lambda ((identifier on-off-switch)
+ (format (current-output-port)
+ "#Pragma STDC ~a ~a"
+ (unlex (list identifier))
+ (unlex (list on-off-switch)))
+ environment)
+ (_ (err)))))
+ (else
+ (format (current-output-port)
+ "Non-standard #Pragma: ~a"
+ (unlex tokens))
+ environment))))
+
+
+;; 6.10.1 p. 4
+(define (resolve-constant-expression cpp-tokens)
+ ;; (typecheck tokens (list-of lexeme?))
+ (define zero (car (lex "0")))
+ #;
+ (define tokens
+ (map preprocessing-token->token
+ (map (lambda (token)
+ (cond ((identifier-token? token) zero)
+ (else token)))
+ (remove whitespace-token? tokens))))
+
+ 'TODO
+ ;; eval as per 6.6
+ )
+
+
+
+(define (mark-noexpand1 token name)
+ (modify token lexeme-noexpand xcons name))
+
+(define (mark-noexpand tokens name)
+ ;; (typecheck tokens (list-of lexeme?))
+ ;; (typecheck name string?)
+ (map (lambda (token) (mark-noexpand1 token name)) tokens))
+
+(define (marked-noexpand? token)
+ (cond ((identifier-token? token)
+ => (lambda (id) (member id (lexeme-noexpand token))))
+ (else #f)))
+
+;; Expands a token-stream which contains no pre-processing directives (#if:s, ...)
+;; If @var{once?} is true then the resulting body won't be scanned again for tokens to expand
+;; environment, tokens, [boolean] → environment, tokens
+(define* (resolve-token-stream environment tokens key: once?)
+ (typecheck environment cpp-environment?)
+ ;; (typecheck tokens (list-of lexeme?))
+ ;; (pprint-environment environment)
+ ;; (format (current-error-port) "~a~%~%" (unlex tokens))
+ (let loop ((environment environment) (tokens tokens))
+ (cond ((null? tokens) (values environment '()))
+ ((newline-token? (car tokens))
+ (on-snd (cons (car tokens) (abort* (loop (bump-line environment) (cdr tokens))))))
+ ((and (identifier-token? (car tokens))
+ (not (marked-noexpand? (car tokens))))
+ ;; Here is the loop after expansion
+ (apply/values (if once? values loop)
+ (maybe-extend-identifier environment
+ (identifier-token? (car tokens))
+ (lexeme-noexpand (car tokens))
+ (cdr tokens))))
+ (else (on-snd (cons (car tokens) (abort* (loop environment (cdr tokens)))))))))
+
+
+
+;; returns a new environment
+;; handle body of #if
+;; environment, (list token) → environment
+(define (resolve-for-if environment tokens)
+ (typecheck environment cpp-environment?)
+ ;; (typecheck tokens (list-of lexeme?))
+
+ (-> (extend-environment environment (list defined-macro))
+ ;; no newlines in #if line
+ ((unval resolve-token-stream 1) tokens)
+ resolve-constant-expression
+ c-boolean->boolean
+ (if (enter-active-if environment)
+ (enter-inactive-if environment))))
+
+;; environment, string, (list token) → environment, (list token)
+(define (maybe-extend-identifier environment identifier noexpand-list remaining-tokens)
+ (typecheck environment cpp-environment?)
+ (typecheck identifier string?)
+ ;; (typecheck remaining-tokens (list-of lexeme?))
+ (typecheck noexpand-list (list-of string?))
+ (cond ((get-identifier (join-file-line environment) identifier)
+ => (lambda (value)
+ (expand-macro (join-file-line environment)
+ value
+ noexpand-list
+ remaining-tokens)))
+ (else ; It wasn't an identifier, leave it as is
+ (values environment
+ (append (mark-noexpand (lex identifier)
+ identifier)
+ remaining-tokens)))))
+
+;; 'gcc -xc -E -v /dev/null' prints GCC:s search path
+(define c-search-path
+ (make-parameter (list "/usr/include"
+ "/usr/local/include")))
+
+;; #include <stdio.h>
+(define (resolve-h-file string)
+ (typecheck string string?)
+ (cond
+ ;; NOTE do I want this case?
+ ;; GCC has it
+ ((path-absolute? string) string)
+ (else
+ (or
+ (find file-exists?
+ (map (lambda (path-prefix)
+ (path-append path-prefix string))
+ (c-search-path)))
+ (scm-error 'cpp-error "resolve-h-file"
+ "Can't resolve file: ~s"
+ (list string) #f)))))
+
+;; #include "myheader.h"
+(define (resolve-q-file string)
+ (typecheck string string?)
+ (cond ((file-exists? string) string)
+ ;; This should always be a fallback (6.10.2, p. 3)
+ (else (resolve-h-file string))))
+
+
+(define (resolve-header environment tokens)
+ (typecheck environment cpp-environment?)
+ ;; (typecheck tokens (list-of lexeme?))
+
+ (let ((err (lambda (msg . args)
+ (scm-error 'cpp-error "resolve-and-include-header"
+ (string-append msg ", tokens: ~s")
+ (append args (list (unlex tokens))) #f))))
+ (let loop ((%first-time #t) (tokens tokens))
+ (cond ((null? tokens) (err "Invalid #include line"))
+ ((h-string-token? (car tokens))
+ => (lambda (str)
+ (unless (null? (drop-whitespace (cdr tokens)))
+ (err "Unexpected tokens after #include <>"))
+ (resolve-h-file str)))
+ ((q-string-token? (car tokens))
+ => (lambda (str)
+ (unless (null? (drop-whitespace (cdr tokens)))
+ (err "Unexpected tokens after #include \"\""))
+ (resolve-q-file str)))
+ (else
+ (unless %first-time (err "Failed parsing tokens"))
+ ;; No newlines in #include
+ (loop #f ((unval resolve-token-stream 1) environment tokens)))))))
+
+;; environment, tokens → environment
+(define (handle-line-directive environment tokens*)
+ (typecheck environment cpp-environment?)
+ ;; (typecheck tokens* (list-of lexeme?))
+
+ (let ((err (lambda () (scm-error 'cpp-error "handle-line-directive"
+ "Invalid line directive: ~s"
+ (list tokens*) #f))))
+ (let loop ((%first-time #t) (tokens tokens*))
+ (cond ((null? tokens))
+ ((pp-number? (car tokens))
+ => (lambda (line)
+ (let ((line (string->number line))
+ (remaining (drop-whitespace (cdr tokens))))
+ (cond ((null? remaining) (set environment current-line (1- line)))
+ ((string-token? (car remaining))
+ (lambda (a . _) a)
+ => (lambda (encoding . fragments)
+ (-> environment
+ (set current-line (1- line))
+ ;; TODO properly join string
+ (set current-file (car fragments)))))
+ ;; no newlines in #line
+ (%first-time (loop #f ((unval resolve-token-stream 1) environment tokens)))
+ (else (err))))))
+ ;; no newlines in #line
+ (%first-time (loop #f ((unval resolve-token-stream 1) environment tokens)))
+ (else (err))))))
+
+;; environment, tokens → environment
+(define (resolve-define environment tokens)
+ (typecheck environment cpp-environment?)
+ ;; (typecheck tokens (list-of lexeme?))
+
+ (let ((identifier (identifier-token? (car tokens)))
+ (tail (cdr tokens)))
+ (-> environment
+ bump-line
+ (add-identifier
+ identifier
+ (cond ((and (not (null? tail))
+ (left-parenthesis-token? (car tail)))
+ ;; function like macro
+ (let ((variadic? identifiers replacement-list
+ (parse-identifier-list tail)))
+ (function-like-macro
+ identifier: identifier
+ variadic?: variadic?
+ identifier-list: identifiers
+ ;; surrounding whitespace is not part of the replacement list
+ ;; (6.10.3 p.7)
+ body: (drop-whitespace-both replacement-list))))
+ (else (object-like-macro
+ identifier: identifier
+ body: (drop-whitespace-both tail))))))))
+
+
+
+
+;; environment, tokens -> environment, tokens
+(define (handle-preprocessing-tokens environment tokens)
+ ;; Prepend a newline to ensure that the token stream always starts with a
+ ;; newline (otherwise guaranteed by how we loop). Decrement line-counter
+ ;; by one to compensate.
+ (let loop ((environment (bump-line environment -1))
+ (tokens (append (lex "\n") tokens)))
+
+ (define (err fmt . args)
+ (scm-error 'cpp-error "handle-preprocessing-tokens"
+ (string-append "~a:~a " fmt)
+ (cons* (current-file environment)
+ (current-line environment)
+ args)
+ #f))
+
+ (cond ((null? tokens) (values environment '()))
+ ((newline-token? (car tokens))
+ (let ((environment (bump-line environment))
+ (tokens* (drop-whitespace (cdr tokens))))
+ (cond ((null? tokens*) (values environment '()))
+ ((equal? "#" (punctuator-token? (car tokens*)))
+ (let ((line-tokens remaining-tokens (tokens-until-eol (cdr tokens*))))
+ ;; drop whitespace after to not "eat" the next newline token
+ (let ((line-tokens (drop-whitespace line-tokens)))
+ (cond ((null? line-tokens)
+ ;; null directive
+ (loop environment remaining-tokens))
+
+ ((in-conditional/inactive? environment)
+ (case (string->symbol (identifier-token? (car line-tokens)))
+ ((ifdef if) (loop (enter-inactive-if environment) remaining-tokens))
+ ((else) (loop (flip-flop-if environment) remaining-tokens))
+ ((endif) (loop (leave-if environment) remaining-tokens))
+ ((elif) (loop (resolve-for-if
+ (leave-if environment)
+ (drop-whitespace (cdr line-tokens)))
+ remaining-tokens))
+ (else (loop environment remaining-tokens))))
+
+ ;; From here on we are not in a comment block
+ (else
+ (let ((directive (string->symbol (identifier-token? (car line-tokens))))
+ (body (drop-whitespace (cdr line-tokens))))
+ (if (eq? 'include directive)
+ ;; include is special since it returns a token stream
+ (let ((path (resolve-header environment body)))
+ ;; TODO change to store source location in lexemes
+ ;; and rewrite the following to
+ ;; (loop environment
+ ;; (append (-> path read-file tokenize) remaining-tokens))
+ ;; TODO and then transfer these source locations when we move
+ ;; to "real" tokens (c to-token)
+ (let ((env* tokens*
+ (loop
+ ;; same hack as at start of loop
+ (-> environment
+ (enter-file path)
+ (bump-line -1))
+ (append (lex "\n")
+ (-> path read-file tokenize)))))
+ (on-snd (append tokens* (abort* (loop (leave-file env*) remaining-tokens))))))
+
+ (let ((operation ; (environment, list token) → environment
+ (case directive
+ ((if) resolve-for-if)
+ ((ifdef)
+ (lambda (env body)
+ ((if (in-environment? env (identifier-token? (car body)))
+ enter-active-if enter-inactive-if)
+ env)))
+ ((ifndef)
+ (lambda (env body)
+ ((if (in-environment? env (identifier-token? (car body)))
+ enter-inactive-if enter-active-if)
+ env)))
+ ;; NOTE possibly validate that body is empty for endif and else
+ ((endif) (lambda (env _)
+ (unless (in-conditional? env)
+ (err "#endif outside conditional"))
+ (leave-if env)))
+ ((else) (lambda (env _)
+ (unless (in-conditional? env)
+ (err "#else outside conditional"))
+ (flip-flop-if env)))
+ ;; ((elif) (lambda ))
+ ((define) resolve-define)
+ ((undef) (lambda (env body) (remove-identifier env (identifier-token? (car body)))))
+ ((line) handle-line-directive)
+ ((error) (lambda (_ tokens)
+ (throw 'cpp-error-directive (unlex tokens))))
+ ((pragma) handle-pragma)
+ (else (err "Unknown preprocessing directive: ~s"
+ (list line-tokens))))))
+ (loop (operation environment body)
+ remaining-tokens)))))))))
+
+ ;; Line is not a pre-processing directive
+ (else (let ((preceding-tokens remaining-tokens (tokens-until-cpp-directive (cdr tokens))))
+ (let* ((env* resolved-tokens (if (in-conditional/inactive? environment)
+ (values environment '())
+ (resolve-token-stream environment preceding-tokens))))
+ (on-snd (append resolved-tokens
+ ;; The initial newline is presreved here, for better output,
+ ;; and to keep at least one whitespace token when there was one previously.
+ ;; possibly also keep a newline for line-directives.
+ (unless (null? remaining-tokens) (lex "\n"))
+ (abort* (loop env* remaining-tokens))))))))))
+
+ (else (err "Unexpected middle of line, (near ~s)"
+ (unlex tokens))))))
+