(define-module (c preprocessor2) :use-module (srfi srfi-1) :use-module (srfi srfi-71) :use-module (srfi srfi-88) :use-module (c cpp-environment) :use-module (c eval2) :use-module ((c cpp-environment function-like-macro) :select (function-like-macro variadic? identifier-list)) :use-module ((c cpp-environment object-like-macro) :select (object-like-macro object-like-macro?)) :use-module ((c cpp-environment internal-macro) :select (internal-macro)) :use-module ((hnh util) :select (-> ->> intersperse aif swap unless unval break/all)) :use-module ((hnh util lens) :select (set modify cdr*)) :use-module (hnh util path) :use-module (hnh util type) :use-module (hnh util object) :use-module ((hnh util values) :select (abort* on-fst on-snd apply/values)) :use-module ((c lex2) :select (lex placemaker lexeme? lexeme-body lexeme-noexpand tokenize )) :use-module (c unlex) :use-module (c cpp-types) :use-module (c cpp-util) :export (_Pragma-macro defined-macro c-search-path handle-preprocessing-tokens)) (define (read-file path) (call-with-input-file path (@ (ice-9 rdelim) read-string))) (define-syntax-rule (alist-of variable key-type value-type) (build-validator-body variable (list-of (pair-of key-type value-type)))) (define (list-of-length lst n) (= n (length lst))) (define parameter-map? (of-type? (alist-of string? (list-of lexeme?)))) (define (concat-token? token) (and (equal? "##" (punctuator-token? token)) (not (member "##" (lexeme-noexpand token))))) (define (stringify-token? token) (equal? "#" (punctuator-token? token))) (define (left-parenthesis-token? token) (equal? "(" (punctuator-token? token))) (define (right-parenthesis-token? token) (equal? ")" (punctuator-token? token))) (define (comma-token? token) (equal? "," (punctuator-token? token))) (define (ellipsis-token? token) (equal? "..." (punctuator-token? token))) ;; TODO ;; > #if defined X ;; is equivalent to ;; > #if defined(X) ;; parameters is a lexeme list, as returned by parse-parameter-list (define (build-parameter-map macro parameters) (typecheck macro cpp-macro?) (typecheck parameters (list-of (list-of lexeme?))) (map (lambda (pair) (modify pair cdr* drop-whitespace-both)) (if (macro-variadic? macro) (let ((head rest (split-at parameters (length (macro-identifier-list macro))))) (cons (cons "__VA_ARGS__" (concatenate (intersperse (lex ",") rest))) (map cons (macro-identifier-list macro) head))) (map cons (macro-identifier-list macro) parameters)))) (define (expand# macro parameter-map) (typecheck macro cpp-macro?) (typecheck parameter-map parameter-map?) (let loop ((tokens (macro-body macro))) (cond ((null? tokens) '()) ((stringify-token? (car tokens)) (let* ((head rest (car+cdr (drop-whitespace (cdr tokens)))) (x (identifier-token? head))) (cond ((assoc-ref parameter-map x) => (lambda (tokens) (cons (stringify-tokens tokens) (loop rest)))) (else (scm-error 'macro-expand-error "expand#" "'#' is not followed by a macro parameter: ~s" (list x) #f))))) (else (cons (car tokens) (loop (cdr tokens))))))) ;; 6.10.3.3 (define (expand## tokens) ;; (typecheck tokens (list-of lexeme?)) (let loop ((left '()) (right tokens)) (cond ((null? right) (reverse left)) ((concat-token? (car right)) (let ((l (drop-whitespace left)) (r (drop-whitespace (cdr right)))) (cond ((or (null? l) (null? r)) (scm-error 'cpp-error "expand##" "## can't be first or last token: ~s" (list (unlex tokens)) #f)) ((and (placemaker-token? (car l)) (placemaker-token? (car r))) (loop (cdr l) (cons (placemaker) (cdr r)))) ((placemaker-token? (car l)) (loop (cdr l) r)) ((placemaker-token? (car r)) (loop (cdr l) (cons (car l) (cdr r)))) (else ;; 6.10.3.3 p. 3 ;; I believe that ## is the only special case where the ;; result of concatenation is differente from the token directly. (let ((token (concatenate-tokens (car l) (car r)))) (let ((token (if (concat-token? token) (modify token lexeme-noexpand xcons "##") token))) (loop (cdr l) (cons token (cdr r))))))))) (else (let ((pre post (break concat-token? right))) (loop (append left (reverse pre)) post)))))) (define (check-arity macro parameters) (if (variadic? macro) (unless (>= (length parameters) (length (macro-identifier-list macro))) (scm-error 'cpp-arity-error "apply-macro" "Too few arguments to variadic macro ~s, expected at least ~s, got ~s" (list (macro-identifier macro) (length (macro-identifier-list macro)) (length parameters)) (list macro))) (unless (or (and (= 0 (length (macro-identifier-list macro))) (= 1 (length parameters)) (null? (car parameters))) (= (length (macro-identifier-list macro)) (length parameters))) (scm-error 'cpp-arity-error "apply-macro" "Wrong number of arguments to macro ~s, expected ~s, got ~s" (list (macro-identifier macro) (length (macro-identifier-list macro)) (length parameters)) (list macro))))) ;; expand function like macro ;; parameter is a list of lexeme-lists, each "top level" element matching one ;; argument to the macro (define (apply-macro environment macro parameters) (typecheck environment cpp-environment?) ;; Each element should be the lexeme list for that argument (typecheck parameters (list-of (list-of lexeme?))) (typecheck macro cpp-macro?) (check-arity macro parameters) (let () (define (resolve-cpp-variables tokens parameter-map) (define (bound-identifier? id) (assoc-ref parameter-map id)) ;; expand parameters, and place placemaker tokens (let loop ((tokens tokens) (last #f)) (cond ((null? tokens) '()) ((identifier-token? (car tokens)) bound-identifier? => (lambda (id) (let ((replacement (assoc-ref parameter-map id))) (if (null? replacement) (cons (placemaker) (loop (cdr tokens) #f)) ;; macroexpand replacement here! But only if the token isn't used with ## (or #) (append (if (or (concat-token? last) (next-token-matches? concat-token? tokens)) replacement ;; resolve-token-stream only modifies environment by updating current line ;; that can't happen in a macro body ((unval resolve-token-stream 1) environment replacement once?: #t)) (loop (cdr tokens) #f)))))) ((whitespace-token? (car tokens)) (cons (car tokens) (loop (cdr tokens) last))) (else (cons (car tokens) (loop (cdr tokens) (car tokens))))))) (define parameter-map (build-parameter-map macro parameters)) (remove placemaker-token? (-> macro (expand# parameter-map) (resolve-cpp-variables parameter-map) expand##)))) ;; Expand object-like macro ;; #define VALUE 10 ;; #define str(x) #x ;; #define OTHER str(VALUE) ;; OTHER ;; ⇒ "VALUE" ;; remaining-tokens should be the token stream just after the name of the macro (define (expand-macro environment macro noexpand-list remaining-tokens) (typecheck environment cpp-environment?) (typecheck macro cpp-macro?) ;; (typecheck remaining-tokens (list-of lexeme?)) (typecheck noexpand-list (list-of string?)) (let ((name (macro-identifier macro))) (cond ((object-macro? macro) (values environment (append (fold (swap mark-noexpand) (expand## (macro-body macro)) (cons name noexpand-list)) remaining-tokens))) ((function-macro? macro) (if (next-token-matches? left-parenthesis-token? remaining-tokens) (let ((containing remaining newlines (parse-parameter-list remaining-tokens))) (values (bump-line environment newlines) (append (fold (swap mark-noexpand) (apply-macro environment macro containing) (cons name noexpand-list)) remaining))) (values environment ;; TODO#1 the token shouldn't be expanded here, but it should neither be marked no-expand? ;; Consider the case ;; #define m(a) a(0,1) ;; #define f(a) f(2 * (a)) ;; m(f) (append (mark-noexpand (lex (macro-identifier macro)) (macro-identifier macro)) remaining-tokens)))) ((internal-macro? macro) (if (next-token-matches? left-parenthesis-token? remaining-tokens) (let ((containing remaining newlines (parse-parameter-list remaining-tokens))) (let ((env* tokens* ((macro-body macro) environment containing))) (values (bump-line env* newlines) (append (fold (swap mark-noexpand) tokens* (cons name noexpand-list)) remaining)))) (values environment (append (mark-noexpand (lex (macro-identifier macro)) (macro-identifier macro)) remaining-tokens)))) (else (scm-error 'wrong-type-arg "expand-macro" "Macro isn't a macro: ~s" (list macro) #f))))) (define-type (parenthesis-group) (parenthesis-group-tokens type: (list-of (or lexeme? parenthesis-group?)))) (define (make-parenthesis-group tokens) (parenthesis-group parenthesis-group-tokens: tokens)) (define (flatten-group tokens) (cond ((null? tokens) '()) ((lexeme? (car tokens)) (cons (car tokens) (flatten-group (cdr tokens)))) ((parenthesis-group? (car tokens)) (append (lex "(") (flatten-group (parenthesis-group-tokens (car tokens))) (lex ")") (flatten-group (cdr tokens)))))) ;; Takes a list of preprocessing tokens, and returns three values ;; - if the last token was '...' ;; - a list of strings of all token names ;; - the remaining tokens ;; Note that this is ONLY #define f(x) forms ;; not usage forms (define (parse-identifier-list tokens) ;; (typecheck tokens (list-of lexeme?)) (let* ((group remaining (parse-group (drop-whitespace tokens))) (groups (reverse (map drop-whitespace-both (break/all comma-token? (parenthesis-group-tokens group)))))) ;; Checks that there where no nested parenthesis (cond ((equal? '(()) groups) (values #f '() remaining)) (else (typecheck groups (list-of (and (list-of-length 1) (list-of lexeme?)))) (let ((variadic? groups (if (ellipsis-token? (caar groups)) (values #t (cdr groups)) (values #f groups)))) (values variadic? (map (lambda (x) (or (identifier-token? x) (scm-error 'cpp-error "parse-identifier-list" "Unexpected preprocessing-token in identifier list: ~s" (list x) #f))) (map car (reverse groups))) remaining)))))) (define (newline-count group) (let loop ((tokens (parenthesis-group-tokens group))) (fold (lambda (item nls) (+ nls (cond ((newline-token? item) 1) ((parenthesis-group? item) (newline-count item)) (else 0)))) 0 tokens))) ;; tokens ⇒ parenthesis-group, remaining-tokens (define (parse-group tokens) (typecheck tokens (not null?)) (typecheck (car tokens) left-parenthesis-token?) (let loop ((stack '()) (remaining tokens)) (cond ((and (not (null? stack)) (null? (cdr stack)) (car stack)) parenthesis-group? => (lambda (item) (values item remaining))) ((null? remaining) (scm-error 'misc-error "parse-group" "Ran out of tokens while parsing: ~s (stack: ~s)" (list (unlex tokens) stack) #f)) (else (let ((token remaining (car+cdr remaining))) (loop (cond ((right-parenthesis-token? token) (let ((group rest (break left-parenthesis-token? stack))) (cons (make-parenthesis-group (reverse group)) ;; Remove left-parenthesis (cdr rest)))) (else (cons token stack))) remaining)))))) ;; returns three values: ;; - a list of tokens where each is a parameter to the function like macro ;; - the remaining tokenstream ;; - how many newlines were encountered ;; The standard might call these "replacement lists" ;; Note that each returned token-list might have padding whitespace which should be trimmed. ;; It's kept to allow __VA_ARGS__ to "remember" its whitespace (define (parse-parameter-list tokens) (let ((group remaining (parse-group (drop-whitespace tokens)))) ;; Checks that no inner groups where here ;; (typecheck tokens (list-of lexeme?)) (values (map flatten-group (break/all comma-token? (parenthesis-group-tokens group))) remaining (newline-count group)))) ;; Add __FILE__ and __LINE__ object macros to the environment (define (join-file-line environment) (extend-environment environment ;; 6.10.8 (list (object-like-macro identifier: "__FILE__" body: (lex (format #f "~s" (current-file environment)))) (object-like-macro identifier: "__LINE__" body: (lex (number->string (current-line environment))))))) (define defined-macro (internal-macro identifier: "defined" body: (lambda (environment arguments) (typecheck arguments (and (list-of (list-of lexeme?)) (not null?))) (aif (identifier-token? (car (list-ref arguments 0))) (let ((in-env (boolean->c-boolean (in-environment? environment it)))) (values environment (lex (number->string in-env)))) (scm-error 'cpp-error "defined" "Invalid parameter list to `defined': ~s" (list arguments) #f))))) (define _Pragma-macro (internal-macro identifier: "_Pragma" body: (lambda (environment arguments) (typecheck arguments (and (list-of (list-of lexeme?)) (not null?))) (aif (string-token? (caar arguments)) (values (handle-pragma environment (lex it)) '()) (scm-error 'cpp-pragma-error "_Pragma" "Invalid argument to _Pragma: ~s" (list (car arguments)) #f))))) ;; environment, tokens → environment (define (handle-pragma environment tokens) (typecheck environment cpp-environment?) ;; (typecheck tokens (list-of lexeme?)) (let ((err (lambda () (scm-error 'cpp-pragma-error "handle-pragma" "Invalid pragma directive: ~a" (list (unlex tokens)) #f)))) (cond ((null? tokens) (err)) ((equal? "STDC" (identifier-token? (car tokens))) (call-with-values (lambda () (apply values (filter identifier-token? (cdr tokens)))) (case-lambda ((identifier on-off-switch) (format (current-output-port) "#Pragma STDC ~a ~a" (unlex (list identifier)) (unlex (list on-off-switch))) environment) (_ (err))))) (else (format (current-output-port) "Non-standard #Pragma: ~a" (unlex tokens)) environment)))) ;; 6.10.1 p. 4 (define (resolve-constant-expression cpp-tokens) ;; (typecheck tokens (list-of lexeme?)) (define zero (car (lex "0"))) #; (define tokens (map preprocessing-token->token (map (lambda (token) (cond ((identifier-token? token) zero) (else token))) (remove whitespace-token? tokens)))) 'TODO ;; eval as per 6.6 ) (define (mark-noexpand1 token name) (modify token lexeme-noexpand xcons name)) (define (mark-noexpand tokens name) ;; (typecheck tokens (list-of lexeme?)) ;; (typecheck name string?) (map (lambda (token) (mark-noexpand1 token name)) tokens)) (define (marked-noexpand? token) (cond ((identifier-token? token) => (lambda (id) (member id (lexeme-noexpand token)))) (else #f))) ;; Expands a token-stream which contains no pre-processing directives (#if:s, ...) ;; If @var{once?} is true then the resulting body won't be scanned again for tokens to expand ;; environment, tokens, [boolean] → environment, tokens (define* (resolve-token-stream environment tokens key: once?) (typecheck environment cpp-environment?) ;; (typecheck tokens (list-of lexeme?)) ;; (pprint-environment environment) ;; (format (current-error-port) "~a~%~%" (unlex tokens)) (let loop ((environment environment) (tokens tokens)) (cond ((null? tokens) (values environment '())) ((newline-token? (car tokens)) (on-snd (cons (car tokens) (abort* (loop (bump-line environment) (cdr tokens)))))) ((and (identifier-token? (car tokens)) (not (marked-noexpand? (car tokens)))) ;; Here is the loop after expansion (apply/values (if once? values loop) (maybe-extend-identifier environment (identifier-token? (car tokens)) (lexeme-noexpand (car tokens)) (cdr tokens)))) (else (on-snd (cons (car tokens) (abort* (loop environment (cdr tokens))))))))) ;; returns a new environment ;; handle body of #if ;; environment, (list token) → environment (define (resolve-for-if environment tokens) (typecheck environment cpp-environment?) ;; (typecheck tokens (list-of lexeme?)) (-> (extend-environment environment (list defined-macro)) ;; no newlines in #if line ((unval resolve-token-stream 1) tokens) resolve-constant-expression c-boolean->boolean (if (enter-active-if environment) (enter-inactive-if environment)))) ;; environment, string, (list token) → environment, (list token) (define (maybe-extend-identifier environment identifier noexpand-list remaining-tokens) (typecheck environment cpp-environment?) (typecheck identifier string?) ;; (typecheck remaining-tokens (list-of lexeme?)) (typecheck noexpand-list (list-of string?)) (cond ((get-identifier (join-file-line environment) identifier) => (lambda (value) (expand-macro (join-file-line environment) value noexpand-list remaining-tokens))) (else ; It wasn't an identifier, leave it as is (values environment (append (mark-noexpand (lex identifier) identifier) remaining-tokens))))) ;; 'gcc -xc -E -v /dev/null' prints GCC:s search path (define c-search-path (make-parameter (list "/usr/include" "/usr/local/include"))) ;; #include (define (resolve-h-file string) (typecheck string string?) (cond ;; NOTE do I want this case? ;; GCC has it ((path-absolute? string) string) (else (or (find file-exists? (map (lambda (path-prefix) (path-append path-prefix string)) (c-search-path))) (scm-error 'cpp-error "resolve-h-file" "Can't resolve file: ~s" (list string) #f))))) ;; #include "myheader.h" (define (resolve-q-file string) (typecheck string string?) (cond ((file-exists? string) string) ;; This should always be a fallback (6.10.2, p. 3) (else (resolve-h-file string)))) (define (resolve-header environment tokens) (typecheck environment cpp-environment?) ;; (typecheck tokens (list-of lexeme?)) (let ((err (lambda (msg . args) (scm-error 'cpp-error "resolve-and-include-header" (string-append msg ", tokens: ~s") (append args (list (unlex tokens))) #f)))) (let loop ((%first-time #t) (tokens tokens)) (cond ((null? tokens) (err "Invalid #include line")) ((h-string-token? (car tokens)) => (lambda (str) (unless (null? (drop-whitespace (cdr tokens))) (err "Unexpected tokens after #include <>")) (resolve-h-file str))) ((q-string-token? (car tokens)) => (lambda (str) (unless (null? (drop-whitespace (cdr tokens))) (err "Unexpected tokens after #include \"\"")) (resolve-q-file str))) (else (unless %first-time (err "Failed parsing tokens")) ;; No newlines in #include (loop #f ((unval resolve-token-stream 1) environment tokens))))))) ;; environment, tokens → environment (define (handle-line-directive environment tokens*) (typecheck environment cpp-environment?) ;; (typecheck tokens* (list-of lexeme?)) (let ((err (lambda () (scm-error 'cpp-error "handle-line-directive" "Invalid line directive: ~s" (list tokens*) #f)))) (let loop ((%first-time #t) (tokens tokens*)) (cond ((null? tokens)) ((pp-number? (car tokens)) => (lambda (line) (let ((line (string->number line)) (remaining (drop-whitespace (cdr tokens)))) (cond ((null? remaining) (set environment current-line (1- line))) ((string-token? (car remaining)) => (lambda (file) (-> environment (set current-line (1- line)) (set current-file file)))) ;; no newlines in #line (%first-time (loop #f ((unval resolve-token-stream 1) environment tokens))) (else (err)))))) ;; no newlines in #line (%first-time (loop #f ((unval resolve-token-stream 1) environment tokens))) (else (err)))))) ;; environment, tokens → environment (define (resolve-define environment tokens) (typecheck environment cpp-environment?) ;; (typecheck tokens (list-of lexeme?)) (let ((identifier (identifier-token? (car tokens))) (tail (cdr tokens))) (-> environment bump-line (add-identifier identifier (cond ((and (not (null? tail)) (left-parenthesis-token? (car tail))) ;; function like macro (let ((variadic? identifiers replacement-list (parse-identifier-list tail))) (function-like-macro identifier: identifier variadic?: variadic? identifier-list: identifiers ;; surrounding whitespace is not part of the replacement list ;; (6.10.3 p.7) body: (drop-whitespace-both replacement-list)))) (else (object-like-macro identifier: identifier body: (drop-whitespace-both tail)))))))) ;; environment, tokens -> environment, tokens (define (handle-preprocessing-tokens environment tokens) ;; Prepend a newline to ensure that the token stream always starts with a ;; newline (otherwise guaranteed by how we loop). Decrement line-counter ;; by one to compensate. (let loop ((environment (bump-line environment -1)) (tokens (append (lex "\n") tokens))) (define (err fmt . args) (scm-error 'cpp-error "handle-preprocessing-tokens" (string-append "~a:~a " fmt) (cons* (current-file environment) (current-line environment) args) #f)) (cond ((null? tokens) (values environment '())) ((newline-token? (car tokens)) (let ((environment (bump-line environment)) (tokens* (drop-whitespace (cdr tokens)))) (cond ((null? tokens*) (values environment '())) ((equal? "#" (punctuator-token? (car tokens*))) (let ((line-tokens remaining-tokens (tokens-until-eol (cdr tokens*)))) ;; drop whitespace after to not "eat" the next newline token (let ((line-tokens (drop-whitespace line-tokens))) (cond ((null? line-tokens) ;; null directive (loop environment remaining-tokens)) ((in-conditional/inactive? environment) (case (string->symbol (identifier-token? (car line-tokens))) ((ifdef if) (loop (enter-inactive-if environment) remaining-tokens)) ((else) (loop (flip-flop-if environment) remaining-tokens)) ((endif) (loop (leave-if environment) remaining-tokens)) ((elif) (loop (resolve-for-if (leave-if environment) (drop-whitespace (cdr line-tokens))) remaining-tokens)) (else (loop environment remaining-tokens)))) ;; From here on we are not in a comment block (else (let ((directive (string->symbol (identifier-token? (car line-tokens)))) (body (drop-whitespace (cdr line-tokens)))) (if (eq? 'include directive) ;; include is special since it returns a token stream (let ((path (resolve-header environment body))) ;; TODO change to store source location in lexemes ;; and rewrite the following to ;; (loop environment ;; (append (-> path read-file tokenize) remaining-tokens)) ;; TODO and then transfer these source locations when we move ;; to "real" tokens (c to-token) (let ((env* tokens* (loop ;; same hack as at start of loop (-> environment (enter-file path) (bump-line -1)) (append (lex "\n") (-> path read-file tokenize))))) (on-snd (append tokens* (abort* (loop (leave-file env*) remaining-tokens)))))) (let ((operation ; (environment, list token) → environment (case directive ((if) resolve-for-if) ((ifdef) (lambda (env body) ((if (in-environment? env (identifier-token? (car body))) enter-active-if enter-inactive-if) env))) ((ifndef) (lambda (env body) ((if (in-environment? env (identifier-token? (car body))) enter-inactive-if enter-active-if) env))) ;; NOTE possibly validate that body is empty for endif and else ((endif) (lambda (env _) (unless (in-conditional? env) (err "#endif outside conditional")) (leave-if env))) ((else) (lambda (env _) (unless (in-conditional? env) (err "#else outside conditional")) (flip-flop-if env))) ;; ((elif) (lambda )) ((define) resolve-define) ((undef) (lambda (env body) (remove-identifier env (identifier-token? (car body))))) ((line) handle-line-directive) ((error) (lambda (_ tokens) (throw 'cpp-error-directive (unlex tokens)))) ((pragma) handle-pragma) (else (err "Unknown preprocessing directive: ~s" (list line-tokens)))))) (loop (operation environment body) remaining-tokens))))))))) ;; Line is not a pre-processing directive (else (let ((preceding-tokens remaining-tokens (tokens-until-cpp-directive (cdr tokens)))) (let* ((env* resolved-tokens (if (in-conditional/inactive? environment) (values environment '()) (resolve-token-stream environment preceding-tokens)))) (on-snd (append resolved-tokens ;; The initial newline is presreved here, for better output, ;; and to keep at least one whitespace token when there was one previously. ;; possibly also keep a newline for line-directives. (unless (null? remaining-tokens) (lex "\n")) (abort* (loop env* remaining-tokens)))))))))) (else (err "Unexpected middle of line, (near ~s)" (unlex tokens))))))