(define-module (c preprocessor2) :use-module (srfi srfi-1) :use-module (srfi srfi-71) :use-module (srfi srfi-88) :use-module (c cpp-environment) :use-module (c eval2) :use-module ((c cpp-environment function-like-macro) :select (function-like-macro variadic? identifier-list)) :use-module ((c cpp-environment object-like-macro) :select (object-like-macro object-like-macro?)) :use-module ((c cpp-environment internal-macro) :select (internal-macro)) :use-module ((hnh util) :select (-> intersperse aif swap unless unval)) :use-module ((hnh util lens) :select (set modify cdr*)) :use-module (hnh util path) :use-module (hnh util type) :use-module ((c lex2) :select (lex placemaker lexeme? lexeme-body lexeme-noexpand)) :use-module ((c trigraph) :select (replace-trigraphs)) :use-module ((c line-fold) :select (fold-lines)) :use-module (c unlex) :use-module (c cpp-types) :use-module (c cpp-util) :use-module (ice-9 control) :export (defined-macro _Pragma-macro)) (define-syntax-rule (alist-of variable key-type value-type) (build-validator-body variable (list-of (pair-of key-type value-type)))) (define parameter-map? (of-type? (alist-of string? (list-of lexeme?)))) (define (concat-token? token) (and (equal? "##" (punctuator-token? token)) (not (member "##" (lexeme-noexpand token))))) (define (stringify-token? token) (equal? "#" (punctuator-token? token))) (define (left-parenthesis-token? token) (equal? "(" (punctuator-token? token))) (define (right-parenthesis-token? token) (equal? ")" (punctuator-token? token))) (define (comma-token? token) (equal? "," (punctuator-token? token))) (define (ellipsis-token? token) (equal? "..." (punctuator-token? token))) (define-syntax-rule (abort* form) (call-with-values (lambda () form) abort)) (define-syntax-rule (on-fst form) (% form (lambda (prompt fst . rest) (apply values (prompt fst) rest)))) (define-syntax-rule (on-snd form) (% form (lambda (prompt fst snd . rest) (apply values fst (prompt snd) rest)))) ;; parameters is a lexeme list, as returned by parse-parameter-list (define (build-parameter-map macro parameters) (typecheck macro cpp-macro?) (typecheck parameters (list-of (list-of lexeme?))) (map (lambda (pair) (modify pair cdr* drop-whitespace-both)) (if (macro-variadic? macro) (let ((head rest (split-at parameters (length (macro-identifier-list macro))))) (cons (cons "__VA_ARGS__" (concatenate (intersperse (lex ",") rest))) (map cons (macro-identifier-list macro) head))) (map cons (macro-identifier-list macro) parameters)))) (define (expand# macro parameter-map) (typecheck macro cpp-macro?) (typecheck parameter-map parameter-map?) (let loop ((tokens (macro-body macro))) (cond ((null? tokens) '()) ((stringify-token? (car tokens)) (let* ((head rest (car+cdr (drop-whitespace (cdr tokens)))) (x (identifier-token? head))) (cond ((assoc-ref parameter-map x) => (lambda (tokens) (cons (stringify-tokens tokens) (loop rest)))) (else (scm-error 'macro-expand-error "expand#" "'#' is not followed by a macro parameter: ~s" (list x) #f))))) (else (cons (car tokens) (loop (cdr tokens))))))) ;; 6.10.3.3 (define (expand## tokens) (typecheck tokens (list-of lexeme?)) (let loop ((left '()) (right tokens)) (cond ((null? right) (reverse left)) ((concat-token? (car right)) (let ((l (drop-whitespace left)) (r (drop-whitespace (cdr right)))) (cond ((or (null? l) (null? r)) (scm-error 'cpp-error "expand##" "## can't be first or last token: ~s" (list (unlex tokens)) #f)) ((and (placemaker-token? (car l)) (placemaker-token? (car r))) (loop (cdr l) (cons (placemaker) (cdr r)))) ((placemaker-token? (car l)) (loop (cdr l) r)) ((placemaker-token? (car r)) (loop (cdr l) (cons (car l) (cdr r)))) (else ;; 6.10.3.3 p. 3 ;; I believe that ## is the only special case where the ;; result of concatenation is differente from the token directly. (let ((token (concatenate-tokens (car l) (car r)))) (let ((token (if (concat-token? token) (modify token lexeme-noexpand xcons "##") token))) (loop (cdr l) (cons token (cdr r))))))))) (else (let ((pre post (break concat-token? right))) (loop (append left (reverse pre)) post)))))) (define (check-arity macro parameters) (if (variadic? macro) (unless (>= (length parameters) (length (macro-identifier-list macro))) (scm-error 'cpp-arity-error "apply-macro" "Too few arguments to variadic macro ~s, expected at least ~s, got ~s" (list (macro-identifier macro) (length (macro-identifier-list macro)) (length parameters)) (list macro))) (unless (or (and (= 0 (length (macro-identifier-list macro))) (= 1 (length parameters)) (null? (car parameters))) (= (length (macro-identifier-list macro)) (length parameters))) (scm-error 'cpp-arity-error "apply-macro" "Wrong number of arguments to macro ~s, expected ~s, got ~s" (list (macro-identifier macro) (length (macro-identifier-list macro)) (length parameters)) (list macro))))) ;; expand function like macro ;; parameter is a list of lexeme-lists, each "top level" element matching one ;; argument to the macro (define (apply-macro environment macro parameters) (typecheck environment cpp-environment?) ;; Each element should be the lexeme list for that argument (typecheck parameters (list-of (list-of lexeme?))) (typecheck macro cpp-macro?) (check-arity macro parameters) (let () (define (resolve-cpp-variables tokens parameter-map) (define (bound-identifier? id) (assoc-ref parameter-map id)) ;; expand parameters, and place placemaker tokens (let loop ((tokens tokens) (last #f)) (cond ((null? tokens) '()) ((identifier-token? (car tokens)) bound-identifier? => (lambda (id) (let ((replacement (assoc-ref parameter-map id))) (if (null? replacement) (cons (placemaker) (loop (cdr tokens) #f)) ;; macroexpand replacement here! But only if the token isn't used with ## (or #) (append (if (or (concat-token? last) (next-token-matches? concat-token? tokens)) replacement ;; resolve-token-stream only modifies environment by updating current line ;; that can't happen in a macro body ((unval resolve-token-stream 1) environment replacement once?: #t)) (loop (cdr tokens) #f)))))) ((whitespace-token? (car tokens)) (cons (car tokens) (loop (cdr tokens) last))) (else (cons (car tokens) (loop (cdr tokens) (car tokens))))))) (define parameter-map (build-parameter-map macro parameters)) (remove placemaker-token? (-> macro (expand# parameter-map) (resolve-cpp-variables parameter-map) expand##)))) ;; Expand object-like macro ;; #define VALUE 10 ;; #define str(x) #x ;; #define OTHER str(VALUE) ;; OTHER ;; ⇒ "VALUE" ;; remaining-tokens should be the token stream just after the name of the macro (define (expand-macro environment macro noexpand-list remaining-tokens) (typecheck environment cpp-environment?) (typecheck macro cpp-macro?) (typecheck remaining-tokens (list-of lexeme?)) (typecheck noexpand-list (list-of string?)) (let ((name (macro-identifier macro))) (cond ((object-macro? macro) (values environment (append (fold (swap mark-noexpand) (expand## (macro-body macro)) (cons name noexpand-list)) remaining-tokens))) ((function-macro? macro) (if (next-token-matches? left-parenthesis-token? remaining-tokens) (let ((containing remaining newlines (parse-parameter-list remaining-tokens))) (values (bump-line environment newlines) (append (fold (swap mark-noexpand) (apply-macro environment macro containing) (cons name noexpand-list)) remaining))) (values environment ;; TODO#1 the token shouldn't be expanded here, but it should neither be marked no-expand? ;; Consider the case ;; #define m(a) a(0,1) ;; #define f(a) f(2 * (a)) ;; m(f) (append (mark-noexpand (lex (macro-identifier macro)) (macro-identifier macro)) remaining-tokens)))) ((internal-macro? macro) (if (next-token-matches? left-parenthesis-token? remaining-tokens) (let ((containing remaining newlines (parse-parameter-list remaining-tokens))) (let ((env* tokens* ((macro-body macro) environment containing))) (values (bump-line env* newlines) (append (fold (swap mark-noexpand) tokens* (cons name noexpand-list)) remaining)))) (values environment (append (mark-noexpand (lex (macro-identifier macro)) (macro-identifier macro)) remaining-tokens)))) (else (scm-error 'wrong-type-arg "expand-macro" "Macro isn't a macro: ~s" (list macro) #f))))) ;; Takes a list of preprocessing tokens, and returns two values ;; if the last token was '...' ;; and a list of strings of all token names ;; Note that this is ONLY #define f(x) forms ;; not usage forms (define (parse-identifier-list tokens) (typecheck tokens (list-of lexeme?)) (let loop ((tokens (remove whitespace-token? tokens)) (done '())) (cond ((null? tokens) (values #f (reverse done))) ((identifier-token? (car tokens)) => (lambda (id) (loop (cdr tokens) (cons id done)))) ((ellipsis-token? (car tokens)) (unless (null? (cdr tokens)) (scm-error 'cpp-error "parse-identifier-list" "'...' only allowed as last argument in identifier list. Rest: ~s" (list (cdr tokens)) #f)) (values #t (reverse done))) ((comma-token? (car tokens)) (loop (cdr tokens) done)) (else (scm-error 'cpp-error "parse-identifier-list" "Unexpected preprocessing-token in identifier list: ~s" (list (car tokens)) #f))))) ;; returns three values: ;; - a list of tokens where each is a parameter to the function like macro ;; - the remaining tokenstream ;; - how many newlines were encountered ;; The standard might call these "replacement lists" ;; Note that each returned token-list might have padding whitespace which should be trimmed. ;; It's kept to allow __VA_ARGS__ to "remember" its whitespace (define (parse-parameter-list tokens*) (typecheck tokens* (list-of lexeme?)) (let %loop ((depth 0) (newlines 0) (current '()) (parameters '()) (tokens tokens*) (%first-iteration? #t)) (define* (loop tokens key: (depth depth) (newlines newlines) (current current) (parameters parameters)) (%loop depth newlines current parameters tokens #f)) (let ((current* (if (zero? depth) current (cons (car tokens) current)))) (cond ((null? tokens) (scm-error 'misc-error "parse-parameter-list" "Ran out of tokens while parsing: ~s" (list tokens*) #f)) ((newline-token? (car tokens)) (loop (cdr tokens) newlines: (1+ newlines) current: current*)) ((whitespace-token? (car tokens)) (loop (cdr tokens) current: current*)) ((left-parenthesis-token? (car tokens)) (loop (cdr tokens) depth: (1+ depth) current: current*)) ((right-parenthesis-token? (car tokens)) (if (= 1 depth) ;; return value (values (reverse (cons (reverse current) parameters)) (cdr tokens) newlines) (loop (cdr tokens) depth: (1- depth) current: current*))) ((comma-token? (car tokens)) (if (= 1 depth) (loop (cdr tokens) current: '() parameters: (cons (reverse current) parameters)) (loop (cdr tokens) current: current*))) (else (loop (cdr tokens) current: current*)))))) ;; Add __FILE__ and __LINE__ object macros to the environment (define (join-file-line environment) (extend-environment environment ;; 6.10.8 (list (object-like-macro identifier: "__FILE__" body: (lex (format #f "~s" (current-file environment)))) (object-like-macro identifier: "__LINE__" body: (lex (number->string (current-line environment))))))) (define (c-search-path) (make-parameter (list "." "/usr/include"))) ;; #include (define (resolve-h-file string) (typecheck string string?) (cond ((path-absolute? string) string) (else (let ((filename (find file-exists? (map (lambda (path-prefix) (path-append path-prefix string)) (c-search-path))))) (if filename filename (scm-error 'cpp-error "resolve-h-file" "Can't resolve file: ~s" (list string) #f)))))) ;; #include "myheader.h" (define (resolve-q-file string) (typecheck string string?) ;; This should always be a fallback (6.10.2, p. 3) (cond (else (resolve-h-file string)))) (define defined-macro (internal-macro identifier: "defined" body: (lambda (environment arguments) (typecheck arguments (and (list-of (list-of lexeme?)) (not null?))) (aif (identifier-token? (car (list-ref arguments 0))) (let ((in-env (boolean->c-boolean (in-environment? environment it)))) (values environment (lex (number->string in-env)))) (scm-error 'cpp-error "defined" "Invalid parameter list to `defined': ~s" (list arguments) #f))))) (define _Pragma-macro (internal-macro identifier: "_Pragma" body: (lambda (environment arguments) (typecheck arguments (and (list-of (list-of lexeme?)) (not null?))) (aif (string-token? (caar arguments)) (values (handle-pragma environment (lex it)) '()) (scm-error 'cpp-pragma-error "_Pragma" "Invalid argument to _Pragma: ~s" (list (car arguments)) #f))))) ;; environment, tokens → environment (define (handle-pragma environment tokens) (typecheck environment cpp-environment?) (typecheck tokens (list-of lexeme?)) (let ((err (lambda () (scm-error 'cpp-pragma-error "handle-pragma" "Invalid pragma directive: ~a" (list (unlex tokens)) #f)))) (cond ((null? tokens) (err)) ((equal? "STDC" (identifier-token? (car tokens))) (call-with-values (lambda () (apply values (filter identifier-token? (cdr tokens)))) (case-lambda ((identifier on-off-switch) (format (current-output-port) "#Pragma STDC ~a ~a" (unlex (list identifier)) (unlex (list on-off-switch))) environment) (_ (err))))) (else (format (current-output-port) "Non-standard #Pragma: ~s~%" (unlex (list tokens))) environment)))) (define (resolve-constant-expression tokens) (typecheck tokens (list-of lexeme?)) 'TODO ) (define (mark-noexpand1 token name) (modify token lexeme-noexpand xcons name)) (define (mark-noexpand tokens name) ;; (typecheck tokens (list-of lexeme?)) ;; (typecheck name string?) (map (lambda (token) (mark-noexpand1 token name)) tokens)) (define (marked-noexpand? token) (cond ((identifier-token? token) => (lambda (id) (member id (lexeme-noexpand token)))) (else #f))) ;; Expands a token-stream which contains no pre-processing directives (#if:s, ...) ;; If @var{once?} is true then the resulting body won't be scanned again for tokens to expand ;; environment, tokens, [boolean] → environment, tokens (define* (resolve-token-stream environment tokens key: once?) (typecheck environment cpp-environment?) (typecheck tokens (list-of lexeme?)) ;; (pprint-environment environment) ;; (format (current-error-port) "~a~%~%" (unlex tokens)) (let loop ((environment environment) (tokens tokens)) (cond ((null? tokens) (values environment '())) ((newline-token? (car tokens)) (on-snd (cons (car tokens) (abort* (loop (bump-line environment) (cdr tokens)))))) ((and (identifier-token? (car tokens)) (not (marked-noexpand? (car tokens)))) (call-with-values (lambda () (maybe-extend-identifier environment (identifier-token? (car tokens)) (lexeme-noexpand (car tokens)) (cdr tokens))) ;; Here is the after expansion (if once? values loop))) (else (on-snd (cons (car tokens) (abort* (loop environment (cdr tokens))))))))) ;; returns a new environment ;; handle body of #if ;; environment, (list token) → environment (define (resolve-for-if environment tokens) (typecheck environment cpp-environment?) (typecheck tokens (list-of lexeme?)) (-> (extend-environment environment defined-macro) ;; no newlines in #if line ((unval resolve-token-stream 1) tokens) resolve-constant-expression c-boolean->boolean (if (enter-active-if environment) (enter-inactive-if environment)))) ;; environment, string, (list token) → environment, (list token) (define (maybe-extend-identifier environment identifier noexpand-list remaining-tokens) (typecheck environment cpp-environment?) (typecheck identifier string?) (typecheck remaining-tokens (list-of lexeme?)) (typecheck noexpand-list (list-of string?)) (cond ((get-identifier (join-file-line environment) identifier) => (lambda (value) (expand-macro (join-file-line environment) value noexpand-list remaining-tokens))) (else ; It wasn't an identifier, leave it as is (values environment (append (mark-noexpand (lex identifier) identifier) remaining-tokens))))) (define (resolve-and-include-header environment tokens) (typecheck environment cpp-environment?) (typecheck tokens (list-of lexeme?)) (let ((err (lambda (msg . args) (scm-error 'cpp-error "resolve-and-include-header" (string-append msg ", tokens: ~s") (append args (list (unlex tokens))) #f)))) (let loop ((%first-time #t) (tokens tokens)) (cond ((null? tokens) '()) ((h-string-token? (car tokens)) => (lambda (str) (unless (null? (drop-whitespace (cdr tokens))) (err "Unexpected tokens after #include <>")) (handle-preprocessing-tokens environment (-> str resolve-h-file read-file tokenize)))) ((q-string-token? (car tokens)) => (lambda (str) (unless (null? (drop-whitespace (cdr tokens))) (err "Unexpected tokens after #include \"\"")) (handle-preprocessing-tokens environment (-> str resolve-q-file read-file tokenize)))) (else (unless %first-time (err "Failed parsing tokens")) ;; No newlines in #include (loop #f ((unval resolve-token-stream 1) environment tokens))))))) ;; environment, tokens → environment (define (handle-line-directive environment tokens*) (typecheck environment cpp-environment?) (typecheck tokens* (list-of lexeme?)) (let ((err (lambda () (scm-error 'cpp-error "handle-line-directive" "Invalid line directive: ~s" (list tokens*) #f)))) (let loop ((%first-time #t) (tokens tokens*)) (cond ((null? tokens)) ((number-token? (car tokens)) => (lambda (line) (let ((line (string->number line)) (remaining (drop-whitespace (cdr tokens)))) (cond ((null? remaining) (set environment current-line (1- line))) ((string-token? (car remaining)) => (lambda (file) (-> environment (set current-line (1- line)) (set current-file file)))) ;; no newlines in #line (%first-time (loop #f ((unval resolve-token-stream 1) environment tokens))) (else (err)))))) ;; no newlines in #line (%first-time (loop #f ((unval resolve-token-stream 1) environment tokens))) (else (err)))))) ;; environment, tokens → environment (define (resolve-define environment tokens) (typecheck environment cpp-environment?) (typecheck tokens (list-of lexeme?)) (let ((identifier (identifier-token? (car tokens))) (tail (cdr tokens))) (-> environment bump-line (add-identifier identifier (cond ((and (not (null? tail)) (left-parenthesis-token? (car tail))) ;; function like macro (let ((identifier-list replacement-list (break right-parenthesis-token? (cdr tail)))) (let ((variadic? identifiers (parse-identifier-list identifier-list))) (function-like-macro identifier: identifier variadic?: variadic? identifier-list: identifiers ;; cdr drops the end parenthesis of the definition ;; surrounding whitespace is not part of the replacement list (6.10.3 p.7) body: (drop-whitespace-both (cdr replacement-list)))))) (else (object-like-macro identifier: identifier body: (drop-whitespace-both tail)))))))) ;; environment, tokens -> environment, tokens (define (handle-preprocessing-tokens environment tokens) ;; Prepend a newline to ensure that the token stream always starts with a ;; newline (otherwise guaranteed by how we loop). Decrement line-counter ;; by one to compensate. (let loop ((environment (bump-line environment -1)) (tokens (append (lex "\n") tokens))) (define (err fmt . args) (scm-error 'cpp-error "handle-preprocessing-tokens" (string-append "~a:~a " fmt) (cons* (current-file environment) (current-line environment) args) #f)) (cond ((null? tokens) (values environment '())) ((newline-token? (car tokens)) (let ((environment (bump-line environment)) (tokens* (drop-whitespace (cdr tokens)))) (cond ((null? tokens*) (values environment '())) ((equal? "#" (punctuator-token? (car tokens*))) (let ((line-tokens remaining-tokens (tokens-until-eol (cdr tokens*)))) ;; drop whitespace after to not "eat" the next newline token (let ((line-tokens (drop-whitespace line-tokens))) (cond ((null? line-tokens) ;; null directive (loop environment remaining-tokens)) ((in-comment-block? environment) (case (string->symbol (identifier-token? (car line-tokens))) ((else) (loop (flip-flop-if environment) remaining-tokens)) ((endif) (loop (leave-if environment) remaining-tokens)) ((elif) (loop (resolve-for-if (leave-if environment) (drop-whitespace (cdr line-tokens))) remaining-tokens)) (else (loop environment remaining-tokens)))) ;; From here on we are not in a comment block (else (let ((directive (string->symbol (identifier-token? (car line-tokens)))) (body (drop-whitespace (cdr line-tokens)))) (if (eq? 'include directive) ;; include is special since it returns a token stream (call-with-values (lambda () (resolve-and-include-header environment body)) (lambda (environment tokens) (loop environment (append tokens remaining-tokens)))) (let ((operation ; (environment, list token) → environment (case directive ((if) resolve-for-if) ((ifdef) (lambda (env body) (if (in-environment? env (identifier-token? (car body))) enter-active-if enter-inactive-if))) ((ifndef) (lambda (env body) (if (in-environment? env (identifier-token? (car body))) enter-inactive-if enter-active-if))) ((define) resolve-define) ((undef) (lambda (env body) (remove-identifier env (identifier-token? (car body))))) ((line) handle-line-directive) ((error) (lambda (_ tokens) (throw 'cpp-error-directive (unlex tokens)))) ((pragma) handle-pragma) (else (err "Unknown preprocessing directive: ~s" (list line-tokens)))))) (loop (operation environment body) remaining-tokens))))))))) ;; Line is not a pre-processing directive (else (let ((preceding-tokens remaining-tokens (tokens-until-cpp-directive (cdr tokens)))) (let* ((env* resolved-tokens (if (in-comment-block? environment) (values environment '()) (resolve-token-stream environment preceding-tokens)))) (on-snd (append resolved-tokens ;; The initial newline is presreved here, for better output, ;; and to keep at least one whitespace token when there was one previously. ;; possibly also keep a newline for line-directives. (unless (null? remaining-tokens) (lex "\n")) (abort* (loop env* remaining-tokens)))))))))) (else (err "Unexpected middle of line"))))) (define (read-file path) (call-with-input-file path (@ (ice-9 rdelim) read-string))) (define (comment->whitespace token) (if (comment-token? token) (car (lex " ")) token)) (define (comments->whitespace tokens) (map comment->whitespace tokens)) ;;; 5.1.11.2 Translation phases (define (tokenize string) (-> string ;;; 1. trigraph replacement replace-trigraphs ;;; 2. Line folding fold-lines ;;; 3. Decomposition into preprocenning tokens, whitespaces, and comments lex ;;; 4. Execution of preprocessing directives, all preprocessing directives are then deleted comments->whitespace ;; squeeze-whitespace-blocks ))