(define-module (c preprocessor2) :use-module (srfi srfi-1) :use-module (srfi srfi-71) :use-module (srfi srfi-88) :use-module (ice-9 match) :use-module (c cpp-environment) :use-module (c eval2) :use-module ((c cpp-environment function-like-macro) :select (function-like-macro variadic? identifier-list)) :use-module ((c cpp-environment object-like-macro) :select (object-like-macro)) :use-module ((c cpp-environment internal-macro) :select (internal-macro)) :use-module ((hnh util) :select (-> intersperse aif swap)) :use-module ((hnh util lens) :select (set modify cdr*)) :use-module (hnh util path) :use-module (hnh util type) :use-module (hnh util object) :use-module ((c lex2) :select (lex placemaker lexeme? lexeme-body lexeme-noexpand)) :use-module ((c trigraph) :select (replace-trigraphs)) :use-module ((c line-fold) :select (fold-lines)) :use-module (c unlex) :use-module (c cpp-types) :use-module (c cpp-util) :export ()) (define-syntax-rule (alist-of variable key-type value-type) (build-validator-body variable (list-of (pair-of key-type value-type)))) (define parameter-map? (of-type? (alist-of string? (list-of lexeme?)))) ;; parameters is a lexeme list, as returned by parse-parameter-list (define (build-parameter-map macro parameters) (typecheck macro macro?) (typecheck parameters (list-of (list-of lexeme?))) (map (lambda (pair) (modify pair cdr* drop-whitespace-both)) (if (macro-variadic? macro) (let ((head rest (split-at parameters (length (macro-identifier-list macro))))) (cons (cons "__VA_ARGS__" (concatenate (intersperse (lex ",") rest))) (map cons (macro-identifier-list macro) head))) (map cons (macro-identifier-list macro) parameters)))) (define (expand# macro parameter-map) (typecheck macro macro?) (typecheck parameter-map parameter-map?) (let loop ((tokens (macro-body macro))) (cond ((null? tokens) '()) ((equal? "#" (punctuator-token? (car tokens))) (let* ((head rest (car+cdr (drop-whitespace (cdr tokens)))) (x (identifier-token? head))) (cond ((assoc-ref parameter-map x) => (lambda (tokens) (cons (stringify-tokens tokens) (loop rest)))) (else (scm-error 'macro-expand-error "expand#" "'#' is not followed by a macro parameter: ~s" (list x) #f))))) (else (cons (car tokens) (loop (cdr tokens))))))) (define-type (list-zipper) (left type: list?) focused (right type: list?)) ;; Move zipper one step to the left (define (zip-left zipper) (if (null? (left zipper)) zipper (list-zipper left: (cdr (left zipper)) right: (cons (focused zipper) (right zipper)) focused: (car (left zipper))))) ;; Move zipper one step to the right (define (zip-right zipper) (if (null? (right zipper)) zipper (list-zipper left: (cons (focused zipper) (left zipper)) right: (cdr (right zipper)) focused: (car (right zipper))))) ;; find first element matching predicate, going right (define (zip-find-right predicate zipper) (cond ((null? (right zipper)) zipper) ((predicate (focused zipper)) zipper) (else (zip-find-right predicate (zip-right zipper))))) (define (list->zipper list) (list-zipper left: '() focused: (car list) right: (cdr list))) (define (rezip zipper) (if (null? (left zipper)) zipper (rezip (zip-left zipper)))) (define (zipper->list zipper) (let ((z (rezip zipper))) (cons (focused z) (right z)))) (define (concatenate-tokens a b) (car (lex (string-append (unlex (list a)) (unlex (list b)))))) ;; 6.10.3.3 (define (expand## tokens) (typecheck tokens (list-of lexeme?)) (let loop ((zipper (list->zipper tokens))) (cond ((equal? "##" (punctuator-token? (focused zipper))) (let ((l (drop-whitespace (left zipper))) (r (drop-whitespace (right zipper)))) (cond ((or (null? l) (null? r)) (scm-error 'cpp-error "expand##" "## can't be first or last token: ~s" (list (unlex tokens)) #f)) ((and (placemaker-token? (car l)) (placemaker-token? (car r))) (loop (list-zipper left: (cdr l) right: (cdr r) focused: (placemaker)))) ((placemaker-token? (car l)) (loop (list-zipper left: (cdr l) right: (cdr r) focused: (car r)))) ((placemaker-token? (car r)) (loop (list-zipper left: (cdr l) right: (cdr r) focused: (car l)))) (else (loop (list-zipper left: (cdr l) right: (cdr r) focused: (concatenate-tokens (car l) (car r)))))))) ((null? (right zipper)) (zipper->list zipper)) (else (loop (zip-find-right (lambda (token) (equal? "##" (punctuator-token? token))) zipper)))))) ;; expand function like macro ;; parameter is a list of lexeme-lists, each "top level" element matching one ;; argument to the macro (define (apply-macro environment macro parameters) (typecheck environment cpp-environment?) ;; Each element should be the lexeme list for that argument (typecheck parameters (list-of (list-of lexeme?))) (typecheck macro macro?) (when (and (variadic? macro) (> (length (macro-identifier-list macro)) (length parameters))) (scm-error 'cpp-arity-error "apply-macro" "Too few arguments to variadic macro ~s, expected at least ~s, got ~s" (list (macro-identifier macro) (length (macro-identifier-list macro)) (length parameters)) (list macro))) (when (and (not (variadic? macro)) (not (= (length (macro-identifier-list macro)) (length parameters)))) (scm-error 'cpp-arity-error "apply-macro" "Wrong number of arguments to macro ~s, expected ~s, got ~s" (list (macro-identifier macro) (length (macro-identifier-list macro)) (length parameters)) (list macro))) (let () (define (resolve-cpp-variables tokens parameter-map) (define (bound-identifier? id) (assoc-ref parameter-map id)) ;; expand parameters, and place placemaker tokens (let loop ((tokens tokens)) (cond ((null? tokens) '()) ((identifier-token? (car tokens)) bound-identifier? => (lambda (id) (let ((replacement (assoc-ref parameter-map id))) (if (null? replacement) (cons (placemaker) (loop (cdr tokens))) ;; TODO macroexpand replacement here? (append replacement (loop (cdr tokens))))))) (else (cons (car tokens) (loop (cdr tokens))))))) (define parameter-map (build-parameter-map macro parameters)) (define stringify-resolved (expand# macro parameter-map)) (remove placemaker-token? (expand## (resolve-cpp-variables stringify-resolved parameter-map))))) ;; Expand object-like macro ;; #define VALUE 10 ;; #define str(x) #x ;; #define OTHER str(VALUE) ;; OTHER ;; ⇒ "VALUE" ;; remaining-tokens should be the token stream just after the name of the macro (define (expand-macro environment macro noexpand-list remaining-tokens) (typecheck environment cpp-environment?) (typecheck macro macro?) (typecheck remaining-tokens (list-of lexeme?)) (typecheck noexpand-list (list-of string?)) (let ((name (macro-identifier macro))) (cond ((object-macro? macro) (values environment (append (fold (swap mark-noexpand) (expand## (macro-body macro)) (cons name noexpand-list)) remaining-tokens))) ((function-macro? macro) (let ((containing remaining newlines (parse-parameter-list remaining-tokens))) (values (bump-line environment newlines) (append (fold (swap mark-noexpand) (apply-macro environment macro containing) (cons name noexpand-list)) remaining)))) ((internal-macro? macro) (let ((containing remaining newlines (parse-parameter-list remaining-tokens))) (values (bump-line environment newlines) (append (fold (swap mark-noexpand) ((macro-body macro) environment containing) (cons name noexpand-list)) remaining)))) (else (scm-error 'wrong-type-arg "expand-macro" "Macro isn't a macro: ~s" (list macro) #f))))) ;; Takes a list of preprocessing tokens, and returns two values ;; if the last token was '...' ;; and a list of strings of all token names ;; Note that this is ONLY #define f(x) forms ;; not usage forms (define (parse-identifier-list tokens) (typecheck tokens (list-of lexeme?)) (let loop ((tokens (remove whitespace-token? tokens)) (done '())) (cond ((null? tokens) (values #f (reverse done))) ((identifier-token? (car tokens)) => (lambda (id) (loop (cdr tokens) (cons id done)))) ((equal? '(punctuator "...") (lexeme-body (car tokens))) (unless (null? (cdr tokens)) (scm-error 'cpp-error "parse-identifier-list" "'...' only allowed as last argument in identifier list. Rest: ~s" (list (cdr tokens)) #f)) (values #t (reverse done))) ((equal? '(punctuator ",") (lexeme-body (car tokens))) (loop (cdr tokens) done)) (else (scm-error 'cpp-error "parse-identifier-list" "Unexpected preprocessing-token in identifier list: ~s" (list (car tokens)) #f))))) ;; returns three values: ;; - a list of tokens where each is a parameter to the function like macro ;; - the remaining tokenstream ;; - how many newlines were encountered ;; The standard might call these "replacement lists" (define (parse-parameter-list tokens*) (typecheck tokens* (list-of lexeme?)) (let %loop ((depth 0) (newlines 0) (current '()) (parameters '()) (tokens tokens*) (%first-iteration? #t)) (define* (loop tokens key: (depth depth) (newlines newlines) (current current) (parameters parameters)) (%loop depth newlines current parameters tokens #f)) (let ((current* (if (zero? depth) current (cons (car tokens) current)))) (cond ((null? tokens) (scm-error 'misc-error "parse-parameter-list" "Ran out of tokens while parsing: ~s" (list tokens*) #f)) ((newline-token? (car tokens)) (loop (cdr tokens) newlines: (1+ newlines) current: current*)) ((whitespace-token? (car tokens)) (loop (cdr tokens) current: current*)) ((equal? '(punctuator "(") (lexeme-body (car tokens))) (loop (cdr tokens) depth: (1+ depth) current: current*)) ((equal? '(punctuator ")") (lexeme-body (car tokens))) (if (= 1 depth) ;; return value (values (reverse (if (null? current) parameters (cons (reverse current) parameters))) (cdr tokens) newlines) (loop (cdr tokens) depth: (1- depth) current: current*))) ((equal? '(punctuator ",") (lexeme-body (car tokens))) (if (= 1 depth) (loop (cdr tokens) current: '() parameters: (cons (reverse current) parameters)) (loop (cdr tokens) current: current*))) (else (loop (cdr tokens) current: current*)))))) ;; Add __FILE__ and __LINE__ object macros to the environment (define (join-file-line environment) (define file (current-file environment)) (define line (current-line environment)) (extend-environment environment ;; 6.10.8 (list (object-like-macro identifier: "__FILE__" body: (lex (format #f "~s" file))) (object-like-macro identifier: "__LINE__" body: (lex (number->string line)))))) (define (c-search-path) (make-parameter (list "." "/usr/include"))) ;; #include (define (resolve-h-file string) (typecheck string string?) (cond ((path-absolute? string) string) (else (let ((filename (find file-exists? (map (lambda (path-prefix) (path-append path-prefix string)) (c-search-path))))) (if filename filename (scm-error 'cpp-error "resolve-h-file" "Can't resolve file: ~s" (list string) #f)))))) ;; #include "myheader.h" (define (resolve-q-file string) (typecheck string string?) ;; This should always be a fallback (6.10.2, p. 3) (cond (else (resolve-h-file string)))) (define defined-macro (internal-macro identifier: "defined" body: (lambda (environment arguments) (typecheck arguments (and (list-of (list-of lexeme?)) (not null?))) (aif (identifier-token? (car (list-ref arguments 0))) (let ((in-env (boolean->c-boolean (in-environment? environment it)))) (lex (number->string in-env))) (scm-error 'cpp-error "defined" "Invalid parameter list to `defined': ~s" (list arguments) #f))))) ;; environment, tokens → environment (define (handle-pragma environment tokens) ;; TODO rewrite without match (match tokens (`((preprocessing-token (identifier "STDC")) (whitespace ,_) ... (preprocessing-token (identifier ,identifier)) (whitespace ,_) ... (preprocessing-token (identifier ,on-off-switch)) (whitespace ,_) ...) ;; TODO actually do something with the pragmas (probably just store them in the environment) (format (current-error-port) "#Pragma STDC ~a ~a" identifier on-off-switch) environment) (_ (format (current-error-port) "Non-standard #Pragma: ~s~%" tokens) environment))) ;; TODO ;; (define _Pragma-macro ;; (internal-macro ;; identifier: "_Pragma" ;; body: (lambda (environment tokens) ;; ))) ;; TODO (define (resolve-constant-expression tokens) (typecheck tokens (list-of lexeme?)) 'TODO ) (define* (pprint-macro x optional: (p (current-output-port))) (cond ((internal-macro? x) (format p "/* ~a INTERNAL MACRO */" (macro-identifier x))) ((object-macro? x) (format p "#define ~a ~a" (macro-identifier x) (unlex (macro-body x)))) ((function-macro? x) (format p "#define ~a(~a) ~a" (macro-identifier x) (string-join (append (macro-identifier-list x) (if (variadic? x) '("...") '())) "," 'infix) (unlex (macro-body x)))))) (define* (pprint-environment environment optional: (port (current-error-port))) (display "== Environment ==\n") (hash-for-each (lambda (key macro) (pprint-macro macro port) (newline port)) (cpp-variables environment))) (define (mark-noexpand1 token name) (modify token lexeme-noexpand xcons name)) (define (mark-noexpand tokens name) ;; (typecheck tokens (list-of lexeme?)) ;; (typecheck name string?) (map (lambda (token) (mark-noexpand1 token name)) tokens)) (define (marked-noexpand? token) (cond ((identifier-token? token) => (lambda (id) (member id (lexeme-noexpand token)))) (else #f))) ;; Expands a token-stream which contains no pre-processing directives (#if:s, ...) (define (resolve-token-stream environment tokens) (typecheck environment cpp-environment?) (typecheck tokens (list-of lexeme?)) ;; (pprint-environment environment) ;; (format (current-error-port) "~a~%~%" (unlex tokens)) (let loop ((environment environment) (tokens tokens)) (cond ((null? tokens) '()) ((car tokens) (lambda (x) (and (identifier-token? x) (not (marked-noexpand? x)))) => (lambda (token) (call-with-values (lambda () (maybe-extend-identifier environment (identifier-token? token) (lexeme-noexpand token) (cdr tokens))) loop))) (else (cons (car tokens) (loop environment (cdr tokens))))))) ;; returns a new environment ;; handle body of #if ;; environment, (list token) → environment (define (resolve-for-if environment tokens) (typecheck environment cpp-environment?) (typecheck tokens (list-of lexeme?)) (-> (extend-environment environment defined-macro) (resolve-token-stream tokens) resolve-constant-expression c-boolean->boolean (if (enter-active-if environment) (enter-inactive-if environment)))) ;; environment, string, (list token) → environment, (list token) (define (maybe-extend-identifier environment identifier noexpand-list remaining-tokens) (typecheck environment cpp-environment?) (typecheck identifier string?) (typecheck remaining-tokens (list-of lexeme?)) (typecheck noexpand-list (list-of string?)) (cond ((get-identifier environment identifier) => (lambda (value) (expand-macro (join-file-line environment) value noexpand-list remaining-tokens))) (else ; It wasn't an identifier, leave it as is (values environment (append (mark-noexpand (lex identifier) identifier) remaining-tokens))))) (define (resolve-and-include-header environment tokens) (typecheck environment cpp-environment?) (typecheck tokens (list-of lexeme?)) ;; TODO rewrite without match (let loop ((%first-time #t) (tokens tokens)) (match (drop-whitespace tokens) ((`(header-name (h-string ,str)) rest ...) (cond ((remove whitespace-token? rest) (negate null?) => (lambda (tokens) (scm-error 'cpp-error "resolve-and-include-header" "Unexpected tokens after #include <>: ~s" (list tokens) #f)))) (handle-preprocessing-tokens environment (-> str resolve-h-file read-file tokenize))) ((`(header-name (q-string ,str)) rest ...) (cond ((remove whitespace-token? rest) (negate null?) => (lambda (tokens) (scm-error 'cpp-error "resolve-and-include-header" "Unexpected tokens after #include <>: ~s" (list tokens) #f)))) (handle-preprocessing-tokens environment (-> str resolve-q-file read-file tokenize))) (tokens (unless %first-time (scm-error 'cpp-error "resolve-and-include-header" "Failed parsing tokens: ~s" (list tokens) #f)) (loop #f (resolve-token-stream environment tokens)))))) ;; environment, tokens → environment (define (handle-line-directive environment tokens*) (typecheck environment cpp-environment?) (typecheck tokens* (list-of lexeme?)) (let loop ((%first-time #t) (tokens tokens*)) (cond ((null? tokens)) ((number-token? (car tokens)) => (lambda (line) (let ((line (string->number line))) (let ((remaining (drop-whitespace (cdr tokens)))) (cond ((null? remaining) (set environment current-line line)) ((string-token? (car remaining)) => (lambda (file) (-> environment (set current-line line) (set current-file file)))) (%first-time (loop #f (resolve-token-stream environment tokens))) (else (scm-error 'cpp-error "handle-line-directive" "Invalid line directive: ~s" (list tokens*) #f) )))))) (%first-time (loop #f (resolve-token-stream environment tokens))) (else (scm-error 'cpp-error "handle-line-directive" "Invalid line directive: ~s" (list tokens*) #f))))) ;; environment, tokens → environment (define (resolve-define environment tokens) (typecheck environment cpp-environment?) (typecheck tokens (list-of lexeme?)) (let ((identifier (identifier-token? (car tokens))) (tail (cdr tokens))) (-> environment bump-line (add-identifier! identifier (cond ((and (not (null? tail)) (equal? '(punctuator "(") (lexeme-body (car tail)))) ;; function like macro (let ((identifier-list replacement-list (break (lambda (token) (equal? '(punctuator ")") (lexeme-body token))) (cdr tail)))) (let ((variadic? identifiers (parse-identifier-list identifier-list))) (function-like-macro identifier: identifier variadic?: variadic? identifier-list: identifiers ;; cdr drops the end parenthesis of the definition ;; surrounding whitespace is not part of the replacement list (6.10.3 p.7) body: (drop-whitespace-both (cdr replacement-list)))))) (else (object-like-macro identifier: identifier body: (drop-whitespace-both tail)))))))) ;; environment, tokens -> environment, tokens (define (handle-preprocessing-tokens environment tokens) (let loop ((environment environment) (tokens tokens)) (define (err fmt . args) (scm-error 'cpp-error "handle-preprocessing-tokens" (string-append "~a:~a " fmt) (cons* (current-file environment) (current-line environment) args) #f)) (define (handle-regular-line environment tokens) (let ((line-tokens remaining-tokens (tokens-until-eol tokens))) (if (in-comment-block? environment) (loop (bump-line environment) remaining-tokens) (append (resolve-token-stream environment line-tokens) (loop (bump-line environment) remaining-tokens))))) (cond ((null? tokens) '()) ((newline-token? (car tokens)) (let ((tokens (drop-whitespace (cdr tokens)))) (cond ((null? tokens) '()) ((equal? '(punctuator "#") (lexeme-body (car tokens))) (let ((line-tokens remaining-tokens (tokens-until-eol (cdr tokens)))) ;; drop whitespace after to not "eat" the next newline token (let ((line-tokens (drop-whitespace line-tokens))) (cond ((null? line-tokens) (loop (bump-line environment) remaining-tokens)) ((in-comment-block? environment) (case (string->symbol (identifier-token? (car line-tokens))) ((else) (loop (bump-line (flip-flop-if environment)) remaining-tokens)) ((endif) (loop (bump-line (leave-if environment)) remaining-tokens)) ((elif) (loop (bump-line (resolve-for-if (leave-if environment) (drop-whitespace (cdr line-tokens)))) remaining-tokens)) (else (loop environment remaining-tokens)))) ;; From here on we are not in a comment block (else (let ((directive (string->symbol (identifier-token? (car line-tokens)))) (body (drop-whitespace (cdr line-tokens)))) (if (eq? 'include directive) ;; include is special since it returns a token stream (call-with-values (lambda () (resolve-and-include-header environment body)) (lambda (environment tokens) (loop (bump-line environment) (append tokens remaining-tokens)))) (let ((operation (case directive ((if) resolve-for-if) ((ifdef) (lambda (env body) (if (in-environment? env (identifier-token? (car body))) enter-active-if enter-inactive-if))) ((ifndef) (lambda (env body) (if (in-environment? env (identifier-token? (car body))) enter-inactive-if enter-active-if))) ((define) resolve-define) ((undef) (lambda (env body) (remove-identifier! env (car body)))) ((line) handle-line-directive) ((error) (lambda (_ body) (throw 'cpp-error body))) ((pragma) handle-pragma) (else (err "Unknown preprocessing directive: ~s" (list line-tokens)))))) (loop (bump-line (operation environment body)) remaining-tokens))))))))) (else (handle-regular-line environment tokens))))) (else (handle-regular-line environment tokens))))) (define (read-file path) (call-with-input-file path (@ (ice-9 rdelim) read-string))) (define (comment->whitespace token) (if (comment-token? token) (car (lex " ")) token)) (define (comments->whitespace tokens) (map comment->whitespace tokens)) ;;; 5.1.11.2 Translation phases (define (tokenize string) (-> string ;;; 1. trigraph replacement replace-trigraphs ;;; 2. Line folding fold-lines ;;; 3. Decomposition into preprocenning tokens, whitespaces, and comments lex ;;; 4. Execution of preprocessing directives, all preprocessing directives are then deleted comments->whitespace ;; squeeze-whitespace-blocks ))