(define-module (c preprocessor2) :use-module (srfi srfi-1) :use-module (srfi srfi-71) :use-module (srfi srfi-88) :use-module (ice-9 match) :use-module (c cpp-environment) :use-module (c eval2) :use-module ((c cpp-environment function-like-macro) :select (function-like-macro variadic? identifier-list)) :use-module ((c cpp-environment object-like-macro) :select (object-like-macro)) :use-module ((c cpp-environment internal-macro) :select (internal-macro)) :use-module ((hnh util) :select (-> intersperse)) :use-module ((hnh util lens) :select (set)) :use-module (hnh util path) :use-module (hnh util type) :use-module ((c lex2) :select (lex)) :use-module ((c trigraph) :select (replace-trigraphs)) :use-module ((c line-fold) :select (fold-lines)) :export ()) ;;; Call graph ;; squeeze-whitespace ;; stringify-tokens ;; expand-join ;; build-parameter-map ;; apply-macro ;; - build-parameter-map ;; - stringify-tokens ;; - expand-join ;; expand-macro ;; - parse-parameter-list ;; - apply-macro ;; parse-parameter-list ;; resolve-token-stream ;; - maybe-extend-identifier ;; maybe-extend-identifier ;; - expand-macro ;; resolve-define ;; - parse-identifier-list ;; expand-stringifiers ;; - stringify-tokens ;;; (define-syntax-rule (parameter-map? x) (typecheck x (list-of (pair-of string? (list-of token?))))) ;; Returns two values: ;; - tokens until a newline token is met ;; - (potentially the newline token) and the remaining tokens (define (tokens-until-eol tokens) (typecheck tokens (list-of token?)) (break (lambda (token) (equal? token '(whitespace "\n"))) tokens)) ;; match in predicates so non-lists fail properly (define (whitespace-token? token) (match token (`(whitespace ,_) #t) (_ #f))) (define (identifier-token? token) (match token (`(preprocessing-token (identifier ,id)) id) (_ #f))) (define (unwrap-preprocessing-token token) (match token (`(preprocessing-token ,x) x) (_ (scm-error 'wrong-type-arg "unwrap-preprocessing-token" "Not a preprocessing token: ~s" (list token) #f)))) (define (preprocessing-token? token) (catch 'wrong-type-arg (lambda () (unwrap-preprocessing-token token)) (const #f))) (define (token? x) (or (preprocessing-token? x) (whitespace-token? x))) ;; Replace all whitespace with single spaces. (define (squeeze-whitespace tokens) (match tokens ('() '()) ((`(whitespace ,_) `(whitespace ,_) rest ...) (squeeze-whitespace (cons '(whitespace " ") rest))) ((`(whitespace ,_) rest ...) (cons '(whitespace " ") (squeeze-whitespace rest))) ((token rest ...) (cons token (squeeze-whitespace rest))))) ;; Returns the "source" of the token, as a preprocessing string literal token (define (stringify-token unwrapped-preprocessing-token) (match unwrapped-preprocessing-token (`(string-literal ,s) (format #f "~s" s)) (`(header-name (q-string ,s)) (format #f "~s" s)) (`(header-name (h-string ,s)) (format #f "<~a>" s)) (`(identifier ,id) id) (`(pp-number ,n) n) (`(character-constant ,c) (format #f "'~a'" c)) (`(punctuator ,p) p))) ;; takes a token list, and return a single string literal token (define (stringify-tokens tokens) `(preprocessing-token (string-literal ,(unlex tokens)))) ;; takes a list of preprocessing-token's, and return a "source" string (define (unlex tokens) (typecheck tokens (list-of token?)) (string-concatenate (map (match-lambda (`(preprocessing-token ,body) (stringify-token body)) (`(whitespace ,_) " ")) (squeeze-whitespace tokens)))) ;; Expand ## tokens ;; TODO (define (expand-join macro tokens) (typecheck macro macro?) (typecheck tokens (list-of token?)) tokens) ;; parameters is a lexeme list, as returned by parse-parameter-list (define (build-parameter-map macro parameters) (typecheck macro macro?) (typecheck parameters (list-of (list-of token?))) (if (macro-variadic? macro) (let ((head rest (split-at parameters (length (macro-identifier-list macro))))) (cons (cons "__VA_ARGS__" (concatenate (intersperse '((preprocessing-token (punctuator ","))) rest))) (map cons (macro-identifier-list macro) head))) (map cons (macro-identifier-list macro) parameters))) ;; TODO Deprecate? (define (parameter-map->macro-list param-map) (typecheck param-map parameter-map?) (map (lambda (pair) (let ((identifier (car pair)) (body (cdr pair))) (object-like-macro identifier: identifier body: body))) param-map)) ;; Drop leading whitespace tokens (define (drop-whitespace tokens) (typecheck tokens (list-of token?)) (drop-while whitespace-token? tokens)) (define (drop-whitespace-right tokens) (typecheck tokens (list-of token?)) (-> tokens reverse drop-whitespace reverse)) (define (drop-whitespace-both tokens) (typecheck tokens (list-of token?)) (-> tokens drop-whitespace drop-whitespace-right)) (define (expand-stringifiers macro parameter-map) (typecheck macro macro?) (typecheck parameter-map parameter-map?) (let loop ((tokens (macro-body macro))) (match tokens (('(preprocessing-token (punctuator "#")) rest ...) (match (drop-whitespace rest) ((`(preprocessing-token (identifier ,x)) rest ...) (unless (member x (macro-identifier-list macro)) (scm-error 'macro-expand-error "expand-stringifiers" "'#' is not followed by a macro parameter: ~s" (list x) #f)) (cons (stringify-tokens (assoc-ref parameter-map x)) (loop rest))))) ('() '()) ((token rest ...) (cons token (loop rest)))))) ;; expand function like macro (define (apply-macro environment macro parameters) (typecheck environment cpp-environment?) ;; Each element should be the lexeme list for that argument (typecheck parameters (list-of (list-of token?))) (typecheck macro macro?) (when (or (and (variadic? macro) (> (length (identifier-list macro)) (length parameters))) (and (not (variadic? macro)) (not (= (length (identifier-list macro)) (length parameters))))) (scm-error 'cpp-arity-error "apply-macro" ;; TODO better error message for variadic macros "Wrong number of arguments to macro ~s, expected ~s, got ~s" (list (macro-identifier macro) (length (identifier-list macro)) (length parameters)) (list macro))) (let () (define parameter-map (build-parameter-map macro parameters)) (define stringify-resolved (expand-stringifiers macro parameter-map)) ;; TODO resolve ## (define resulting-body stringify-resolved #; (expand-join macro stringify-resolved)) (define (bound-identifier? id) (member id (if (variadic? macro) (cons "__VA_ARGS__" (identifier-list macro)) (identifier-list macro)))) (let loop ((tokens resulting-body)) (cond ((null? tokens) '()) ;; TODO the parameters should be macro-expanded before being inserted ((identifier-token? (car tokens)) bound-identifier? => (lambda (id) (append (assoc-ref parameter-map id) (loop (cdr tokens))))) (else (cons (car tokens) (loop (cdr tokens)))))) #; (let ((env (extend-environment environment (parameter-map->macro-list parameter-map)))) (resolve-token-stream env resulting-body)))) ;; Expand object-like macro ;; #define VALUE 10 ;; #define str(x) #x ;; #define OTHER str(VALUE) ;; OTHER ;; ⇒ "VALUE" ;; token should be the token stream just after the name of the macro (define (expand-macro environment macro remaining-tokens) (typecheck environment cpp-environment?) (typecheck remaining-tokens (list-of token?)) (let ((name (macro-identifier macro))) (cond ((object-macro? macro) (values environment (append (mark-noexpand (macro-body macro) name) remaining-tokens))) ((function-macro? macro) (let ((containing remaining newlines (parse-parameter-list remaining-tokens))) (values (bump-line environment newlines) (append (mark-noexpand (apply-macro environment macro containing) name) remaining)))) ((internal-macro? macro) (let ((containing remaining newlines (parse-parameter-list remaining-tokens))) (values (bump-line environment newlines) (append (mark-noexpand ((macro-body macro) environment containing) name) remaining)))) (else (scm-error 'wrong-type-arg "expand-macro" "Macro isn't a macro: ~s" (list macro) #f))))) ;; Takes a list of preprocessing tokens, and returns two values ;; if the last token was '...' ;; and a list of strings of all token names ;; Note that this is ONLY #define f(x) forms ;; not usage forms (define (parse-identifier-list tokens) (typecheck tokens (list-of token?)) (let loop ((tokens (remove whitespace-token? tokens)) (done '())) (match tokens ('() (values #f (reverse done))) ((`(preprocessing-token (identifier ,id)) rest ...) (loop rest (cons id done))) ((`(preprocessing-token (punctuator "..."))) (values #t (reverse done))) ((`(preprocessing-token (punctuator "...")) rest ...) (scm-error 'cpp-error "parse-identifier-list" "'...' only allowed as last argument in identifier list. Rest: ~s" (list rest) #f)) ((`(preprocessing-token (punctuator ",")) rest ...) (loop rest done)) ((`(preprocessing-token ,other) rest ...) (scm-error 'cpp-error "parse-identifier-list" "Unexpected preprocessing-token in identifier list: ~s" (list other) #f))))) ;; helper procedure to parse-parameter-list. ;; If a parameter is complex then whitespace is kept, but squeezed to single spaces. Surounding whitespace is removed. ;; Example: ;; #define str(x, y) #y ;; str(x, ( 2, 4 ) ) ;; expands to: ;; "( 2, 4 )" ;; 6.10.3.2 p 2 (define (cleanup-whitespace tokens) (typecheck tokens (list-of token?)) (-> tokens drop-whitespace-both squeeze-whitespace)) ;; returns three values: ;; - a list of tokens where each is a parameter to the function like macro ;; - the remaining tokenstream ;; - how many newlines were encountered ;; The standard might call these "replacement lists" (define (parse-parameter-list tokens*) (typecheck tokens* (list-of token?)) (let %loop ((depth 0) (newlines 0) (current '()) (parameters '()) (tokens tokens*) (%first-iteration? #t)) (define* (loop tokens key: (depth depth) (newlines newlines) (current current) (parameters parameters)) (%loop depth newlines current parameters tokens #f)) (let ((current* (if (zero? depth) current (cons (car tokens) current)))) (match tokens (() (scm-error 'misc-error "parse-parameter-list" "Ran out of tokens while parsing: ~s" (list tokens*) #f)) (('(whitespace "\n") rest ...) (loop rest newlines: (1+ newlines) current: current*)) ((`(whitespace ,_) rest ...) (loop rest current: current*)) (('(preprocessing-token (punctuator "(")) rest ...) (loop rest depth: (1+ depth) current: current*)) (('(preprocessing-token (punctuator ")")) rest ...) (if (= 1 depth) ;; return value (values (if (null? parameters) (cond ((null? current) '()) ((every whitespace-token? current) '()) (else (reverse (cons (cleanup-whitespace (reverse current)) parameters)))) (reverse (cond ((null? current) parameters) ((every whitespace-token? current) parameters) (else (cons (cleanup-whitespace (reverse current)) parameters))))) rest newlines) (loop rest depth: (1- depth) current: current*))) (('(preprocessing-token (punctuator ",")) rest ...) (if (= 1 depth) (loop rest current: '() parameters: (cons (cond ((null? current) '()) ((every whitespace-token? current) '()) (else (cleanup-whitespace (reverse current)))) parameters)) (loop rest current: current*))) ((_ rest ...) (loop rest current: current*)))))) ;; Add __FILE__ and __LINE__ object macros to the environment (define (join-file-line environment) (define file (current-file environment)) (define line (current-line environment)) (extend-environment environment ;; 6.10.8 (list (object-like-macro identifier: "__FILE__" body: `((preprocessing-token (string-literal ,file)))) (object-like-macro identifier: "__LINE__" body: `((preprocessing-token (pp-number ,(number->string line)))))))) (define (c-search-path) (make-parameter (list "." "/usr/include"))) ;; #include (define (resolve-h-file string) (typecheck string string?) (cond ((path-absolute? string) string) (else (let ((filename (find file-exists? (map (lambda (path-prefix) (path-append path-prefix string)) (c-search-path))))) (if filename filename (scm-error 'cpp-error "resolve-h-file" "Can't resolve file: ~s" (list string) #f)))))) ;; #include "myheader.h" (define (resolve-q-file string) (typecheck string string?) ;; This should always be a fallback (6.10.2, p. 3) (cond (else (resolve-h-file string)))) (define defined-macro (internal-macro identifier: "defined" body: (lambda (environment tokens) (match tokens (`(((preprocessing-token (identifier ,id)))) (let ((in-env (boolean->c-boolean (in-environment? environment id)))) (lex (number->string in-env)))) (_ (scm-error 'cpp-error "defined" "Invalid parameter list to `defined': ~s" (list tokens) #f)))))) ;; environment, tokens → environment (define (handle-pragma environment tokens) (match tokens (`((preprocessing-token (identifier "STDC")) (whitespace ,_) ... (preprocessing-token (identifier ,identifier)) (whitespace ,_) ... (preprocessing-token (identifier ,on-off-switch)) (whitespace ,_) ...) ;; TODO actually do something with the pragmas (probably just store them in the environment) (format (current-error-port) "#Pragma STDC ~a ~a" identifier on-off-switch) environment) (_ (format (current-error-port) "Non-standard #Pragma: ~s~%" tokens) environment))) ;; TODO ;; (define _Pragma-macro ;; (internal-macro ;; identifier: "_Pragma" ;; body: (lambda (environment tokens) ;; ))) ;; TODO (define (resolve-constant-expression tokens) (typecheck tokens (list-of token?)) 'TODO ) (define* (pprint-macro x optional: (p (current-output-port))) (cond ((internal-macro? x) (format p "/* ~a INTERNAL MACRO */" (macro-identifier x))) ((object-macro? x) (format p "#define ~a ~a" (macro-identifier x) (unlex (macro-body x)))) ((function-macro? x) (format p "#define ~a(~a) ~a" (macro-identifier x) (string-join (macro-identifier-list x) "," 'infix) (unlex (macro-body x)))))) (define* (pprint-environment environment optional: (port (current-error-port))) (display "== Environment ==\n") (hash-for-each (lambda (key macro) (pprint-macro macro port) (newline port)) (cpp-variables environment))) (define noexpand (make-object-property)) (define (mark-noexpand tokens name) (typecheck tokens (list-of token?)) (typecheck name string?) (let ((tokens tokens)) (for-each (lambda (token) (set! (noexpand token) (cons name (noexpand token)))) tokens) tokens)) (define (list-like->list x) (if (not (pair? x)) (list x) (cons (car x) (list-like->list (cdr x))))) (define (marked-noexpand? token) (cond ((identifier-token? token) => (lambda (id) (member id (list-like->list (noexpand token))))) (else #f))) ;; Expands a token-stream which contains no pre-processing directives (#if:s, ...) (define (resolve-token-stream environment tokens) (typecheck environment cpp-environment?) (typecheck tokens (list-of token?)) ;; (pprint-environment environment) ;; (format (current-error-port) "~a~%~%" (unlex tokens)) (let loop ((environment environment) (tokens tokens)) (unless (null? tokens) (format (current-error-port) "~s [~a] [~a]~%" (car tokens) (noexpand (car tokens)) (marked-noexpand? (car tokens)))) (format (current-error-port) "~a~%" (unlex tokens)) (cond ((null? tokens) '()) ((car tokens) (lambda (x) (and (identifier-token? x) (not (marked-noexpand? x)))) => (lambda (token) (call-with-values (lambda () (maybe-extend-identifier environment (identifier-token? token) (cdr tokens))) loop))) (else (cons (car tokens) (loop environment (cdr tokens)))) ) #; (match tokens ('() '()) ((`(preprocessing-token (identifier ,id)) rest ...) (call-with-values (lambda () (maybe-extend-identifier environment id rest)) loop)) ;; ((`(whitespace ,_) rest ...) ;; (loop environment rest)) ((token rest ...) (cons token (loop environment rest)))))) ;; returns a new environment ;; handle body of #if ;; environment, (list token) → environment (define (resolve-for-if environment tokens) (typecheck environment cpp-environment?) (typecheck tokens (list-of token?)) (-> (extend-environment environment defined-macro) (resolve-token-stream tokens) resolve-constant-expression c-boolean->boolean (if (enter-active-if environment) (enter-inactive-if environment)))) ;; environment, string, (list token) → environment, (list token) (define (maybe-extend-identifier environment identifier remaining-tokens) (typecheck environment cpp-environment?) (typecheck identifier string?) (typecheck remaining-tokens (list-of token?)) ;; (typecheck continuation procedure?) ; TODO arity? (cond ((get-identifier environment identifier) => (lambda (value) (expand-macro (join-file-line environment) value remaining-tokens))) (else ; It wasn't an identifier, leave it as is (values environment (append (mark-noexpand `((preprocessing-token (identifier ,identifier))) identifier) remaining-tokens))))) (define (resolve-and-include-header environment tokens) (typecheck environment cpp-environment?) (typecheck tokens (list-of token?)) (let loop ((%first-time #t) (tokens tokens)) (match (drop-whitespace tokens) ((`(header-name (h-string ,str)) rest ...) (cond ((remove whitespace-token? rest) (negate null?) => (lambda (tokens) (scm-error 'cpp-error "resolve-and-include-header" "Unexpected tokens after #include <>: ~s" (list tokens) #f)))) (handle-preprocessing-tokens environment (-> str resolve-h-file read-file tokenize))) ((`(header-name (q-string ,str)) rest ...) (cond ((remove whitespace-token? rest) (negate null?) => (lambda (tokens) (scm-error 'cpp-error "resolve-and-include-header" "Unexpected tokens after #include <>: ~s" (list tokens) #f)))) (handle-preprocessing-tokens environment (-> str resolve-q-file read-file tokenize))) (tokens (unless %first-time (scm-error 'cpp-error "resolve-and-include-header" "Failed parsing tokens: ~s" (list tokens) #f)) (loop #f (resolve-token-stream environment tokens)))))) ;; environment, tokens → environment (define (handle-line-directive environment tokens*) (typecheck environment cpp-environment?) (typecheck tokens* (list-of token?)) (let loop ((%first-time #t) (tokens tokens*)) (match tokens (`((preprocessing-token (pp-number ,line)) (whitespace ,_) ... rest ...) (match rest (`((preprocessing-token (string-literal ,file)) (whitespace ,_) ...) (-> environment (set current-line line) (set current-file file))) (`((whitespace ,_) ...) (set environment current-line line)) (_ (unless %first-time (scm-error 'cpp-error "handle-line-directive" "Invalid line directive: ~s" (list tokens*) #f)) (loop #f (resolve-token-stream environment tokens))))) (_ (unless %first-time (scm-error 'cpp-error "handle-line-directive" "Invalid line directive: ~s" (list tokens*) #f)) (loop #f (resolve-token-stream environment tokens)))))) ;; environment, tokens → environment (define (resolve-define environment tokens) (typecheck environment cpp-environment?) (typecheck tokens (list-of token?)) (match tokens ((`(preprocessing-token (identifier ,identifier)) tail ...) (-> environment bump-line (add-identifier! identifier (match tail (('(preprocessing-token (punctuator "(")) rest ...) ;; function like macro (let ((identifier-list replacement-list (break (lambda (token) (equal? token '(preprocessing-token (punctuator ")")))) rest))) (let ((variadic? identifiers (parse-identifier-list identifier-list))) (function-like-macro identifier: identifier variadic?: variadic? identifier-list: identifiers ;; NOTE 6.10.3 states that there needs to be at least on whitespace here ;; cdr drops the end parenthesis of the definition ;; surrounding whitespace is not part of the replacement list (6.10.3 p.7) body: (drop-whitespace-both (cdr replacement-list)))))) (_ (object-like-macro identifier: identifier body: (drop-whitespace-both tail))))))))) ;; environment, tokens -> environment, tokens (define (handle-preprocessing-tokens environment tokens) (let loop ((environment environment) (tokens tokens)) (define (err fmt . args) (scm-error 'cpp-error "handle-preprocessing-tokens" (string-append "~a:~a " fmt) (cons* (current-file environment) (current-line environment) args) #f)) ;; TODO all of this needs to be surounded with a conditional for ;; environmentns if status. However, ensure that each directive ;; starts at start of line (match tokens ('() '()) ((`(whitespace "\n") `(whitespace ,_) '... '(preprocessing-token (puntuator "#")) rest ...) ;; Line tokens are those in this line, ;; while remaining tokens are the newline, follewed by the rest of the files tokens (let ((line-tokens remaining-tokens (tokens-until-eol rest))) ;; Actual tokens just removes all whitespace between "#" and "define" (let ((actual-tokens (drop-whitespace line-tokens))) (if (null? actual-tokens) (loop (bump-line environment) remaining-tokens) (match (car actual-tokens) (`(preprocessing-token (identifier "if")) (let ((environment (resolve-for-if environment actual-tokens))) (loop environment remaining-tokens))) (`(preprocessing-token (identifier "ifdef")) (match actual-tokens ((`(preprocessing-token (identifier ,id)) _ ...) (loop ((if (in-environment? environment id) enter-active-if enter-inactive-if) environment) remaining-tokens)) (_ (err "Non identifier in ifdef: ~s" actual-tokens)))) (`(preprocessing-token (identifier "ifndef")) (match actual-tokens ((`(preprocessing-token (identifier ,id)) _ ...) (loop ((if (in-environment? environment id) enter-inactive-if enter-active-if) environment) remaining-tokens)) (_ (err "Non identifier in ifndef: ~s" actual-tokens)))) ('(preprocessing-token (identifier "else")) ;; TODO 'TODO ) ('(preprocessing-token (identifier "elif")) (-> environment leave-if (resolve-for-if actual-tokens) (loop remaining-tokens))) ('(preprocessing-token (identifier "endif")) (loop (leave-if environment) remaining-tokens)) ('(preprocessing-token (identifier "include")) (call-with-values (lambda () (resolve-and-include-header environment (cdr actual-tokens))) (lambda (environment tokens) (loop environment (append tokens remaining-tokens))))) ('(preprocessing-token (identifier "define")) (let ((env (resolve-define environment (cdr actual-tokens)))) (loop env remaining-tokens)) ) ('(preprocessing-token (identifier "undef")) (loop (match actual-tokens (`((preprocessing-token (identifier ,id))) (-> environment bump-line (remove-identifier! id)))) remaining-tokens)) ('(preprocessing-token (identifier "line")) (loop (handle-line-directive environment actual-tokens) remaining-tokens)) ('(preprocessing-token (identifier "error")) ;; NOTE this is an "expected" error (throw 'cpp-error actual-tokens)) ('(preprocessing-token (identifier "pragma")) (loop (handle-pragma environment actual-tokens) remaining-tokens))))))) ((`(preprocessing-token (identifier ,id)) rest ...) (maybe-extend-identifier environment id rest loop)) (('(whitespace "\n") rest ...) (cons '(whitespace "\n") (loop (bump-line environment) rest))) ((token rest ...) (cons token (loop environment rest)))))) (define (read-file path) (call-with-input-file path (@ (ice-9 rdelim) read-string))) (define (comment->whitespace token) (match token (`(comment ,_) '(whitespace " ")) (other other))) (define (comments->whitespace tokens) (map comment->whitespace tokens)) ;;; 5.1.11.2 Translation phases (define (tokenize string) (-> string ;;; 1. trigraph replacement replace-trigraphs ;;; 2. Line folding fold-lines ;;; 3. Decomposition into preprocenning tokens, whitespaces, and comments lex ;;; 4. Execution of preprocessing directives, all preprocessing directives are then deleted comments->whitespace ;; squeeze-whitespace-blocks ))