diff options
Diffstat (limited to 'module/c/preprocessor2.scm')
-rw-r--r-- | module/c/preprocessor2.scm | 590 |
1 files changed, 590 insertions, 0 deletions
diff --git a/module/c/preprocessor2.scm b/module/c/preprocessor2.scm new file mode 100644 index 00000000..e99b1049 --- /dev/null +++ b/module/c/preprocessor2.scm @@ -0,0 +1,590 @@ +(define-module (c preprocessor2) + :use-module (srfi srfi-1) + :use-module (srfi srfi-71) + :use-module (srfi srfi-88) + :use-module (ice-9 match) + :use-module (c cpp-environment) + :use-module (c eval2) + :use-module ((c cpp-environment function-like-macro) :select (function-like-macro)) + :use-module ((c cpp-environment object-like-macro) :select (object-like-macro)) + :use-module ((c cpp-environment internal-macro) :select (internal-macro)) + :use-module ((hnh util) :select (->)) + :use-module ((hnh util lens) :select (set)) + :use-module (hnh util path) + :use-module ((c lex2) :select (lex)) + :use-module ((c trigraph) :select (replace-trigraphs)) + :use-module ((c line-fold) :select (fold-lines)) + :export ()) + +;; Returns two values: +;; - tokens until a newline token is met +;; - (potentially the newline token) and the remaining tokens +(define (tokens-until-eol tokens) + (break (lambda (token) (equal? token '(whitespace "\n"))) + tokens)) + +;; match in predicates so non-lists fail properly +(define (whitespace-token? token) + (match token + (`(whitespace ,_) #t) + (_ #f))) + +(define (unwrap-preprocessing-token token) + (match token + (`(preprocessing-token ,x) x) + (_ (scm-error 'wrong-type-arg "unwrap-preprocessing-token" + "Not a preprocessing token: ~s" (list token) + #f)))) + +(define (preprocessing-token? token) + (catch 'wrong-type-arg + (lambda () (unwrap-preprocessing-token token)) + (const #f))) + + +;; Replace all whitespace with single spaces. +(define (squeeze-whitespace tokens) + (match tokens + ('() '()) + ((`(whitespace ,_) `(whitespace ,_) rest ...) + (squeeze-whitespace (cons '(whitespace " ") rest))) + ((`(whitespace ,_) rest ...) + (cons '(whitespace " ") (squeeze-whitespace rest))) + ((token rest ...) + (cons token (squeeze-whitespace rest))))) + +;; Returns the "source" of the token, as a preprocessing string literal token +(define (stringify-token unwrapped-preprocessing-token) + (match unwrapped-preprocessing-token + (`(string-literal ,s) + (format #f "~s" s)) + (`(header-name (q-string ,s)) + (format #f "~s" s)) + (`(header-name (h-string ,s)) + (format #f "<~a>" s)) + (`(identifier ,id) id) + (`(pp-number ,n) n) + (`(character-constant ,c) + (format #f "'~a'" c)) + (`(punctuator ,p) p))) + +(define (stringify-tokens tokens) + `(preprocessing-token + (string-literal + ,(string-concatenate + (map (match-lambda (`(preprocessing-token ,body) (stringify-token body)) + (`(whitespace ,_) " ")) + (squeeze-whitespace tokens)))))) + +;; Expand ## tokens +;; TODO +(define (expand-join macro tokens) + tokens) + +;; parameters is a lexeme list, as returned by parse-parameter-list +(define (build-parameter-map macro parameters) + (if (macro-variadic? macro) + (let ((head rest (split-at parameters (length (macro-identifier-list macro))))) + ;; TODO commas (,) should be interleaved with rest + (cons (cons "__VA_ARGS__" rest) + (map cons (macro-identifier-list macro) head))) + (map cons + (macro-identifier-list macro) + parameters))) + +;; Drop leading whitespace tokens +(define (drop-whitespace tokens) + (drop-while whitespace-token? tokens)) + +(define (drop-whitespace-right tokens) + (-> tokens reverse drop-whitespace reverse)) + +(define (drop-whitespace-both tokens) + (-> tokens + drop-whitespace + drop-whitespace-right)) + +(define (expand-stringifiers macro parameter-map) + (let loop ((tokens (macro-body macro))) + (match tokens + (('(preprocessing-token (punctuator "#")) + rest ...) + (match (drop-whitespace rest) + ((`(preprocessing-token (identifier ,x)) rest ...) + (unless (member x (macro-identifier-list macro)) + (scm-error 'macro-expand-error "expand-stringifiers" + "'#' is not followed by a macro parameter: ~s" + (list x) #f)) + (cons (stringify-tokens (assoc-ref parameter-map x)) + (loop rest))))) + ('() '()) + ((token rest ...) + (cons token (loop rest)))))) + +;; expand function like macro +(define (apply-macro environment macro parameters) + (define parameter-map (build-parameter-map macro parameters)) + (define stringify-resolved (expand-stringifiers macro parameter-map)) + ;; TODO resolve ## + (define resulting-body stringify-resolved #; (expand-join macro stringify-resolved) + ) + (resolve-token-stream (extend-environment environment parameter-map) + resulting-body)) + + + +;; Expand object-like macro + +;; #define VALUE 10 +;; #define str(x) #x +;; #define OTHER str(VALUE) +;; OTHER +;; ⇒ "VALUE" + +;; token should be the token stream just after the name of the macro +(define (expand-macro environment macro tokens) + (cond ((object-macro? macro) + ;; Shouldn't we expand the macro body here? + (values environment (append (macro-body macro) tokens))) + + ((function-macro? macro) + (let ((containing remaining newlines (parse-parameter-list tokens))) + (values (bump-line environment newlines) + ;; Macro output can be macro expanded + ;; TODO self-referential macros? + (append (apply-macro environment macro containing) remaining)))) + + ((internal-macro? macro) + (let ((containing remaining newlines (parse-parameter-list tokens))) + (values (bump-line environment newlines) + (append ((macro-body macro) environment containing) + remaining)))) + + (else + (scm-error 'wrong-type-arg "expand-macro" + "Macro isn't a macro: ~s" + (list macro) #f)))) + +;; Takes a list of preprocessing tokens, and returns two values +;; if the last token was '...' +;; and a list of strings of all token names +;; Note that this is ONLY #define f(x) forms +;; not usage forms +(define (parse-identifier-list tokens) + (let loop ((tokens (remove whitespace-token? tokens)) (done '())) + (match tokens + ('() (values #f (reverse done))) + + ((`(preprocessing-token (identifier ,id)) rest ...) + (loop rest (cons id done))) + + ((`(preprocessing-token (punctuator "..."))) + (values #t (reverse done))) + + ((`(preprocessing-token (punctuator "...")) rest ...) + (scm-error 'cpp-error "parse-identifier-list" + "'...' only allowed as last argument in identifier list. Rest: ~s" + (list rest) #f)) + + ((`(preprocessing-token (punctuator ",")) rest ...) + (loop rest done)) + + ((`(preprocessing-token ,other) rest ...) + (scm-error 'cpp-error "parse-identifier-list" + "Unexpected preprocessing-token in identifier list: ~s" + (list other) #f))))) + + + +;; helper procedure to parse-parameter-list. +;; If a parameter is complex then whitespace is kept, but squeezed to single spaces. Surounding whitespace is removed. +;; Example: +;; #define str(x, y) #y +;; str(x, ( 2, 4 ) ) +;; expands to: +;; "( 2, 4 )" +;; 6.10.3.2 p 2 +(define (cleanup-whitespace tokens) + (-> tokens drop-whitespace-both squeeze-whitespace)) + +;; returns three values: +;; - a list of tokens where each is a parameter to the function like macro +;; - the remaining tokenstream +;; - how many newlines were encountered +;; The standard might call these "replacement lists" +(define (parse-parameter-list tokens) + (let %loop ((depth 0) (newlines 0) (current '()) + (parameters '()) (tokens tokens) (%first-iteration? #t)) + (define* (loop tokens key: + (depth depth) (newlines newlines) + (current current) (parameters parameters)) + (%loop depth newlines current parameters tokens #f)) + (let ((current* (if (zero? depth) + current + (cons (car tokens) current)))) + (match tokens + (('(whitespace "\n") rest ...) + (loop rest newlines: (1+ newlines) current: current*)) + ((`(whitespace ,_) rest ...) + (loop rest current: current*)) + (('(preprocessing-token (punctuator "(")) rest ...) + (loop rest depth: (1+ depth) current: current*)) + (('(preprocessing-token (punctuator ")")) rest ...) + (if (= 1 depth) + ;; return value + (values + (if (null? parameters) + (cond ((null? current) '()) + ((every whitespace-token? current) '()) + (else (reverse + (cons (cleanup-whitespace (reverse current)) + parameters)))) + (reverse + (cond ((null? current) parameters) + ((every whitespace-token? current) parameters) + (else (cons (cleanup-whitespace (reverse current)) + parameters))))) + + rest + newlines) + (loop rest + depth: (1- depth) + current: current*))) + (('(preprocessing-token (punctuator ",")) rest ...) + (if (= 1 depth) + (loop rest + current: '() + parameters: + (cons (cond ((null? current) '()) + ((every whitespace-token? current) '()) + (else (cleanup-whitespace (reverse current)))) + parameters)) + (loop rest current: current*))) + ((_ rest ...) + (loop rest current: current*)))))) + + +(define (join-file-line environment) + (define file (current-file environment)) + (define line (current-line environment)) + (extend-environment + environment + ;; 6.10.8 + (list + (object-like-macro + identifier: "__FILE__" + body: `((preprocessing-token (string-literal ,file)))) + (object-like-macro + identifier: "__LINE__" + body: `((preprocessing-token (pp-number ,(number->string line)))))))) + +(define (c-search-path) (make-parameter (list "." "/usr/include"))) + +;; #include <stdio.h> +(define (resolve-h-file string) + (cond ((path-absolute? string) string) + (else + (let ((filename + (find file-exists? + (map (lambda (path-prefix) + (path-append path-prefix string)) + (c-search-path))))) + (if filename filename + (scm-error 'cpp-error "resolve-h-file" + "Can't resolve file: ~s" + (list string) #f)))))) + +;; #include "myheader.h" +(define (resolve-q-file string) + ;; This should always be a fallback (6.10.2, p. 3) + (cond (else (resolve-h-file string)))) + +(define defined-macro + (internal-macro + identifier: "defined" + body: (lambda (environment tokens) + (match tokens + (`((preprocessing-token (identifier ,id))) + `(preprocessing-token (pp-number ,(boolean->c-boolean (in-environment? environment id))))) + (_ (scm-error 'cpp-error "defined" + "Invalid parameter list to `defined': ~s" + (list tokens) #f)))))) + +;; environment, tokens → environment +(define (handle-pragma environment tokens) + (match tokens + (`((preprocessing-token (identifier "STDC")) (whitespace ,_) ... + (preprocessing-token (identifier ,identifier)) (whitespace ,_) ... + (preprocessing-token (identifier ,on-off-switch)) (whitespace ,_) ...) + ;; TODO actually do something with the pragmas (probably just store them in the environment) + (format (current-error-port) + "#Pragma STDC ~a ~a" identifier on-off-switch) + environment) + (_ (format (current-error-port) + "Non-standard #Pragma: ~s~%" tokens) + environment))) + + +;; TODO +;; (define _Pragma-macro +;; (internal-macro +;; identifier: "_Pragma" +;; body: (lambda (environment tokens) +;; ))) + +;; TODO +(define (resolve-constant-expression tokens) + 'TODO + ) + +;; Expands a token-stream which contains no pre-processing directives (#if:s, ...) +(define (resolve-token-stream environment tokens) + (let loop ((tokens tokens)) + (match tokens + ('() '()) + ((`(preprocessing-token (identifier ,id)) rest ...) + (call-with-values (lambda () (maybe-extend-identifier environment id rest)) + (lambda (_ tokens) (loop tokens)))) + ((`(whitespace ,_) rest ...) + (loop rest)) + ((token rest ...) + (cons token (loop rest)))))) + +;; returns a new environment +;; handle body of #if +;; environment, (list token) → environment +(define (resolve-for-if environment tokens) + (-> (extend-environment environment defined-macro) + (resolve-token-stream tokens) + resolve-constant-expression + c-boolean->boolean + (if (enter-active-if environment) + (enter-inactive-if environment)))) + +;; environment, string, (list token) → environment, (list token) +(define (maybe-extend-identifier environment identifier remaining-tokens) + (cond ((get-identifier environment identifier) + => (lambda (value) (expand-macro (join-file-line environment) + value + remaining-tokens))) + (else ; It wasn't an identifier, leave it as is + ;; TODO shouldn't we include the identifier in the remaining tokens stream? + (values environment remaining-tokens)))) + +(define (resolve-and-include-header environment tokens) + (let loop ((%first-time #t) (tokens tokens)) + (match (drop-whitespace tokens) + ((`(header-name (h-string ,str)) rest ...) + (cond ((remove whitespace-token? rest) + (negate null?) + => (lambda (tokens) + (scm-error 'cpp-error "resolve-and-include-header" + "Unexpected tokens after #include <>: ~s" + (list tokens) #f)))) + (handle-preprocessing-tokens + environment + (-> str resolve-h-file read-file tokenize))) + + ((`(header-name (q-string ,str)) rest ...) + (cond ((remove whitespace-token? rest) + (negate null?) + => (lambda (tokens) + (scm-error 'cpp-error "resolve-and-include-header" + "Unexpected tokens after #include <>: ~s" + (list tokens) + #f)))) + (handle-preprocessing-tokens + environment + (-> str resolve-q-file read-file tokenize))) + + (tokens + (unless %first-time + (scm-error 'cpp-error "resolve-and-include-header" + "Failed parsing tokens: ~s" + (list tokens) #f)) + (loop #f (resolve-token-stream environment tokens)))))) + +;; environment, tokens → environment +(define (handle-line-directive environment tokens*) + (let loop ((%first-time #t) (tokens tokens*)) + (match tokens + (`((preprocessing-token (pp-number ,line)) (whitespace ,_) ... rest ...) + (match rest + (`((preprocessing-token (string-literal ,file)) (whitespace ,_) ...) + (-> environment + (set current-line line) + (set current-file file))) + (`((whitespace ,_) ...) + (set environment current-line line)) + (_ (unless %first-time + (scm-error 'cpp-error "handle-line-directive" + "Invalid line directive: ~s" + (list tokens*) #f)) + (loop #f (resolve-token-stream environment tokens))))) + (_ (unless %first-time + (scm-error 'cpp-error "handle-line-directive" + "Invalid line directive: ~s" + (list tokens*) #f)) + (loop #f (resolve-token-stream environment tokens)))))) + +;; environment, tokens → environment +(define (resolve-define environment tokens) + (match tokens + ((`(preprocessing-token (identifier ,identifier)) tail ...) + (-> environment + bump-line + (add-identifier! + identifier + (match tail + (('(preprocessing-token (punctuator "(")) rest ...) + ;; function like macro + (call-with-values (lambda () (break (lambda (token) (equal? token '(preprocessing-token (punctuator ")")))) + rest)) + (lambda (identifier-list replacement-list) + (let ((variadic? identifiers (parse-identifier-list identifier-list))) + + (function-like-macro + identifier: identifier + variadic?: variadic? + identifier-list: identifiers + ;; NOTE 6.10.3 states that there needs to be at least on whitespace here + body: (cdr replacement-list)))))) + (_ (object-like-macro + identifier: identifier + body: tail)))))))) + + + +;; environment, tokens -> environment, tokens +(define (handle-preprocessing-tokens environment tokens) + (let loop ((environment environment) (tokens tokens)) + (define (err fmt . args) + (scm-error 'cpp-error "handle-preprocessing-tokens" + (string-append "~a:~a " fmt) + (cons* (current-file environment) + (current-line environment) + args) + #f)) + + ;; TODO all of this needs to be surounded with a conditional for + ;; environmentns if status. However, ensure that each directive + ;; starts at start of line + + (match tokens + ('() '()) + ((`(whitespace "\n") `(whitespace ,_) '... '(preprocessing-token (puntuator "#")) rest ...) + ;; Line tokens are those in this line, + ;; while remaining tokens are the newline, follewed by the rest of the files tokens + (let ((line-tokens remaining-tokens (tokens-until-eol rest))) + ;; Actual tokens just removes all whitespace between "#" and "define" + (let ((actual-tokens (drop-whitespace line-tokens))) + (if (null? actual-tokens) + (loop (bump-line environment) remaining-tokens) + (match (car actual-tokens) + (`(preprocessing-token (identifier "if")) + (let ((environment (resolve-for-if environment actual-tokens))) + (loop environment remaining-tokens))) + + (`(preprocessing-token (identifier "ifdef")) + (match actual-tokens + ((`(preprocessing-token (identifier ,id)) _ ...) + (loop + ((if (in-environment? environment id) + enter-active-if enter-inactive-if) + environment) + remaining-tokens)) + (_ (err "Non identifier in ifdef: ~s" actual-tokens)))) + + (`(preprocessing-token (identifier "ifndef")) + (match actual-tokens + ((`(preprocessing-token (identifier ,id)) _ ...) + (loop + ((if (in-environment? environment id) + enter-inactive-if enter-active-if) + environment) + remaining-tokens)) + (_ (err "Non identifier in ifndef: ~s" actual-tokens)))) + + ('(preprocessing-token (identifier "else")) + ;; TODO + 'TODO + ) + + ('(preprocessing-token (identifier "elif")) + (-> environment leave-if + (resolve-for-if actual-tokens) + (loop remaining-tokens))) + + ('(preprocessing-token (identifier "endif")) + (loop (leave-if environment) remaining-tokens)) + + ('(preprocessing-token (identifier "include")) + (call-with-values + (lambda () (resolve-and-include-header environment (cdr actual-tokens))) + (lambda (environment tokens) + (loop environment (append tokens remaining-tokens))))) + + ('(preprocessing-token (identifier "define")) + (let ((env (resolve-define environment (cdr actual-tokens)))) + (loop env remaining-tokens)) + ) + + ('(preprocessing-token (identifier "undef")) + (loop (match actual-tokens + (`((preprocessing-token (identifier ,id))) + (-> environment bump-line (remove-identifier! id)))) + remaining-tokens)) + + ('(preprocessing-token (identifier "line")) + (loop (handle-line-directive environment actual-tokens) + remaining-tokens)) + + ('(preprocessing-token (identifier "error")) + ;; NOTE this is an "expected" error + (throw 'cpp-error actual-tokens)) + + ('(preprocessing-token (identifier "pragma")) + (loop (handle-pragma environment actual-tokens) + remaining-tokens))))))) + + ((`(preprocessing-token (identifier ,id)) rest ...) + (call-with-values (lambda () (maybe-extend-identifier environment id rest)) + loop)) + + (('(whitespace "\n") rest ...) + (cons '(whitespace "\n") (loop (bump-line environment) rest))) + + ((token rest ...) (cons token (loop environment rest)))))) + + + +(define (comment->whitespace expr) + (match expr + (('comment _) '(whitespace " ")) + (other other))) + +(define (read-file path) + (call-with-input-file path (@ (ice-9 rdelim) read-string))) + +(define (comment->whitespace token) + (match token + (`(comment ,_) '(whitespace " ")) + (other other))) + +(define (comments->whitespace tokens) + (map comment->whitespace tokens)) + +;;; 5.1.11.2 Translation phases + +(define (tokenize string) + (-> string +;;; 1. trigraph replacement + replace-trigraphs +;;; 2. Line folding + fold-lines +;;; 3. Decomposition into preprocenning tokens, whitespaces, and comments + lex +;;; 4. Execution of preprocessing directives, all preprocessing directives are then deleted + comments->whitespace + ;; squeeze-whitespace-blocks + )) |