aboutsummaryrefslogtreecommitdiff
path: root/module/c/preprocessor2.scm
diff options
context:
space:
mode:
Diffstat (limited to 'module/c/preprocessor2.scm')
-rw-r--r--module/c/preprocessor2.scm590
1 files changed, 590 insertions, 0 deletions
diff --git a/module/c/preprocessor2.scm b/module/c/preprocessor2.scm
new file mode 100644
index 00000000..e99b1049
--- /dev/null
+++ b/module/c/preprocessor2.scm
@@ -0,0 +1,590 @@
+(define-module (c preprocessor2)
+ :use-module (srfi srfi-1)
+ :use-module (srfi srfi-71)
+ :use-module (srfi srfi-88)
+ :use-module (ice-9 match)
+ :use-module (c cpp-environment)
+ :use-module (c eval2)
+ :use-module ((c cpp-environment function-like-macro) :select (function-like-macro))
+ :use-module ((c cpp-environment object-like-macro) :select (object-like-macro))
+ :use-module ((c cpp-environment internal-macro) :select (internal-macro))
+ :use-module ((hnh util) :select (->))
+ :use-module ((hnh util lens) :select (set))
+ :use-module (hnh util path)
+ :use-module ((c lex2) :select (lex))
+ :use-module ((c trigraph) :select (replace-trigraphs))
+ :use-module ((c line-fold) :select (fold-lines))
+ :export ())
+
+;; Returns two values:
+;; - tokens until a newline token is met
+;; - (potentially the newline token) and the remaining tokens
+(define (tokens-until-eol tokens)
+ (break (lambda (token) (equal? token '(whitespace "\n")))
+ tokens))
+
+;; match in predicates so non-lists fail properly
+(define (whitespace-token? token)
+ (match token
+ (`(whitespace ,_) #t)
+ (_ #f)))
+
+(define (unwrap-preprocessing-token token)
+ (match token
+ (`(preprocessing-token ,x) x)
+ (_ (scm-error 'wrong-type-arg "unwrap-preprocessing-token"
+ "Not a preprocessing token: ~s" (list token)
+ #f))))
+
+(define (preprocessing-token? token)
+ (catch 'wrong-type-arg
+ (lambda () (unwrap-preprocessing-token token))
+ (const #f)))
+
+
+;; Replace all whitespace with single spaces.
+(define (squeeze-whitespace tokens)
+ (match tokens
+ ('() '())
+ ((`(whitespace ,_) `(whitespace ,_) rest ...)
+ (squeeze-whitespace (cons '(whitespace " ") rest)))
+ ((`(whitespace ,_) rest ...)
+ (cons '(whitespace " ") (squeeze-whitespace rest)))
+ ((token rest ...)
+ (cons token (squeeze-whitespace rest)))))
+
+;; Returns the "source" of the token, as a preprocessing string literal token
+(define (stringify-token unwrapped-preprocessing-token)
+ (match unwrapped-preprocessing-token
+ (`(string-literal ,s)
+ (format #f "~s" s))
+ (`(header-name (q-string ,s))
+ (format #f "~s" s))
+ (`(header-name (h-string ,s))
+ (format #f "<~a>" s))
+ (`(identifier ,id) id)
+ (`(pp-number ,n) n)
+ (`(character-constant ,c)
+ (format #f "'~a'" c))
+ (`(punctuator ,p) p)))
+
+(define (stringify-tokens tokens)
+ `(preprocessing-token
+ (string-literal
+ ,(string-concatenate
+ (map (match-lambda (`(preprocessing-token ,body) (stringify-token body))
+ (`(whitespace ,_) " "))
+ (squeeze-whitespace tokens))))))
+
+;; Expand ## tokens
+;; TODO
+(define (expand-join macro tokens)
+ tokens)
+
+;; parameters is a lexeme list, as returned by parse-parameter-list
+(define (build-parameter-map macro parameters)
+ (if (macro-variadic? macro)
+ (let ((head rest (split-at parameters (length (macro-identifier-list macro)))))
+ ;; TODO commas (,) should be interleaved with rest
+ (cons (cons "__VA_ARGS__" rest)
+ (map cons (macro-identifier-list macro) head)))
+ (map cons
+ (macro-identifier-list macro)
+ parameters)))
+
+;; Drop leading whitespace tokens
+(define (drop-whitespace tokens)
+ (drop-while whitespace-token? tokens))
+
+(define (drop-whitespace-right tokens)
+ (-> tokens reverse drop-whitespace reverse))
+
+(define (drop-whitespace-both tokens)
+ (-> tokens
+ drop-whitespace
+ drop-whitespace-right))
+
+(define (expand-stringifiers macro parameter-map)
+ (let loop ((tokens (macro-body macro)))
+ (match tokens
+ (('(preprocessing-token (punctuator "#"))
+ rest ...)
+ (match (drop-whitespace rest)
+ ((`(preprocessing-token (identifier ,x)) rest ...)
+ (unless (member x (macro-identifier-list macro))
+ (scm-error 'macro-expand-error "expand-stringifiers"
+ "'#' is not followed by a macro parameter: ~s"
+ (list x) #f))
+ (cons (stringify-tokens (assoc-ref parameter-map x))
+ (loop rest)))))
+ ('() '())
+ ((token rest ...)
+ (cons token (loop rest))))))
+
+;; expand function like macro
+(define (apply-macro environment macro parameters)
+ (define parameter-map (build-parameter-map macro parameters))
+ (define stringify-resolved (expand-stringifiers macro parameter-map))
+ ;; TODO resolve ##
+ (define resulting-body stringify-resolved #; (expand-join macro stringify-resolved)
+ )
+ (resolve-token-stream (extend-environment environment parameter-map)
+ resulting-body))
+
+
+
+;; Expand object-like macro
+
+;; #define VALUE 10
+;; #define str(x) #x
+;; #define OTHER str(VALUE)
+;; OTHER
+;; ⇒ "VALUE"
+
+;; token should be the token stream just after the name of the macro
+(define (expand-macro environment macro tokens)
+ (cond ((object-macro? macro)
+ ;; Shouldn't we expand the macro body here?
+ (values environment (append (macro-body macro) tokens)))
+
+ ((function-macro? macro)
+ (let ((containing remaining newlines (parse-parameter-list tokens)))
+ (values (bump-line environment newlines)
+ ;; Macro output can be macro expanded
+ ;; TODO self-referential macros?
+ (append (apply-macro environment macro containing) remaining))))
+
+ ((internal-macro? macro)
+ (let ((containing remaining newlines (parse-parameter-list tokens)))
+ (values (bump-line environment newlines)
+ (append ((macro-body macro) environment containing)
+ remaining))))
+
+ (else
+ (scm-error 'wrong-type-arg "expand-macro"
+ "Macro isn't a macro: ~s"
+ (list macro) #f))))
+
+;; Takes a list of preprocessing tokens, and returns two values
+;; if the last token was '...'
+;; and a list of strings of all token names
+;; Note that this is ONLY #define f(x) forms
+;; not usage forms
+(define (parse-identifier-list tokens)
+ (let loop ((tokens (remove whitespace-token? tokens)) (done '()))
+ (match tokens
+ ('() (values #f (reverse done)))
+
+ ((`(preprocessing-token (identifier ,id)) rest ...)
+ (loop rest (cons id done)))
+
+ ((`(preprocessing-token (punctuator "...")))
+ (values #t (reverse done)))
+
+ ((`(preprocessing-token (punctuator "...")) rest ...)
+ (scm-error 'cpp-error "parse-identifier-list"
+ "'...' only allowed as last argument in identifier list. Rest: ~s"
+ (list rest) #f))
+
+ ((`(preprocessing-token (punctuator ",")) rest ...)
+ (loop rest done))
+
+ ((`(preprocessing-token ,other) rest ...)
+ (scm-error 'cpp-error "parse-identifier-list"
+ "Unexpected preprocessing-token in identifier list: ~s"
+ (list other) #f)))))
+
+
+
+;; helper procedure to parse-parameter-list.
+;; If a parameter is complex then whitespace is kept, but squeezed to single spaces. Surounding whitespace is removed.
+;; Example:
+;; #define str(x, y) #y
+;; str(x, ( 2, 4 ) )
+;; expands to:
+;; "( 2, 4 )"
+;; 6.10.3.2 p 2
+(define (cleanup-whitespace tokens)
+ (-> tokens drop-whitespace-both squeeze-whitespace))
+
+;; returns three values:
+;; - a list of tokens where each is a parameter to the function like macro
+;; - the remaining tokenstream
+;; - how many newlines were encountered
+;; The standard might call these "replacement lists"
+(define (parse-parameter-list tokens)
+ (let %loop ((depth 0) (newlines 0) (current '())
+ (parameters '()) (tokens tokens) (%first-iteration? #t))
+ (define* (loop tokens key:
+ (depth depth) (newlines newlines)
+ (current current) (parameters parameters))
+ (%loop depth newlines current parameters tokens #f))
+ (let ((current* (if (zero? depth)
+ current
+ (cons (car tokens) current))))
+ (match tokens
+ (('(whitespace "\n") rest ...)
+ (loop rest newlines: (1+ newlines) current: current*))
+ ((`(whitespace ,_) rest ...)
+ (loop rest current: current*))
+ (('(preprocessing-token (punctuator "(")) rest ...)
+ (loop rest depth: (1+ depth) current: current*))
+ (('(preprocessing-token (punctuator ")")) rest ...)
+ (if (= 1 depth)
+ ;; return value
+ (values
+ (if (null? parameters)
+ (cond ((null? current) '())
+ ((every whitespace-token? current) '())
+ (else (reverse
+ (cons (cleanup-whitespace (reverse current))
+ parameters))))
+ (reverse
+ (cond ((null? current) parameters)
+ ((every whitespace-token? current) parameters)
+ (else (cons (cleanup-whitespace (reverse current))
+ parameters)))))
+
+ rest
+ newlines)
+ (loop rest
+ depth: (1- depth)
+ current: current*)))
+ (('(preprocessing-token (punctuator ",")) rest ...)
+ (if (= 1 depth)
+ (loop rest
+ current: '()
+ parameters:
+ (cons (cond ((null? current) '())
+ ((every whitespace-token? current) '())
+ (else (cleanup-whitespace (reverse current))))
+ parameters))
+ (loop rest current: current*)))
+ ((_ rest ...)
+ (loop rest current: current*))))))
+
+
+(define (join-file-line environment)
+ (define file (current-file environment))
+ (define line (current-line environment))
+ (extend-environment
+ environment
+ ;; 6.10.8
+ (list
+ (object-like-macro
+ identifier: "__FILE__"
+ body: `((preprocessing-token (string-literal ,file))))
+ (object-like-macro
+ identifier: "__LINE__"
+ body: `((preprocessing-token (pp-number ,(number->string line))))))))
+
+(define (c-search-path) (make-parameter (list "." "/usr/include")))
+
+;; #include <stdio.h>
+(define (resolve-h-file string)
+ (cond ((path-absolute? string) string)
+ (else
+ (let ((filename
+ (find file-exists?
+ (map (lambda (path-prefix)
+ (path-append path-prefix string))
+ (c-search-path)))))
+ (if filename filename
+ (scm-error 'cpp-error "resolve-h-file"
+ "Can't resolve file: ~s"
+ (list string) #f))))))
+
+;; #include "myheader.h"
+(define (resolve-q-file string)
+ ;; This should always be a fallback (6.10.2, p. 3)
+ (cond (else (resolve-h-file string))))
+
+(define defined-macro
+ (internal-macro
+ identifier: "defined"
+ body: (lambda (environment tokens)
+ (match tokens
+ (`((preprocessing-token (identifier ,id)))
+ `(preprocessing-token (pp-number ,(boolean->c-boolean (in-environment? environment id)))))
+ (_ (scm-error 'cpp-error "defined"
+ "Invalid parameter list to `defined': ~s"
+ (list tokens) #f))))))
+
+;; environment, tokens → environment
+(define (handle-pragma environment tokens)
+ (match tokens
+ (`((preprocessing-token (identifier "STDC")) (whitespace ,_) ...
+ (preprocessing-token (identifier ,identifier)) (whitespace ,_) ...
+ (preprocessing-token (identifier ,on-off-switch)) (whitespace ,_) ...)
+ ;; TODO actually do something with the pragmas (probably just store them in the environment)
+ (format (current-error-port)
+ "#Pragma STDC ~a ~a" identifier on-off-switch)
+ environment)
+ (_ (format (current-error-port)
+ "Non-standard #Pragma: ~s~%" tokens)
+ environment)))
+
+
+;; TODO
+;; (define _Pragma-macro
+;; (internal-macro
+;; identifier: "_Pragma"
+;; body: (lambda (environment tokens)
+;; )))
+
+;; TODO
+(define (resolve-constant-expression tokens)
+ 'TODO
+ )
+
+;; Expands a token-stream which contains no pre-processing directives (#if:s, ...)
+(define (resolve-token-stream environment tokens)
+ (let loop ((tokens tokens))
+ (match tokens
+ ('() '())
+ ((`(preprocessing-token (identifier ,id)) rest ...)
+ (call-with-values (lambda () (maybe-extend-identifier environment id rest))
+ (lambda (_ tokens) (loop tokens))))
+ ((`(whitespace ,_) rest ...)
+ (loop rest))
+ ((token rest ...)
+ (cons token (loop rest))))))
+
+;; returns a new environment
+;; handle body of #if
+;; environment, (list token) → environment
+(define (resolve-for-if environment tokens)
+ (-> (extend-environment environment defined-macro)
+ (resolve-token-stream tokens)
+ resolve-constant-expression
+ c-boolean->boolean
+ (if (enter-active-if environment)
+ (enter-inactive-if environment))))
+
+;; environment, string, (list token) → environment, (list token)
+(define (maybe-extend-identifier environment identifier remaining-tokens)
+ (cond ((get-identifier environment identifier)
+ => (lambda (value) (expand-macro (join-file-line environment)
+ value
+ remaining-tokens)))
+ (else ; It wasn't an identifier, leave it as is
+ ;; TODO shouldn't we include the identifier in the remaining tokens stream?
+ (values environment remaining-tokens))))
+
+(define (resolve-and-include-header environment tokens)
+ (let loop ((%first-time #t) (tokens tokens))
+ (match (drop-whitespace tokens)
+ ((`(header-name (h-string ,str)) rest ...)
+ (cond ((remove whitespace-token? rest)
+ (negate null?)
+ => (lambda (tokens)
+ (scm-error 'cpp-error "resolve-and-include-header"
+ "Unexpected tokens after #include <>: ~s"
+ (list tokens) #f))))
+ (handle-preprocessing-tokens
+ environment
+ (-> str resolve-h-file read-file tokenize)))
+
+ ((`(header-name (q-string ,str)) rest ...)
+ (cond ((remove whitespace-token? rest)
+ (negate null?)
+ => (lambda (tokens)
+ (scm-error 'cpp-error "resolve-and-include-header"
+ "Unexpected tokens after #include <>: ~s"
+ (list tokens)
+ #f))))
+ (handle-preprocessing-tokens
+ environment
+ (-> str resolve-q-file read-file tokenize)))
+
+ (tokens
+ (unless %first-time
+ (scm-error 'cpp-error "resolve-and-include-header"
+ "Failed parsing tokens: ~s"
+ (list tokens) #f))
+ (loop #f (resolve-token-stream environment tokens))))))
+
+;; environment, tokens → environment
+(define (handle-line-directive environment tokens*)
+ (let loop ((%first-time #t) (tokens tokens*))
+ (match tokens
+ (`((preprocessing-token (pp-number ,line)) (whitespace ,_) ... rest ...)
+ (match rest
+ (`((preprocessing-token (string-literal ,file)) (whitespace ,_) ...)
+ (-> environment
+ (set current-line line)
+ (set current-file file)))
+ (`((whitespace ,_) ...)
+ (set environment current-line line))
+ (_ (unless %first-time
+ (scm-error 'cpp-error "handle-line-directive"
+ "Invalid line directive: ~s"
+ (list tokens*) #f))
+ (loop #f (resolve-token-stream environment tokens)))))
+ (_ (unless %first-time
+ (scm-error 'cpp-error "handle-line-directive"
+ "Invalid line directive: ~s"
+ (list tokens*) #f))
+ (loop #f (resolve-token-stream environment tokens))))))
+
+;; environment, tokens → environment
+(define (resolve-define environment tokens)
+ (match tokens
+ ((`(preprocessing-token (identifier ,identifier)) tail ...)
+ (-> environment
+ bump-line
+ (add-identifier!
+ identifier
+ (match tail
+ (('(preprocessing-token (punctuator "(")) rest ...)
+ ;; function like macro
+ (call-with-values (lambda () (break (lambda (token) (equal? token '(preprocessing-token (punctuator ")"))))
+ rest))
+ (lambda (identifier-list replacement-list)
+ (let ((variadic? identifiers (parse-identifier-list identifier-list)))
+
+ (function-like-macro
+ identifier: identifier
+ variadic?: variadic?
+ identifier-list: identifiers
+ ;; NOTE 6.10.3 states that there needs to be at least on whitespace here
+ body: (cdr replacement-list))))))
+ (_ (object-like-macro
+ identifier: identifier
+ body: tail))))))))
+
+
+
+;; environment, tokens -> environment, tokens
+(define (handle-preprocessing-tokens environment tokens)
+ (let loop ((environment environment) (tokens tokens))
+ (define (err fmt . args)
+ (scm-error 'cpp-error "handle-preprocessing-tokens"
+ (string-append "~a:~a " fmt)
+ (cons* (current-file environment)
+ (current-line environment)
+ args)
+ #f))
+
+ ;; TODO all of this needs to be surounded with a conditional for
+ ;; environmentns if status. However, ensure that each directive
+ ;; starts at start of line
+
+ (match tokens
+ ('() '())
+ ((`(whitespace "\n") `(whitespace ,_) '... '(preprocessing-token (puntuator "#")) rest ...)
+ ;; Line tokens are those in this line,
+ ;; while remaining tokens are the newline, follewed by the rest of the files tokens
+ (let ((line-tokens remaining-tokens (tokens-until-eol rest)))
+ ;; Actual tokens just removes all whitespace between "#" and "define"
+ (let ((actual-tokens (drop-whitespace line-tokens)))
+ (if (null? actual-tokens)
+ (loop (bump-line environment) remaining-tokens)
+ (match (car actual-tokens)
+ (`(preprocessing-token (identifier "if"))
+ (let ((environment (resolve-for-if environment actual-tokens)))
+ (loop environment remaining-tokens)))
+
+ (`(preprocessing-token (identifier "ifdef"))
+ (match actual-tokens
+ ((`(preprocessing-token (identifier ,id)) _ ...)
+ (loop
+ ((if (in-environment? environment id)
+ enter-active-if enter-inactive-if)
+ environment)
+ remaining-tokens))
+ (_ (err "Non identifier in ifdef: ~s" actual-tokens))))
+
+ (`(preprocessing-token (identifier "ifndef"))
+ (match actual-tokens
+ ((`(preprocessing-token (identifier ,id)) _ ...)
+ (loop
+ ((if (in-environment? environment id)
+ enter-inactive-if enter-active-if)
+ environment)
+ remaining-tokens))
+ (_ (err "Non identifier in ifndef: ~s" actual-tokens))))
+
+ ('(preprocessing-token (identifier "else"))
+ ;; TODO
+ 'TODO
+ )
+
+ ('(preprocessing-token (identifier "elif"))
+ (-> environment leave-if
+ (resolve-for-if actual-tokens)
+ (loop remaining-tokens)))
+
+ ('(preprocessing-token (identifier "endif"))
+ (loop (leave-if environment) remaining-tokens))
+
+ ('(preprocessing-token (identifier "include"))
+ (call-with-values
+ (lambda () (resolve-and-include-header environment (cdr actual-tokens)))
+ (lambda (environment tokens)
+ (loop environment (append tokens remaining-tokens)))))
+
+ ('(preprocessing-token (identifier "define"))
+ (let ((env (resolve-define environment (cdr actual-tokens))))
+ (loop env remaining-tokens))
+ )
+
+ ('(preprocessing-token (identifier "undef"))
+ (loop (match actual-tokens
+ (`((preprocessing-token (identifier ,id)))
+ (-> environment bump-line (remove-identifier! id))))
+ remaining-tokens))
+
+ ('(preprocessing-token (identifier "line"))
+ (loop (handle-line-directive environment actual-tokens)
+ remaining-tokens))
+
+ ('(preprocessing-token (identifier "error"))
+ ;; NOTE this is an "expected" error
+ (throw 'cpp-error actual-tokens))
+
+ ('(preprocessing-token (identifier "pragma"))
+ (loop (handle-pragma environment actual-tokens)
+ remaining-tokens)))))))
+
+ ((`(preprocessing-token (identifier ,id)) rest ...)
+ (call-with-values (lambda () (maybe-extend-identifier environment id rest))
+ loop))
+
+ (('(whitespace "\n") rest ...)
+ (cons '(whitespace "\n") (loop (bump-line environment) rest)))
+
+ ((token rest ...) (cons token (loop environment rest))))))
+
+
+
+(define (comment->whitespace expr)
+ (match expr
+ (('comment _) '(whitespace " "))
+ (other other)))
+
+(define (read-file path)
+ (call-with-input-file path (@ (ice-9 rdelim) read-string)))
+
+(define (comment->whitespace token)
+ (match token
+ (`(comment ,_) '(whitespace " "))
+ (other other)))
+
+(define (comments->whitespace tokens)
+ (map comment->whitespace tokens))
+
+;;; 5.1.11.2 Translation phases
+
+(define (tokenize string)
+ (-> string
+;;; 1. trigraph replacement
+ replace-trigraphs
+;;; 2. Line folding
+ fold-lines
+;;; 3. Decomposition into preprocenning tokens, whitespaces, and comments
+ lex
+;;; 4. Execution of preprocessing directives, all preprocessing directives are then deleted
+ comments->whitespace
+ ;; squeeze-whitespace-blocks
+ ))