aboutsummaryrefslogtreecommitdiff
path: root/module/c/preprocessor2.scm
diff options
context:
space:
mode:
Diffstat (limited to 'module/c/preprocessor2.scm')
-rw-r--r--module/c/preprocessor2.scm496
1 files changed, 496 insertions, 0 deletions
diff --git a/module/c/preprocessor2.scm b/module/c/preprocessor2.scm
new file mode 100644
index 00000000..19daabfb
--- /dev/null
+++ b/module/c/preprocessor2.scm
@@ -0,0 +1,496 @@
+(define-module (c preprocessor2)
+ :use-module (srfi srfi-1)
+ :use-module (srfi srfi-71)
+ :use-module (srfi srfi-88)
+ :use-module (ice-9 match)
+ :use-module (c cpp-environment)
+ :use-module (c eval2)
+ :use-module ((c cpp-environment function-like-macro) :select (function-like-macro))
+ :use-module ((c cpp-environment object-like-macro) :select (object-like-macro))
+ :use-module ((c cpp-environment internal-macro) :select (internal-macro))
+ :use-module ((hnh util) :select (->))
+ :use-module ((hnh util lens) :select (set))
+ :use-module (hnh util path)
+ :use-module ((c lex2) :select (lex))
+ :use-module ((c trigraph) :select (replace-trigraphs))
+ :use-module ((c line-fold) :select (fold-lines))
+ :export ())
+
+(define (tokens-until-eol tokens)
+ (break (lambda (token) (equal? token '(whitespace "\n")))
+ tokens))
+
+(define (whitespace-token? token)
+ (eq? 'whitespace (car token)))
+
+(define (preprocessing-token? token)
+ (eq? 'preprocessing-token token))
+
+(define (squeeze-whitespace tokens)
+ (match tokens
+ ('() '())
+ (`((whitespace ,_) (whitespace ,_) ,rest ...)
+ (squeeze-whitespace (cons '(whitespace " ") rest)))
+ (`((whitespace ,_) ,rest ...)
+ (cons '(whitespace " ") (squeeze-whitespace rest)))
+ ((token rest ...)
+ (cons token (squeeze-whitespace rest)))))
+
+(define (stringify-token token)
+ ;; TODO propperly implement this
+ `(preprocessing-token
+ (string-literal ,(with-output-to-string (lambda () (display token))))))
+
+(define (stringify-tokens tokens)
+ (with-output-to-string
+ (lambda ()
+ (for-each (compose display stringify-token)
+ (squeeze-whitespace tokens)))))
+
+;; Expand ## tokens
+;; TODO
+(define (expand-join macro tokens)
+ tokens)
+
+;; expand function like macro
+(define (apply-macro environment macro parameters)
+ (define parameter-map
+ (if (macro-variadic? macro)
+ (let ((head rest (split-at parameters (length (macro-identifier-list macro)))))
+ ;; TODO commas (,) should be interleaved with rest
+ (cons (cons "__VA_ARGS__" rest)
+ (map cons (macro-identifier-list macro) head)))
+ (map cons
+ (macro-identifier-list macro)
+ parameters)))
+
+ ;; resolve strigify operators
+ (define stringify-resolved
+ (let loop ((tokens (macro-body macro)))
+ (match tokens
+ (`((preprocessing-token (punctuator "#"))
+ (whitespace ,_) ...
+ (preprocessing-token (identifier ,x))
+ ,rest ...)
+ (unless (member x (macro-identifier-list macro))
+ (scm-error 'macro-expand-error "apply-macro"
+ "'#' is not followed by a macro parameter: ~s"
+ (list x) #f)
+ (cons (stringify-tokens (assoc-ref parameter-map x))
+ (loop rest))))
+ ('() '())
+ ((token rest ...)
+ (cons token (loop rest))))))
+
+ ;; TODO
+ ;; - resolve ##
+ (define resulting-body
+ (expand-join macro stringify-resolved))
+
+ ;; - subtitute parameters
+ ;; TODO what am I doing here?
+ (expand-macro (-> environment
+ (extend-environment parameter-map))
+ resulting-body))
+
+
+
+;; Expand object-like macro
+
+;; #define VALUE 10
+;; #define str(x) #x
+;; #define OTHER str(VALUE)
+;; OTHER
+;; ⇒ "VALUE"
+
+(define (expand-macro environment macro tokens)
+ (cond ((object-macro? macro)
+ (values environment (append (macro-body macro) tokens)))
+
+ ((function-macro? macro)
+ (let ((containing remaining newlines (parse-parameter-list tokens)))
+ (values (bump-line environment newlines)
+ ;; Macro output can be macro expanded
+ ;; TODO self-referential macros?
+ (append (apply-macro environment macro containing) remaining))))
+
+ ((internal-macro? macro)
+ (let ((containing remaining newlines (parse-parameter-list tokens)))
+ (values (bump-line environment newlines)
+ (append ((macro-body macro) environment containing)
+ remaining))))))
+
+;; Takes a list of preprocessing tokens, and returns two values
+;; if the last token was '...'
+;; and a list of strings of all token names
+;; Note that this is ONLY #define f(x) forms
+;; not usage forms
+(define (parse-identifier-list tokens)
+ (let loop ((tokens (remove whitespace-token? tokens)) (done '()))
+ (match tokens
+ ('() (values #f (reverse done)))
+
+ ((`(preprocessing-token (punctuation "...")))
+ (values #t (reverse done)))
+
+ ((`(preprocessing-token (identifier ,id)) rest ...)
+ (loop rest (cons id done)))
+
+ ((`(preprocessing-token (punctuation "...")) rest ...)
+ (scm-error 'cpp-error "parse-identifier-list"
+ "'...' only allowed as last argument in identifier list"
+ '() #f))
+
+ ((`(preprocessing-token ,other) rest ...)
+ (scm-error 'cpp-error "parse-identifier-list"
+ "Unexpected preprocessing-token in identifier list: ~s"
+ (list other) #f)))))
+
+
+;; returns three values:
+;; - a list of tokens where each is a parameter to the function like macro
+;; - the remaining tokenstream
+;; - how many newlines were encountered
+(define (parse-parameter-list tokens)
+ (let %loop ((depth 0) (newlines 0) (current '())
+ (parameters '()) (tokens tokens) (%first-iteration? #t))
+ (define* (loop tokens key:
+ (depth depth) (newlines newlines)
+ (current current) (parameters parameters))
+ (%loop depth newlines current parameters tokens #f))
+ (let ((current* (if (zero? depth)
+ current
+ (cons (car tokens) current))))
+ (match tokens
+ (`((whitespace "\n") ,rest ...)
+ (loop rest newlines: (1+ newlines) current: current*))
+ (`((whitespace ,_) ,rest ...)
+ (loop rest current: current*))
+ (`((preprocessing-token (punctuator "(")) ,rest ...)
+ (loop rest depth: (1+ depth) current: current*))
+ (`((preprocessing-token (punctuator ")")) ,rest ...)
+ (if (= 1 depth)
+ (values (reverse (cons (reverse current) parameters))
+ rest
+ newlines)
+ (loop rest
+ depth: (1- depth)
+ current: current*)))
+ (`((preprocessing-token (punctuator ",")) ,rest ...)
+ (if (= 1 depth)
+ (loop rest
+ current: '()
+ parameters: (cons (reverse current) parameters))
+ (loop rest current: current*)))))))
+
+
+(define (join-file-line environment)
+ (define file (current-file environment))
+ (define line (current-line environment))
+ (extend-environment
+ environment
+ ;; 6.10.8
+ `(("__FILE__" . (preprocessing-token (string-literal ,file)))
+ ("__LINE__" . (preprocessing-token (pp-number ,(number->string line)))))))
+
+(define (c-search-path) (make-parameter (list "." "/usr/include")))
+
+;; #include <stdio.h>
+(define (resolve-h-file string)
+ (cond ((path-absolute? string) string)
+ (else
+ (let ((filename
+ (find file-exists?
+ (map (lambda (path-prefix)
+ (path-append path-prefix string))
+ (c-search-path)))))
+ (if filename filename
+ (scm-error 'cpp-error "resolve-h-file"
+ "Can't resolve file: ~s"
+ (list string) #f))))))
+
+;; #include "myheader.h"
+(define (resolve-q-file string)
+ ;; This should always be a fallback (6.10.2, p. 3)
+ (cond (else (resolve-h-file string))))
+
+(define defined-macro
+ (internal-macro
+ identifier: "defined"
+ body: (lambda (environment tokens)
+ (match tokens
+ (`((preprocessor-token (identifier ,id)))
+ `(preprocessor-token (pp-number ,(boolean->c-boolean (in-environment? environment id)))))
+ (_ (scm-error 'cpp-error "defined"
+ "Invalid parameter list to `defined': ~s"
+ (list tokens) #f))))))
+
+;; environment, tokens → environment
+(define (handle-pragma environment tokens)
+ (match tokens
+ (`((preprocessing-token (identifier "STDC")) (whitespace ,_) ...
+ (preprocessing-token (identifier ,identifier)) (whitespace ,_) ...
+ (preprocessing-token (identifier ,on-off-switch)) (whitespace ,_) ...)
+ ;; TODO actually do something with the pragmas (probably just store them in the environment)
+ (format (current-error-port)
+ "#Pragma STDC ~a ~a" identifier on-off-switch)
+ environment)
+ (_ (format (current-error-port)
+ "Non-standard #Pragma: ~s~%" tokens)
+ environment)))
+
+
+;; TODO
+;; (define _Pragma-macro
+;; (internal-macro
+;; identifier: "_Pragma"
+;; body: (lambda (environment tokens)
+;; )))
+
+;; TODO
+(define (resolve-constant-expression tokens)
+ 'TODO
+ )
+
+(define (resolve-token-stream environment tokens)
+ (let loop ((tokens tokens))
+ (match tokens
+ ('() '())
+ (`((preprocessing-token (identifier ,id)) ,rest ...)
+ (call-with-values (lambda () (maybe-extend-identifier environment id rest))
+ (lambda (_ tokens) (loop tokens))))
+ (`((whitespace ,_) ,rest ...)
+ (loop rest))
+ ((token rest ...)
+ (cons token (loop rest))))))
+
+;; returns a new environment
+;; handle body of #if
+;; environment, (list token) → environment
+(define (resolve-for-if environment tokens)
+ (-> (extend-environment environment defined-macro)
+ (resolve-token-stream tokens)
+ resolve-constant-expression
+ c-boolean->boolean
+ (if (enter-active-if environment)
+ (enter-inactive-if environment))))
+
+;; environment, string, (list token) → environment, (list token)
+(define (maybe-extend-identifier environment identifier remaining-tokens)
+ (cond ((get-identifier environment identifier)
+ => (lambda (value) (expand-macro (join-file-line environment) value remaining-tokens)))
+ (else ; It wasn't an identifier, leave it as is
+ (values environment remaining-tokens))))
+
+(define (resolve-and-include-header environment tokens)
+ (let loop ((%first-time #t) (tokens tokens))
+ (match (drop-while whitespace-token? tokens)
+ ((`(header-name (h-string ,str)) rest ...)
+ (cond ((remove whitespace-token? rest)
+ (negate null?)
+ => (lambda (tokens)
+ (scm-error 'cpp-error "resolve-and-include-header"
+ "Unexpected tokens after #include <>: ~s"
+ (list tokens) #f))))
+ (handle-preprocessing-tokens
+ environment
+ (-> str resolve-h-file read-file tokenize)))
+
+ ((`(header-name (q-string ,str)) rest ...)
+ (cond ((remove whitespace-token? rest)
+ (negate null?)
+ => (lambda (tokens)
+ (scm-error 'cpp-error "resolve-and-include-header"
+ "Unexpected tokens after #include <>: ~s"
+ (list tokens)
+ #f))))
+ (handle-preprocessing-tokens
+ environment
+ (-> str resolve-q-file read-file tokenize)))
+
+ (tokens
+ (unless %first-time
+ (scm-error 'cpp-error "resolve-and-include-header"
+ "Failed parsing tokens: ~s"
+ (list tokens) #f))
+ (loop #f (resolve-token-stream environment tokens))))))
+
+;; environment, tokens → environment
+(define (handle-line-directive environment tokens*)
+ (let loop ((%first-time #t) (tokens tokens*))
+ (match tokens
+ (`((preprocessing-token (pp-number ,line)) (whitespace ,_) ... rest ...)
+ (match rest
+ (`((preprocessing-token (string-literal ,file)) (whitespace ,_) ...)
+ (-> environment
+ (set current-line line)
+ (set current-file file)))
+ (`((whitespace ,_) ...)
+ (set environment current-line line))
+ (_ (unless %first-time
+ (scm-error 'cpp-error "handle-line-directive"
+ "Invalid line directive: ~s"
+ (list tokens*) #f))
+ (loop #f (resolve-token-stream environment tokens)))))
+ (_ (unless %first-time
+ (scm-error 'cpp-error "handle-line-directive"
+ "Invalid line directive: ~s"
+ (list tokens*) #f))
+ (loop #f (resolve-token-stream environment tokens))))))
+
+;; environment, tokens → environment
+(define (resolve-define environment tokens)
+ (match tokens
+ (`((preprocessing-token (identifier ,identifier)) tail ...)
+ (-> environment
+ bump-line
+ (add-identifier!
+ identifier
+ (if (equal? '(preprocessing-token (punctuator "(")) (car tail))
+ ;; function like macro
+ (call-with-values (lambda () (break (lambda (token) (equal? token '(preprocessing-token (punctuator ")"))))
+ (cdr tail)))
+ (lambda (identifier-list replacement-list)
+ (let ((variadic? identifiers (parse-identifier-list identifier-list)))
+ (function-like-macro
+ identifier: identifier
+ variadic?: variadic?
+ identifier-list: identifiers
+ ;; NOTE 6.10.3 states that there needs to be at least on whitespace here
+ body: (cdr replacement-list)))))
+
+ (object-like-macro
+ identifier: identifier
+ body: tail)))))))
+
+
+
+;; environment, tokens -> environment, tokens
+(define (handle-preprocessing-tokens environment tokens)
+ (let loop ((environment environment) (tokens tokens))
+ (define (err fmt . args)
+ (scm-error 'cpp-error "handle-preprocessing-tokens"
+ (string-append "~a:~a " fmt)
+ (cons* (current-file environment)
+ (current-line environment)
+ args)
+ #f))
+
+ (match tokens
+ ('() '())
+ (`((whitespace "\n") (whitespace ,_) ... (preprocessing-token (puntuator "#")) ,rest ...)
+ ;; Line tokens are those in this line,
+ ;; while remaining tokens are the newline, follewed by the rest of the files tokens
+ (let ((line-tokens remaining-tokens (tokens-until-eol rest)))
+ ;; Actual tokens just removes all whitespace between "#" and "define"
+ (let ((actual-tokens (drop-while whitespace-token? line-tokens)))
+ (if (null? actual-tokens)
+ (loop (bump-line environment) remaining-tokens)
+ (match (car actual-tokens)
+ (`(preprocessing-token (identifier "if"))
+ (let ((environment (resolve-for-if environment actual-tokens)))
+ (loop environment remaining-tokens)))
+
+ (`(preprocessing-token (identifier "ifdef"))
+ (match actual-tokens
+ (`((preprocessing-token (identifier ,id)) ,_ ...)
+ (loop
+ ((if (in-environment? environment id)
+ enter-active-if enter-inactive-if)
+ environment)
+ remaining-tokens))
+ (_ (err "Non identifier in ifdef: ~s" actual-tokens))))
+
+ (`(preprocessing-token (identifier "ifndef"))
+ (match actual-tokens
+ (`((preprocessing-token (identifier ,id)) ,_ ...)
+ (loop
+ ((if (in-environment? environment id)
+ enter-inactive-if enter-active-if)
+ environment)
+ remaining-tokens))
+ (_ (err "Non identifier in ifndef: ~s" actual-tokens))))
+
+ (`(preprocessing-token (identifier "else"))
+ ;; TODO
+ 'TODO
+ )
+
+ (`(preprocessing-token (identifier "elif"))
+ (-> environment leave-if
+ (resolve-for-if actual-tokens)
+ (loop remaining-tokens)))
+
+ (`(preprocessing-token (identifier "endif"))
+ (loop (leave-if environment) remaining-tokens))
+
+ (`(preprocessing-token (identifier "include"))
+ (call-with-values
+ (lambda () (resolve-and-include-header environment (cdr actual-tokens)))
+ (lambda (environment tokens)
+ (loop environment (append tokens remaining-tokens)))))
+
+ (`(preprocessing-token (identifier "define"))
+ (let ((env (resolve-define environment (cdr actual-tokens))))
+ (loop env remaining-tokens))
+ )
+
+ (`(preprocessing-token (identifier "undef"))
+ (loop (match actual-tokens
+ (`((preprocessing-token (identifier ,id)))
+ (-> environment bump-line (remove-identifier! id))))
+ remaining-tokens))
+
+ (`(preprocessing-token (identifier "line"))
+ (loop (handle-line-directive environment actual-tokens)
+ remaining-tokens))
+
+ (`(preprocessing-token (identifier "error"))
+ ;; NOTE this is an "expected" error
+ (throw 'cpp-error actual-tokens))
+
+ (`(preprocessing-token (identifier "pragma"))
+ (loop (handle-pragma environment actual-tokens)
+ remaining-tokens)))))))
+
+ (`((preprocessing-token (identifier ,id)) ,rest ...)
+ (call-with-values (lambda () (maybe-extend-identifier environment id rest))
+ loop))
+
+ (('(whitespace "\n") rest ...)
+ (cons '(whitespace "\n") (loop (bump-line environment) rest)))
+
+ ((token rest ...) (cons token (loop environment rest))))))
+
+
+
+(define (comment->whitespace expr)
+ (match expr
+ (('comment _) '(whitespace " "))
+ (other other)))
+
+(define (read-file path)
+ (call-with-input-file path (@ (ice-9 rdelim) read-string)))
+
+(define (comment->whitespace token)
+ (match token
+ (`(comment ,_) '(whitespace " "))
+ (other other)))
+
+(define (comments->whitespace tokens)
+ (map comment->whitespace tokens))
+
+;;; 5.1.11.2 Translation phases
+
+(define (tokenize string)
+ (-> string
+;;; 1. trigraph replacement
+ replace-trigraphs
+;;; 2. Line folding
+ fold-lines
+;;; 3. Decomposition into preprocenning tokens, whitespaces, and comments
+ lex
+;;; 4. Execution of preprocessing directives, all preprocessing directives are then deleted
+ comments->whitespace
+ ;; squeeze-whitespace-blocks
+ ))