aboutsummaryrefslogtreecommitdiff
path: root/module/c/preprocessor2.scm
diff options
context:
space:
mode:
Diffstat (limited to 'module/c/preprocessor2.scm')
-rw-r--r--module/c/preprocessor2.scm336
1 files changed, 267 insertions, 69 deletions
diff --git a/module/c/preprocessor2.scm b/module/c/preprocessor2.scm
index e99b1049..0bb101f8 100644
--- a/module/c/preprocessor2.scm
+++ b/module/c/preprocessor2.scm
@@ -5,21 +5,58 @@
:use-module (ice-9 match)
:use-module (c cpp-environment)
:use-module (c eval2)
- :use-module ((c cpp-environment function-like-macro) :select (function-like-macro))
+ :use-module ((c cpp-environment function-like-macro)
+ :select (function-like-macro variadic? identifier-list))
:use-module ((c cpp-environment object-like-macro) :select (object-like-macro))
:use-module ((c cpp-environment internal-macro) :select (internal-macro))
- :use-module ((hnh util) :select (->))
+ :use-module ((hnh util) :select (-> intersperse))
:use-module ((hnh util lens) :select (set))
:use-module (hnh util path)
+ :use-module (hnh util type)
:use-module ((c lex2) :select (lex))
:use-module ((c trigraph) :select (replace-trigraphs))
:use-module ((c line-fold) :select (fold-lines))
:export ())
+;;; Call graph
+;; squeeze-whitespace
+;; stringify-tokens
+;; expand-join
+;; build-parameter-map
+
+;; apply-macro
+;; - build-parameter-map
+;; - stringify-tokens
+;; - expand-join
+
+;; expand-macro
+;; - parse-parameter-list
+;; - apply-macro
+
+;; parse-parameter-list
+
+;; resolve-token-stream
+;; - maybe-extend-identifier
+
+;; maybe-extend-identifier
+;; - expand-macro
+
+;; resolve-define
+;; - parse-identifier-list
+
+;; expand-stringifiers
+;; - stringify-tokens
+
+;;;
+
+(define-syntax-rule (parameter-map? x)
+ (typecheck x (list-of (pair-of string? (list-of token?)))))
+
;; Returns two values:
;; - tokens until a newline token is met
;; - (potentially the newline token) and the remaining tokens
(define (tokens-until-eol tokens)
+ (typecheck tokens (list-of token?))
(break (lambda (token) (equal? token '(whitespace "\n")))
tokens))
@@ -29,6 +66,11 @@
(`(whitespace ,_) #t)
(_ #f)))
+(define (identifier-token? token)
+ (match token
+ (`(preprocessing-token (identifier ,id)) id)
+ (_ #f)))
+
(define (unwrap-preprocessing-token token)
(match token
(`(preprocessing-token ,x) x)
@@ -41,6 +83,10 @@
(lambda () (unwrap-preprocessing-token token))
(const #f)))
+(define (token? x)
+ (or (preprocessing-token? x)
+ (whitespace-token? x)))
+
;; Replace all whitespace with single spaces.
(define (squeeze-whitespace tokens)
@@ -68,43 +114,70 @@
(format #f "'~a'" c))
(`(punctuator ,p) p)))
+;; takes a token list, and return a single string literal token
(define (stringify-tokens tokens)
- `(preprocessing-token
- (string-literal
- ,(string-concatenate
- (map (match-lambda (`(preprocessing-token ,body) (stringify-token body))
- (`(whitespace ,_) " "))
- (squeeze-whitespace tokens))))))
+ `(preprocessing-token (string-literal ,(unlex tokens))))
+
+;; takes a list of preprocessing-token's, and return a "source" string
+(define (unlex tokens)
+ (typecheck tokens (list-of token?))
+ (string-concatenate
+ (map (match-lambda (`(preprocessing-token ,body) (stringify-token body))
+ (`(whitespace ,_) " "))
+ (squeeze-whitespace tokens))))
+
;; Expand ## tokens
;; TODO
(define (expand-join macro tokens)
+ (typecheck macro macro?)
+ (typecheck tokens (list-of token?))
tokens)
;; parameters is a lexeme list, as returned by parse-parameter-list
(define (build-parameter-map macro parameters)
+ (typecheck macro macro?)
+ (typecheck parameters (list-of (list-of token?)))
(if (macro-variadic? macro)
(let ((head rest (split-at parameters (length (macro-identifier-list macro)))))
- ;; TODO commas (,) should be interleaved with rest
- (cons (cons "__VA_ARGS__" rest)
+ (cons (cons "__VA_ARGS__" (concatenate (intersperse
+ '((preprocessing-token (punctuator ",")))
+ rest)))
(map cons (macro-identifier-list macro) head)))
(map cons
(macro-identifier-list macro)
parameters)))
+
+;; TODO Deprecate?
+(define (parameter-map->macro-list param-map)
+ (typecheck param-map parameter-map?)
+ (map (lambda (pair)
+ (let ((identifier (car pair))
+ (body (cdr pair)))
+ (object-like-macro
+ identifier: identifier
+ body: body)))
+ param-map))
+
;; Drop leading whitespace tokens
(define (drop-whitespace tokens)
+ (typecheck tokens (list-of token?))
(drop-while whitespace-token? tokens))
(define (drop-whitespace-right tokens)
+ (typecheck tokens (list-of token?))
(-> tokens reverse drop-whitespace reverse))
(define (drop-whitespace-both tokens)
+ (typecheck tokens (list-of token?))
(-> tokens
drop-whitespace
drop-whitespace-right))
(define (expand-stringifiers macro parameter-map)
+ (typecheck macro macro?)
+ (typecheck parameter-map parameter-map?)
(let loop ((tokens (macro-body macro)))
(match tokens
(('(preprocessing-token (punctuator "#"))
@@ -123,13 +196,45 @@
;; expand function like macro
(define (apply-macro environment macro parameters)
- (define parameter-map (build-parameter-map macro parameters))
- (define stringify-resolved (expand-stringifiers macro parameter-map))
- ;; TODO resolve ##
- (define resulting-body stringify-resolved #; (expand-join macro stringify-resolved)
- )
- (resolve-token-stream (extend-environment environment parameter-map)
- resulting-body))
+ (typecheck environment cpp-environment?)
+ ;; Each element should be the lexeme list for that argument
+ (typecheck parameters (list-of (list-of token?)))
+ (typecheck macro macro?)
+ (when (or (and (variadic? macro)
+ (> (length (identifier-list macro))
+ (length parameters)))
+ (and (not (variadic? macro))
+ (not (= (length (identifier-list macro))
+ (length parameters)))))
+ (scm-error 'cpp-arity-error "apply-macro"
+ ;; TODO better error message for variadic macros
+ "Wrong number of arguments to macro ~s, expected ~s, got ~s"
+ (list (macro-identifier macro)
+ (length (identifier-list macro))
+ (length parameters))
+ (list macro)))
+ (let ()
+ (define parameter-map (build-parameter-map macro parameters))
+ (define stringify-resolved (expand-stringifiers macro parameter-map))
+ ;; TODO resolve ##
+ (define resulting-body stringify-resolved #; (expand-join macro stringify-resolved))
+ (define (bound-identifier? id)
+ (member id (if (variadic? macro)
+ (cons "__VA_ARGS__" (identifier-list macro))
+ (identifier-list macro))))
+ (let loop ((tokens resulting-body))
+ (cond ((null? tokens) '())
+ ;; TODO the parameters should be macro-expanded before being inserted
+ ((identifier-token? (car tokens))
+ bound-identifier?
+ => (lambda (id) (append (assoc-ref parameter-map id)
+ (loop (cdr tokens)))))
+ (else (cons (car tokens)
+ (loop (cdr tokens))))))
+ #;
+ (let ((env (extend-environment environment
+ (parameter-map->macro-list parameter-map))))
+ (resolve-token-stream env resulting-body))))
@@ -142,28 +247,32 @@
;; ⇒ "VALUE"
;; token should be the token stream just after the name of the macro
-(define (expand-macro environment macro tokens)
- (cond ((object-macro? macro)
- ;; Shouldn't we expand the macro body here?
- (values environment (append (macro-body macro) tokens)))
-
- ((function-macro? macro)
- (let ((containing remaining newlines (parse-parameter-list tokens)))
- (values (bump-line environment newlines)
- ;; Macro output can be macro expanded
- ;; TODO self-referential macros?
- (append (apply-macro environment macro containing) remaining))))
-
- ((internal-macro? macro)
- (let ((containing remaining newlines (parse-parameter-list tokens)))
- (values (bump-line environment newlines)
- (append ((macro-body macro) environment containing)
- remaining))))
-
- (else
- (scm-error 'wrong-type-arg "expand-macro"
- "Macro isn't a macro: ~s"
- (list macro) #f))))
+(define (expand-macro environment macro remaining-tokens)
+ (typecheck environment cpp-environment?)
+ (typecheck remaining-tokens (list-of token?))
+ (let ((name (macro-identifier macro)))
+ (cond ((object-macro? macro)
+ (values environment (append (mark-noexpand (macro-body macro) name)
+ remaining-tokens)))
+
+ ((function-macro? macro)
+ (let ((containing remaining newlines (parse-parameter-list remaining-tokens)))
+ (values (bump-line environment newlines)
+ (append (mark-noexpand (apply-macro environment macro containing)
+ name)
+ remaining))))
+
+ ((internal-macro? macro)
+ (let ((containing remaining newlines (parse-parameter-list remaining-tokens)))
+ (values (bump-line environment newlines)
+ (append (mark-noexpand ((macro-body macro) environment containing)
+ name)
+ remaining))))
+
+ (else
+ (scm-error 'wrong-type-arg "expand-macro"
+ "Macro isn't a macro: ~s"
+ (list macro) #f)))))
;; Takes a list of preprocessing tokens, and returns two values
;; if the last token was '...'
@@ -171,6 +280,7 @@
;; Note that this is ONLY #define f(x) forms
;; not usage forms
(define (parse-identifier-list tokens)
+ (typecheck tokens (list-of token?))
(let loop ((tokens (remove whitespace-token? tokens)) (done '()))
(match tokens
('() (values #f (reverse done)))
@@ -205,6 +315,7 @@
;; "( 2, 4 )"
;; 6.10.3.2 p 2
(define (cleanup-whitespace tokens)
+ (typecheck tokens (list-of token?))
(-> tokens drop-whitespace-both squeeze-whitespace))
;; returns three values:
@@ -212,9 +323,10 @@
;; - the remaining tokenstream
;; - how many newlines were encountered
;; The standard might call these "replacement lists"
-(define (parse-parameter-list tokens)
+(define (parse-parameter-list tokens*)
+ (typecheck tokens* (list-of token?))
(let %loop ((depth 0) (newlines 0) (current '())
- (parameters '()) (tokens tokens) (%first-iteration? #t))
+ (parameters '()) (tokens tokens*) (%first-iteration? #t))
(define* (loop tokens key:
(depth depth) (newlines newlines)
(current current) (parameters parameters))
@@ -223,6 +335,9 @@
current
(cons (car tokens) current))))
(match tokens
+ (() (scm-error 'misc-error "parse-parameter-list"
+ "Ran out of tokens while parsing: ~s"
+ (list tokens*) #f))
(('(whitespace "\n") rest ...)
(loop rest newlines: (1+ newlines) current: current*))
((`(whitespace ,_) rest ...)
@@ -264,6 +379,7 @@
(loop rest current: current*))))))
+;; Add __FILE__ and __LINE__ object macros to the environment
(define (join-file-line environment)
(define file (current-file environment))
(define line (current-line environment))
@@ -282,6 +398,7 @@
;; #include <stdio.h>
(define (resolve-h-file string)
+ (typecheck string string?)
(cond ((path-absolute? string) string)
(else
(let ((filename
@@ -296,6 +413,7 @@
;; #include "myheader.h"
(define (resolve-q-file string)
+ (typecheck string string?)
;; This should always be a fallback (6.10.2, p. 3)
(cond (else (resolve-h-file string))))
@@ -304,8 +422,9 @@
identifier: "defined"
body: (lambda (environment tokens)
(match tokens
- (`((preprocessing-token (identifier ,id)))
- `(preprocessing-token (pp-number ,(boolean->c-boolean (in-environment? environment id)))))
+ (`(((preprocessing-token (identifier ,id))))
+ (let ((in-env (boolean->c-boolean (in-environment? environment id))))
+ (lex (number->string in-env))))
(_ (scm-error 'cpp-error "defined"
"Invalid parameter list to `defined': ~s"
(list tokens) #f))))))
@@ -334,26 +453,94 @@
;; TODO
(define (resolve-constant-expression tokens)
+ (typecheck tokens (list-of token?))
'TODO
)
+(define* (pprint-macro x optional: (p (current-output-port)))
+ (cond ((internal-macro? x)
+ (format p "/* ~a INTERNAL MACRO */"
+ (macro-identifier x)))
+ ((object-macro? x)
+ (format p "#define ~a ~a"
+ (macro-identifier x)
+ (unlex (macro-body x))))
+ ((function-macro? x)
+ (format p "#define ~a(~a) ~a"
+ (macro-identifier x)
+ (string-join (macro-identifier-list x) "," 'infix)
+ (unlex (macro-body x))))))
+
+(define* (pprint-environment environment optional: (port (current-error-port)))
+ (display "== Environment ==\n")
+ (hash-for-each (lambda (key macro)
+ (pprint-macro macro port)
+ (newline port))
+ (cpp-variables environment)))
+
+(define noexpand (make-object-property))
+
+(define (mark-noexpand tokens name)
+ (typecheck tokens (list-of token?))
+ (typecheck name string?)
+ (let ((tokens tokens))
+ (for-each (lambda (token) (set! (noexpand token) (cons name (noexpand token)))) tokens)
+ tokens))
+
+(define (list-like->list x)
+ (if (not (pair? x))
+ (list x)
+ (cons (car x) (list-like->list (cdr x)))))
+
+(define (marked-noexpand? token)
+ (cond ((identifier-token? token)
+ => (lambda (id) (member id (list-like->list (noexpand token)))))
+ (else #f)))
+
;; Expands a token-stream which contains no pre-processing directives (#if:s, ...)
(define (resolve-token-stream environment tokens)
- (let loop ((tokens tokens))
+ (typecheck environment cpp-environment?)
+ (typecheck tokens (list-of token?))
+ ;; (pprint-environment environment)
+ ;; (format (current-error-port) "~a~%~%" (unlex tokens))
+ (let loop ((environment environment) (tokens tokens))
+ (unless (null? tokens)
+ (format (current-error-port) "~s [~a] [~a]~%"
+ (car tokens)
+ (noexpand (car tokens))
+ (marked-noexpand? (car tokens))))
+ (format (current-error-port) "~a~%" (unlex tokens))
+ (cond ((null? tokens) '())
+ ((car tokens)
+ (lambda (x) (and (identifier-token? x)
+ (not (marked-noexpand? x))))
+ => (lambda (token)
+ (call-with-values
+ (lambda () (maybe-extend-identifier environment
+ (identifier-token? token)
+ (cdr tokens)))
+ loop)))
+ (else (cons (car tokens)
+ (loop environment (cdr tokens))))
+ )
+ #;
(match tokens
('() '())
((`(preprocessing-token (identifier ,id)) rest ...)
(call-with-values (lambda () (maybe-extend-identifier environment id rest))
- (lambda (_ tokens) (loop tokens))))
- ((`(whitespace ,_) rest ...)
- (loop rest))
+ loop))
+ ;; ((`(whitespace ,_) rest ...)
+ ;; (loop environment rest))
((token rest ...)
- (cons token (loop rest))))))
+ (cons token (loop environment rest))))))
;; returns a new environment
;; handle body of #if
;; environment, (list token) → environment
(define (resolve-for-if environment tokens)
+ (typecheck environment cpp-environment?)
+ (typecheck tokens (list-of token?))
+
(-> (extend-environment environment defined-macro)
(resolve-token-stream tokens)
resolve-constant-expression
@@ -363,15 +550,24 @@
;; environment, string, (list token) → environment, (list token)
(define (maybe-extend-identifier environment identifier remaining-tokens)
+ (typecheck environment cpp-environment?)
+ (typecheck identifier string?)
+ (typecheck remaining-tokens (list-of token?))
+ ;; (typecheck continuation procedure?) ; TODO arity?
(cond ((get-identifier environment identifier)
=> (lambda (value) (expand-macro (join-file-line environment)
value
remaining-tokens)))
(else ; It wasn't an identifier, leave it as is
- ;; TODO shouldn't we include the identifier in the remaining tokens stream?
- (values environment remaining-tokens))))
+ (values environment
+ (append (mark-noexpand `((preprocessing-token (identifier ,identifier)))
+ identifier)
+ remaining-tokens)))))
(define (resolve-and-include-header environment tokens)
+ (typecheck environment cpp-environment?)
+ (typecheck tokens (list-of token?))
+
(let loop ((%first-time #t) (tokens tokens))
(match (drop-whitespace tokens)
((`(header-name (h-string ,str)) rest ...)
@@ -406,6 +602,9 @@
;; environment, tokens → environment
(define (handle-line-directive environment tokens*)
+ (typecheck environment cpp-environment?)
+ (typecheck tokens* (list-of token?))
+
(let loop ((%first-time #t) (tokens tokens*))
(match tokens
(`((preprocessing-token (pp-number ,line)) (whitespace ,_) ... rest ...)
@@ -429,6 +628,9 @@
;; environment, tokens → environment
(define (resolve-define environment tokens)
+ (typecheck environment cpp-environment?)
+ (typecheck tokens (list-of token?))
+
(match tokens
((`(preprocessing-token (identifier ,identifier)) tail ...)
(-> environment
@@ -438,20 +640,22 @@
(match tail
(('(preprocessing-token (punctuator "(")) rest ...)
;; function like macro
- (call-with-values (lambda () (break (lambda (token) (equal? token '(preprocessing-token (punctuator ")"))))
- rest))
- (lambda (identifier-list replacement-list)
- (let ((variadic? identifiers (parse-identifier-list identifier-list)))
-
- (function-like-macro
- identifier: identifier
- variadic?: variadic?
- identifier-list: identifiers
- ;; NOTE 6.10.3 states that there needs to be at least on whitespace here
- body: (cdr replacement-list))))))
+ (let ((identifier-list
+ replacement-list
+ (break (lambda (token) (equal? token '(preprocessing-token (punctuator ")"))))
+ rest)))
+ (let ((variadic? identifiers (parse-identifier-list identifier-list)))
+ (function-like-macro
+ identifier: identifier
+ variadic?: variadic?
+ identifier-list: identifiers
+ ;; NOTE 6.10.3 states that there needs to be at least on whitespace here
+ ;; cdr drops the end parenthesis of the definition
+ ;; surrounding whitespace is not part of the replacement list (6.10.3 p.7)
+ body: (drop-whitespace-both (cdr replacement-list))))))
(_ (object-like-macro
identifier: identifier
- body: tail))))))))
+ body: (drop-whitespace-both tail)))))))))
@@ -548,8 +752,7 @@
remaining-tokens)))))))
((`(preprocessing-token (identifier ,id)) rest ...)
- (call-with-values (lambda () (maybe-extend-identifier environment id rest))
- loop))
+ (maybe-extend-identifier environment id rest loop))
(('(whitespace "\n") rest ...)
(cons '(whitespace "\n") (loop (bump-line environment) rest)))
@@ -558,11 +761,6 @@
-(define (comment->whitespace expr)
- (match expr
- (('comment _) '(whitespace " "))
- (other other)))
-
(define (read-file path)
(call-with-input-file path (@ (ice-9 rdelim) read-string)))