aboutsummaryrefslogtreecommitdiff
path: root/module/c/preprocessor2.scm
diff options
context:
space:
mode:
Diffstat (limited to 'module/c/preprocessor2.scm')
-rw-r--r--module/c/preprocessor2.scm302
1 files changed, 198 insertions, 104 deletions
diff --git a/module/c/preprocessor2.scm b/module/c/preprocessor2.scm
index 19daabfb..e99b1049 100644
--- a/module/c/preprocessor2.scm
+++ b/module/c/preprocessor2.scm
@@ -16,82 +16,120 @@
:use-module ((c line-fold) :select (fold-lines))
:export ())
+;; Returns two values:
+;; - tokens until a newline token is met
+;; - (potentially the newline token) and the remaining tokens
(define (tokens-until-eol tokens)
(break (lambda (token) (equal? token '(whitespace "\n")))
tokens))
+;; match in predicates so non-lists fail properly
(define (whitespace-token? token)
- (eq? 'whitespace (car token)))
+ (match token
+ (`(whitespace ,_) #t)
+ (_ #f)))
+
+(define (unwrap-preprocessing-token token)
+ (match token
+ (`(preprocessing-token ,x) x)
+ (_ (scm-error 'wrong-type-arg "unwrap-preprocessing-token"
+ "Not a preprocessing token: ~s" (list token)
+ #f))))
(define (preprocessing-token? token)
- (eq? 'preprocessing-token token))
+ (catch 'wrong-type-arg
+ (lambda () (unwrap-preprocessing-token token))
+ (const #f)))
+
+;; Replace all whitespace with single spaces.
(define (squeeze-whitespace tokens)
(match tokens
('() '())
- (`((whitespace ,_) (whitespace ,_) ,rest ...)
+ ((`(whitespace ,_) `(whitespace ,_) rest ...)
(squeeze-whitespace (cons '(whitespace " ") rest)))
- (`((whitespace ,_) ,rest ...)
+ ((`(whitespace ,_) rest ...)
(cons '(whitespace " ") (squeeze-whitespace rest)))
((token rest ...)
(cons token (squeeze-whitespace rest)))))
-(define (stringify-token token)
- ;; TODO propperly implement this
- `(preprocessing-token
- (string-literal ,(with-output-to-string (lambda () (display token))))))
+;; Returns the "source" of the token, as a preprocessing string literal token
+(define (stringify-token unwrapped-preprocessing-token)
+ (match unwrapped-preprocessing-token
+ (`(string-literal ,s)
+ (format #f "~s" s))
+ (`(header-name (q-string ,s))
+ (format #f "~s" s))
+ (`(header-name (h-string ,s))
+ (format #f "<~a>" s))
+ (`(identifier ,id) id)
+ (`(pp-number ,n) n)
+ (`(character-constant ,c)
+ (format #f "'~a'" c))
+ (`(punctuator ,p) p)))
(define (stringify-tokens tokens)
- (with-output-to-string
- (lambda ()
- (for-each (compose display stringify-token)
- (squeeze-whitespace tokens)))))
+ `(preprocessing-token
+ (string-literal
+ ,(string-concatenate
+ (map (match-lambda (`(preprocessing-token ,body) (stringify-token body))
+ (`(whitespace ,_) " "))
+ (squeeze-whitespace tokens))))))
;; Expand ## tokens
;; TODO
(define (expand-join macro tokens)
tokens)
-;; expand function like macro
-(define (apply-macro environment macro parameters)
- (define parameter-map
- (if (macro-variadic? macro)
- (let ((head rest (split-at parameters (length (macro-identifier-list macro)))))
- ;; TODO commas (,) should be interleaved with rest
- (cons (cons "__VA_ARGS__" rest)
- (map cons (macro-identifier-list macro) head)))
- (map cons
- (macro-identifier-list macro)
- parameters)))
-
- ;; resolve strigify operators
- (define stringify-resolved
- (let loop ((tokens (macro-body macro)))
- (match tokens
- (`((preprocessing-token (punctuator "#"))
- (whitespace ,_) ...
- (preprocessing-token (identifier ,x))
- ,rest ...)
- (unless (member x (macro-identifier-list macro))
- (scm-error 'macro-expand-error "apply-macro"
- "'#' is not followed by a macro parameter: ~s"
- (list x) #f)
+;; parameters is a lexeme list, as returned by parse-parameter-list
+(define (build-parameter-map macro parameters)
+ (if (macro-variadic? macro)
+ (let ((head rest (split-at parameters (length (macro-identifier-list macro)))))
+ ;; TODO commas (,) should be interleaved with rest
+ (cons (cons "__VA_ARGS__" rest)
+ (map cons (macro-identifier-list macro) head)))
+ (map cons
+ (macro-identifier-list macro)
+ parameters)))
+
+;; Drop leading whitespace tokens
+(define (drop-whitespace tokens)
+ (drop-while whitespace-token? tokens))
+
+(define (drop-whitespace-right tokens)
+ (-> tokens reverse drop-whitespace reverse))
+
+(define (drop-whitespace-both tokens)
+ (-> tokens
+ drop-whitespace
+ drop-whitespace-right))
+
+(define (expand-stringifiers macro parameter-map)
+ (let loop ((tokens (macro-body macro)))
+ (match tokens
+ (('(preprocessing-token (punctuator "#"))
+ rest ...)
+ (match (drop-whitespace rest)
+ ((`(preprocessing-token (identifier ,x)) rest ...)
+ (unless (member x (macro-identifier-list macro))
+ (scm-error 'macro-expand-error "expand-stringifiers"
+ "'#' is not followed by a macro parameter: ~s"
+ (list x) #f))
(cons (stringify-tokens (assoc-ref parameter-map x))
- (loop rest))))
- ('() '())
- ((token rest ...)
- (cons token (loop rest))))))
-
- ;; TODO
- ;; - resolve ##
- (define resulting-body
- (expand-join macro stringify-resolved))
+ (loop rest)))))
+ ('() '())
+ ((token rest ...)
+ (cons token (loop rest))))))
- ;; - subtitute parameters
- ;; TODO what am I doing here?
- (expand-macro (-> environment
- (extend-environment parameter-map))
- resulting-body))
+;; expand function like macro
+(define (apply-macro environment macro parameters)
+ (define parameter-map (build-parameter-map macro parameters))
+ (define stringify-resolved (expand-stringifiers macro parameter-map))
+ ;; TODO resolve ##
+ (define resulting-body stringify-resolved #; (expand-join macro stringify-resolved)
+ )
+ (resolve-token-stream (extend-environment environment parameter-map)
+ resulting-body))
@@ -103,8 +141,10 @@
;; OTHER
;; ⇒ "VALUE"
+;; token should be the token stream just after the name of the macro
(define (expand-macro environment macro tokens)
(cond ((object-macro? macro)
+ ;; Shouldn't we expand the macro body here?
(values environment (append (macro-body macro) tokens)))
((function-macro? macro)
@@ -118,7 +158,12 @@
(let ((containing remaining newlines (parse-parameter-list tokens)))
(values (bump-line environment newlines)
(append ((macro-body macro) environment containing)
- remaining))))))
+ remaining))))
+
+ (else
+ (scm-error 'wrong-type-arg "expand-macro"
+ "Macro isn't a macro: ~s"
+ (list macro) #f))))
;; Takes a list of preprocessing tokens, and returns two values
;; if the last token was '...'
@@ -130,16 +175,19 @@
(match tokens
('() (values #f (reverse done)))
- ((`(preprocessing-token (punctuation "...")))
- (values #t (reverse done)))
-
((`(preprocessing-token (identifier ,id)) rest ...)
(loop rest (cons id done)))
- ((`(preprocessing-token (punctuation "...")) rest ...)
+ ((`(preprocessing-token (punctuator "...")))
+ (values #t (reverse done)))
+
+ ((`(preprocessing-token (punctuator "...")) rest ...)
(scm-error 'cpp-error "parse-identifier-list"
- "'...' only allowed as last argument in identifier list"
- '() #f))
+ "'...' only allowed as last argument in identifier list. Rest: ~s"
+ (list rest) #f))
+
+ ((`(preprocessing-token (punctuator ",")) rest ...)
+ (loop rest done))
((`(preprocessing-token ,other) rest ...)
(scm-error 'cpp-error "parse-identifier-list"
@@ -147,10 +195,23 @@
(list other) #f)))))
+
+;; helper procedure to parse-parameter-list.
+;; If a parameter is complex then whitespace is kept, but squeezed to single spaces. Surounding whitespace is removed.
+;; Example:
+;; #define str(x, y) #y
+;; str(x, ( 2, 4 ) )
+;; expands to:
+;; "( 2, 4 )"
+;; 6.10.3.2 p 2
+(define (cleanup-whitespace tokens)
+ (-> tokens drop-whitespace-both squeeze-whitespace))
+
;; returns three values:
;; - a list of tokens where each is a parameter to the function like macro
;; - the remaining tokenstream
;; - how many newlines were encountered
+;; The standard might call these "replacement lists"
(define (parse-parameter-list tokens)
(let %loop ((depth 0) (newlines 0) (current '())
(parameters '()) (tokens tokens) (%first-iteration? #t))
@@ -162,26 +223,45 @@
current
(cons (car tokens) current))))
(match tokens
- (`((whitespace "\n") ,rest ...)
+ (('(whitespace "\n") rest ...)
(loop rest newlines: (1+ newlines) current: current*))
- (`((whitespace ,_) ,rest ...)
+ ((`(whitespace ,_) rest ...)
(loop rest current: current*))
- (`((preprocessing-token (punctuator "(")) ,rest ...)
+ (('(preprocessing-token (punctuator "(")) rest ...)
(loop rest depth: (1+ depth) current: current*))
- (`((preprocessing-token (punctuator ")")) ,rest ...)
+ (('(preprocessing-token (punctuator ")")) rest ...)
(if (= 1 depth)
- (values (reverse (cons (reverse current) parameters))
+ ;; return value
+ (values
+ (if (null? parameters)
+ (cond ((null? current) '())
+ ((every whitespace-token? current) '())
+ (else (reverse
+ (cons (cleanup-whitespace (reverse current))
+ parameters))))
+ (reverse
+ (cond ((null? current) parameters)
+ ((every whitespace-token? current) parameters)
+ (else (cons (cleanup-whitespace (reverse current))
+ parameters)))))
+
rest
newlines)
(loop rest
depth: (1- depth)
current: current*)))
- (`((preprocessing-token (punctuator ",")) ,rest ...)
+ (('(preprocessing-token (punctuator ",")) rest ...)
(if (= 1 depth)
(loop rest
current: '()
- parameters: (cons (reverse current) parameters))
- (loop rest current: current*)))))))
+ parameters:
+ (cons (cond ((null? current) '())
+ ((every whitespace-token? current) '())
+ (else (cleanup-whitespace (reverse current))))
+ parameters))
+ (loop rest current: current*)))
+ ((_ rest ...)
+ (loop rest current: current*))))))
(define (join-file-line environment)
@@ -190,8 +270,13 @@
(extend-environment
environment
;; 6.10.8
- `(("__FILE__" . (preprocessing-token (string-literal ,file)))
- ("__LINE__" . (preprocessing-token (pp-number ,(number->string line)))))))
+ (list
+ (object-like-macro
+ identifier: "__FILE__"
+ body: `((preprocessing-token (string-literal ,file))))
+ (object-like-macro
+ identifier: "__LINE__"
+ body: `((preprocessing-token (pp-number ,(number->string line))))))))
(define (c-search-path) (make-parameter (list "." "/usr/include")))
@@ -219,8 +304,8 @@
identifier: "defined"
body: (lambda (environment tokens)
(match tokens
- (`((preprocessor-token (identifier ,id)))
- `(preprocessor-token (pp-number ,(boolean->c-boolean (in-environment? environment id)))))
+ (`((preprocessing-token (identifier ,id)))
+ `(preprocessing-token (pp-number ,(boolean->c-boolean (in-environment? environment id)))))
(_ (scm-error 'cpp-error "defined"
"Invalid parameter list to `defined': ~s"
(list tokens) #f))))))
@@ -252,14 +337,15 @@
'TODO
)
+;; Expands a token-stream which contains no pre-processing directives (#if:s, ...)
(define (resolve-token-stream environment tokens)
(let loop ((tokens tokens))
(match tokens
('() '())
- (`((preprocessing-token (identifier ,id)) ,rest ...)
+ ((`(preprocessing-token (identifier ,id)) rest ...)
(call-with-values (lambda () (maybe-extend-identifier environment id rest))
(lambda (_ tokens) (loop tokens))))
- (`((whitespace ,_) ,rest ...)
+ ((`(whitespace ,_) rest ...)
(loop rest))
((token rest ...)
(cons token (loop rest))))))
@@ -278,13 +364,16 @@
;; environment, string, (list token) → environment, (list token)
(define (maybe-extend-identifier environment identifier remaining-tokens)
(cond ((get-identifier environment identifier)
- => (lambda (value) (expand-macro (join-file-line environment) value remaining-tokens)))
+ => (lambda (value) (expand-macro (join-file-line environment)
+ value
+ remaining-tokens)))
(else ; It wasn't an identifier, leave it as is
+ ;; TODO shouldn't we include the identifier in the remaining tokens stream?
(values environment remaining-tokens))))
(define (resolve-and-include-header environment tokens)
(let loop ((%first-time #t) (tokens tokens))
- (match (drop-while whitespace-token? tokens)
+ (match (drop-whitespace tokens)
((`(header-name (h-string ,str)) rest ...)
(cond ((remove whitespace-token? rest)
(negate null?)
@@ -341,27 +430,28 @@
;; environment, tokens → environment
(define (resolve-define environment tokens)
(match tokens
- (`((preprocessing-token (identifier ,identifier)) tail ...)
+ ((`(preprocessing-token (identifier ,identifier)) tail ...)
(-> environment
bump-line
(add-identifier!
identifier
- (if (equal? '(preprocessing-token (punctuator "(")) (car tail))
- ;; function like macro
- (call-with-values (lambda () (break (lambda (token) (equal? token '(preprocessing-token (punctuator ")"))))
- (cdr tail)))
- (lambda (identifier-list replacement-list)
- (let ((variadic? identifiers (parse-identifier-list identifier-list)))
- (function-like-macro
- identifier: identifier
- variadic?: variadic?
- identifier-list: identifiers
- ;; NOTE 6.10.3 states that there needs to be at least on whitespace here
- body: (cdr replacement-list)))))
-
- (object-like-macro
- identifier: identifier
- body: tail)))))))
+ (match tail
+ (('(preprocessing-token (punctuator "(")) rest ...)
+ ;; function like macro
+ (call-with-values (lambda () (break (lambda (token) (equal? token '(preprocessing-token (punctuator ")"))))
+ rest))
+ (lambda (identifier-list replacement-list)
+ (let ((variadic? identifiers (parse-identifier-list identifier-list)))
+
+ (function-like-macro
+ identifier: identifier
+ variadic?: variadic?
+ identifier-list: identifiers
+ ;; NOTE 6.10.3 states that there needs to be at least on whitespace here
+ body: (cdr replacement-list))))))
+ (_ (object-like-macro
+ identifier: identifier
+ body: tail))))))))
@@ -376,14 +466,18 @@
args)
#f))
+ ;; TODO all of this needs to be surounded with a conditional for
+ ;; environmentns if status. However, ensure that each directive
+ ;; starts at start of line
+
(match tokens
('() '())
- (`((whitespace "\n") (whitespace ,_) ... (preprocessing-token (puntuator "#")) ,rest ...)
+ ((`(whitespace "\n") `(whitespace ,_) '... '(preprocessing-token (puntuator "#")) rest ...)
;; Line tokens are those in this line,
;; while remaining tokens are the newline, follewed by the rest of the files tokens
(let ((line-tokens remaining-tokens (tokens-until-eol rest)))
;; Actual tokens just removes all whitespace between "#" and "define"
- (let ((actual-tokens (drop-while whitespace-token? line-tokens)))
+ (let ((actual-tokens (drop-whitespace line-tokens)))
(if (null? actual-tokens)
(loop (bump-line environment) remaining-tokens)
(match (car actual-tokens)
@@ -393,7 +487,7 @@
(`(preprocessing-token (identifier "ifdef"))
(match actual-tokens
- (`((preprocessing-token (identifier ,id)) ,_ ...)
+ ((`(preprocessing-token (identifier ,id)) _ ...)
(loop
((if (in-environment? environment id)
enter-active-if enter-inactive-if)
@@ -403,7 +497,7 @@
(`(preprocessing-token (identifier "ifndef"))
(match actual-tokens
- (`((preprocessing-token (identifier ,id)) ,_ ...)
+ ((`(preprocessing-token (identifier ,id)) _ ...)
(loop
((if (in-environment? environment id)
enter-inactive-if enter-active-if)
@@ -411,49 +505,49 @@
remaining-tokens))
(_ (err "Non identifier in ifndef: ~s" actual-tokens))))
- (`(preprocessing-token (identifier "else"))
+ ('(preprocessing-token (identifier "else"))
;; TODO
'TODO
)
- (`(preprocessing-token (identifier "elif"))
+ ('(preprocessing-token (identifier "elif"))
(-> environment leave-if
(resolve-for-if actual-tokens)
(loop remaining-tokens)))
- (`(preprocessing-token (identifier "endif"))
+ ('(preprocessing-token (identifier "endif"))
(loop (leave-if environment) remaining-tokens))
- (`(preprocessing-token (identifier "include"))
+ ('(preprocessing-token (identifier "include"))
(call-with-values
(lambda () (resolve-and-include-header environment (cdr actual-tokens)))
(lambda (environment tokens)
(loop environment (append tokens remaining-tokens)))))
- (`(preprocessing-token (identifier "define"))
+ ('(preprocessing-token (identifier "define"))
(let ((env (resolve-define environment (cdr actual-tokens))))
(loop env remaining-tokens))
)
- (`(preprocessing-token (identifier "undef"))
+ ('(preprocessing-token (identifier "undef"))
(loop (match actual-tokens
(`((preprocessing-token (identifier ,id)))
(-> environment bump-line (remove-identifier! id))))
remaining-tokens))
- (`(preprocessing-token (identifier "line"))
+ ('(preprocessing-token (identifier "line"))
(loop (handle-line-directive environment actual-tokens)
remaining-tokens))
- (`(preprocessing-token (identifier "error"))
+ ('(preprocessing-token (identifier "error"))
;; NOTE this is an "expected" error
(throw 'cpp-error actual-tokens))
- (`(preprocessing-token (identifier "pragma"))
+ ('(preprocessing-token (identifier "pragma"))
(loop (handle-pragma environment actual-tokens)
remaining-tokens)))))))
- (`((preprocessing-token (identifier ,id)) ,rest ...)
+ ((`(preprocessing-token (identifier ,id)) rest ...)
(call-with-values (lambda () (maybe-extend-identifier environment id rest))
loop))