aboutsummaryrefslogtreecommitdiff
path: root/module
diff options
context:
space:
mode:
authorHugo Hörnquist <hugo@lysator.liu.se>2022-07-10 23:36:56 +0200
committerHugo Hörnquist <hugo@lysator.liu.se>2022-07-10 23:36:56 +0200
commitf7b18cc72dd5b2ca90b6670dbe81c3ef3204d6d9 (patch)
treedcc40399f08285a9a308079098e735fb5bf192bd /module
parentAdd of-type? to (hnh util type). (diff)
downloadcalp-f7b18cc72dd5b2ca90b6670dbe81c3ef3204d6d9.tar.gz
calp-f7b18cc72dd5b2ca90b6670dbe81c3ef3204d6d9.tar.xz
Resolve recursive macros.
Diffstat (limited to 'module')
-rw-r--r--module/c/compiler.scm12
-rw-r--r--module/c/cpp-environment.scm20
-rw-r--r--module/c/cpp-environment/function-like-macro.scm15
-rw-r--r--module/c/cpp-environment/object-like-macro.scm13
-rw-r--r--module/c/cpp-types.scm28
-rw-r--r--module/c/cpp-util.scm62
-rw-r--r--module/c/lex2.scm28
-rw-r--r--module/c/preprocessor2.scm537
-rw-r--r--module/c/unlex.scm37
9 files changed, 388 insertions, 364 deletions
diff --git a/module/c/compiler.scm b/module/c/compiler.scm
index 801c3752..09d49578 100644
--- a/module/c/compiler.scm
+++ b/module/c/compiler.scm
@@ -25,23 +25,21 @@
;; 6.10.8
(object-like-macro
identifier: "__STDC__"
- body: '(preprocessing-token (pp-number "1")))
+ body: (lex "1"))
(object-like-macro
identifier: "__STDC_HOSTED__"
- body: '(preprocessing-token (pp-number "1")))
+ body: (lex "1"))
(object-like-macro
identifier: "__STDC_VERSION__"
- body: '(preprocessing-token (pp-number "201112L")))
+ body: (lex "201112L"))
(object-like-macro
identifier: "__DATE__"
;; TODO format should always be in
;; english, and not tranlated
- body: `(preprocessing-token (string-literal ,(strftime "%b %_d %Y" now))))
+ body: (lex (strftime "\"%b %_d %Y\"" now)))
(object-like-macro
identifier: "__TIME__"
- body: `(preprocessing-token
- (string-literal
- ,(strftime "%H:%M:%S" now))))))
+ body: (lex (strftime "\"%H:%M:%S\"" now)))))
(define environment
(-> (make-environment)
diff --git a/module/c/cpp-environment.scm b/module/c/cpp-environment.scm
index fa69e1fc..2ad60b56 100644
--- a/module/c/cpp-environment.scm
+++ b/module/c/cpp-environment.scm
@@ -53,15 +53,17 @@
(identifier x))
-(define (macro-body macro)
- (define body-proc
- (cond ((obj:object-like-macro? macro) obj:body)
- ((fun:function-like-macro? macro) fun:body)
- ((int:internal-macro? macro) int:body)
- (else (scm-error 'wrong-type-arg "macro-body"
- "Not a macro: ~s"
- (list macro) #f))))
- (body-proc macro))
+(define (macro-body-proc macro)
+ (cond ((obj:object-like-macro? macro) obj:body)
+ ((fun:function-like-macro? macro) fun:body)
+ ((int:internal-macro? macro) int:body)
+ (else (scm-error 'wrong-type-arg "macro-body"
+ "Not a macro: ~s"
+ (list macro) #f))))
+
+(define macro-body
+ (case-lambda ((macro) ((macro-body-proc macro) macro))
+ ((macro value) ((macro-body-proc macro) macro value))))
(define macro-identifier-list fun:identifier-list)
(define macro-variadic? fun:variadic?)
diff --git a/module/c/cpp-environment/function-like-macro.scm b/module/c/cpp-environment/function-like-macro.scm
index 26512439..a4b58487 100644
--- a/module/c/cpp-environment/function-like-macro.scm
+++ b/module/c/cpp-environment/function-like-macro.scm
@@ -1,6 +1,8 @@
(define-module (c cpp-environment function-like-macro)
:use-module (hnh util object)
:use-module (hnh util type)
+ :use-module ((c lex2) :select (lexeme?))
+ :use-module ((c unlex) :select (unlex))
:export (function-like-macro
function-like-macro?
identifier
@@ -8,11 +10,16 @@
body
variadic?))
-(define-type (function-like-macro)
+(define-type (function-like-macro
+ printer: (lambda (r p)
+ (format p "#<#define ~a(~a) ~a>"
+ (identifier r)
+ (append (identifier-list r)
+ (if (variadic? r)
+ '("...") '()))
+ (unlex (body r)))))
(identifier type: string?)
(identifier-list type: (list-of string?))
- ;; TODO import these
- (body type: list? ; (list-of (or whitespace-token? preprocessing-token?))
- )
+ (body type: (list-of lexeme?))
(variadic? type: boolean?
default: #f))
diff --git a/module/c/cpp-environment/object-like-macro.scm b/module/c/cpp-environment/object-like-macro.scm
index 5d4c8810..90a3ad23 100644
--- a/module/c/cpp-environment/object-like-macro.scm
+++ b/module/c/cpp-environment/object-like-macro.scm
@@ -1,13 +1,18 @@
(define-module (c cpp-environment object-like-macro)
:use-module (hnh util object)
+ :use-module (hnh util type)
+ :use-module ((c lex2) :select (lexeme?))
+ :use-module ((c unlex) :select (unlex))
:export (object-like-macro
object-like-macro?
identifier
body))
-(define-type (object-like-macro)
+(define-type (object-like-macro
+ printer: (lambda (r p)
+ (format p "#<#define ~a ~a>"
+ (identifier r)
+ (unlex (body r)))))
(identifier type: string?)
- ;; TODO import these
- (body type: list? ; (list-of (or whitespace-token? preprocessing-token?))
- ))
+ (body type: (list-of lexeme?)))
diff --git a/module/c/cpp-types.scm b/module/c/cpp-types.scm
new file mode 100644
index 00000000..64bf6a7b
--- /dev/null
+++ b/module/c/cpp-types.scm
@@ -0,0 +1,28 @@
+(define-module (c cpp-types)
+ :use-module (c lex2)
+ :use-module (ice-9 match)
+ :use-module (c cpp-util)
+ :export (whitespace-token?
+ comment-token?
+ preprocessing-token?
+ newline-token?
+ identifier-token?))
+
+(define (whitespace-token? x)
+ (eq? 'whitespace (lexeme-type x)))
+
+(define (comment-token? x)
+ (eq? 'comment (lexeme-type x)))
+
+(define (preprocessing-token? x)
+ (eq? 'preprocessing-token (lexeme-type x)))
+
+(define (newline-token? x)
+ (and (whitespace-token? x)
+ (string=? "\n" (lexeme-body x))))
+
+(define (identifier-token? token)
+ (and (preprocessing-token? token)
+ (match (lexeme-body token)
+ (`(identifier ,id) id)
+ (_ #f))))
diff --git a/module/c/cpp-util.scm b/module/c/cpp-util.scm
new file mode 100644
index 00000000..420c8739
--- /dev/null
+++ b/module/c/cpp-util.scm
@@ -0,0 +1,62 @@
+(define-module (c cpp-util)
+ :use-module ((srfi srfi-1) :select (drop-while break))
+ :use-module ((hnh util) :select (->))
+ :use-module (hnh util type)
+ :use-module ((c lex2) :select (lex lexeme?))
+ :use-module (c cpp-types)
+ :export (tokens-until-eol
+ squeeze-whitespace
+ drop-whitespace
+ drop-whitespace-right
+ drop-whitespace-both
+ cleanup-whitespace))
+
+;; Returns two values:
+;; - tokens until a newline token is met
+;; - (potentially the newline token) and the remaining tokens
+(define (tokens-until-eol tokens)
+ (typecheck tokens (list-of lexeme?))
+ (break newline-token? tokens))
+
+
+;; Replace all whitespace with single spaces.
+(define (squeeze-whitespace tokens)
+ (cond ((null? tokens) '())
+ ((null? (cdr tokens))
+ (list
+ (if (whitespace-token? (car tokens))
+ (car (lex " "))
+ (car tokens))))
+ ((and (whitespace-token? (car tokens))
+ (whitespace-token? (cadr tokens)))
+ (squeeze-whitespace (cons (car (lex " "))
+ (cddr tokens))))
+ (else (cons (car tokens)
+ (squeeze-whitespace (cdr tokens))))))
+
+;; Drop leading whitespace tokens
+(define (drop-whitespace tokens)
+ (typecheck tokens (list-of lexeme?))
+ (drop-while whitespace-token? tokens))
+
+(define (drop-whitespace-right tokens)
+ (typecheck tokens (list-of lexeme?))
+ (-> tokens reverse drop-whitespace reverse))
+
+(define (drop-whitespace-both tokens)
+ (typecheck tokens (list-of lexeme?))
+ (-> tokens
+ drop-whitespace
+ drop-whitespace-right))
+
+;; helper procedure to parse-parameter-list.
+;; If a parameter is complex then whitespace is kept, but squeezed to single spaces. Surounding whitespace is removed.
+;; Example:
+;; #define str(x, y) #y
+;; str(x, ( 2, 4 ) )
+;; expands to:
+;; "( 2, 4 )"
+;; 6.10.3.2 p 2
+(define (cleanup-whitespace tokens)
+ (typecheck tokens (list-of lexeme?))
+ (-> tokens drop-whitespace-both squeeze-whitespace))
diff --git a/module/c/lex2.scm b/module/c/lex2.scm
index 6083190f..e1784541 100644
--- a/module/c/lex2.scm
+++ b/module/c/lex2.scm
@@ -1,6 +1,14 @@
(define-module (c lex2)
:use-module (ice-9 peg)
- :export (lex))
+ :use-module (ice-9 match)
+ :use-module (hnh util object)
+ :use-module (hnh util type)
+ :use-module (srfi srfi-88)
+ :export (lex
+ lexeme lexeme?
+ (type . lexeme-type)
+ (body . lexeme-body)
+ (noexpand . lexeme-noexpand)))
;;; A.1 Lexical grammar
;;; A.1.1 Lexical elements
@@ -321,6 +329,22 @@
preprocessing-token)))
+(define-type (lexeme)
+ (type type: (memv '(whitespace comment preprocessing-token)))
+ (body type: (or string? list?))
+ (noexpand type: (list-of string?)
+ default: '()))
+
+(define (lex-output->lexeme-object x)
+ (match x
+ (`(whitespace ,body)
+ (lexeme body: body type: 'whitespace ))
+ (`(comment ,body)
+ (lexeme body: body type: 'comment ))
+ (`(preprocessing-token ,body)
+ (lexeme body: body type: 'preprocessing-token))))
+
;; returns a list of lexemes
(define (lex string)
- (cdr (peg:tree (match-pattern preprocessing-tokens string))))
+ (map lex-output->lexeme-object
+ (cdr (peg:tree (match-pattern preprocessing-tokens string)))))
diff --git a/module/c/preprocessor2.scm b/module/c/preprocessor2.scm
index 0bb101f8..2d2a9530 100644
--- a/module/c/preprocessor2.scm
+++ b/module/c/preprocessor2.scm
@@ -9,139 +9,40 @@
:select (function-like-macro variadic? identifier-list))
:use-module ((c cpp-environment object-like-macro) :select (object-like-macro))
:use-module ((c cpp-environment internal-macro) :select (internal-macro))
- :use-module ((hnh util) :select (-> intersperse))
- :use-module ((hnh util lens) :select (set))
+ :use-module ((hnh util) :select (-> intersperse aif swap))
+ :use-module ((hnh util lens) :select (set modify))
:use-module (hnh util path)
:use-module (hnh util type)
- :use-module ((c lex2) :select (lex))
+ :use-module ((c lex2) :select (lex #|lexeme|# lexeme? lexeme-body lexeme-type lexeme-noexpand))
:use-module ((c trigraph) :select (replace-trigraphs))
:use-module ((c line-fold) :select (fold-lines))
+ :use-module (c unlex)
+ :use-module (c cpp-types)
+ :use-module (c cpp-util)
:export ())
-;;; Call graph
-;; squeeze-whitespace
-;; stringify-tokens
-;; expand-join
-;; build-parameter-map
+(define-syntax-rule (alist-of variable key-type value-type)
+ (build-validator-body variable (list-of (pair-of key-type value-type))))
-;; apply-macro
-;; - build-parameter-map
-;; - stringify-tokens
-;; - expand-join
-
-;; expand-macro
-;; - parse-parameter-list
-;; - apply-macro
-
-;; parse-parameter-list
-
-;; resolve-token-stream
-;; - maybe-extend-identifier
-
-;; maybe-extend-identifier
-;; - expand-macro
-
-;; resolve-define
-;; - parse-identifier-list
-
-;; expand-stringifiers
-;; - stringify-tokens
-
-;;;
-
-(define-syntax-rule (parameter-map? x)
- (typecheck x (list-of (pair-of string? (list-of token?)))))
-
-;; Returns two values:
-;; - tokens until a newline token is met
-;; - (potentially the newline token) and the remaining tokens
-(define (tokens-until-eol tokens)
- (typecheck tokens (list-of token?))
- (break (lambda (token) (equal? token '(whitespace "\n")))
- tokens))
-
-;; match in predicates so non-lists fail properly
-(define (whitespace-token? token)
- (match token
- (`(whitespace ,_) #t)
- (_ #f)))
-
-(define (identifier-token? token)
- (match token
- (`(preprocessing-token (identifier ,id)) id)
- (_ #f)))
-
-(define (unwrap-preprocessing-token token)
- (match token
- (`(preprocessing-token ,x) x)
- (_ (scm-error 'wrong-type-arg "unwrap-preprocessing-token"
- "Not a preprocessing token: ~s" (list token)
- #f))))
-
-(define (preprocessing-token? token)
- (catch 'wrong-type-arg
- (lambda () (unwrap-preprocessing-token token))
- (const #f)))
-
-(define (token? x)
- (or (preprocessing-token? x)
- (whitespace-token? x)))
-
-
-;; Replace all whitespace with single spaces.
-(define (squeeze-whitespace tokens)
- (match tokens
- ('() '())
- ((`(whitespace ,_) `(whitespace ,_) rest ...)
- (squeeze-whitespace (cons '(whitespace " ") rest)))
- ((`(whitespace ,_) rest ...)
- (cons '(whitespace " ") (squeeze-whitespace rest)))
- ((token rest ...)
- (cons token (squeeze-whitespace rest)))))
-
-;; Returns the "source" of the token, as a preprocessing string literal token
-(define (stringify-token unwrapped-preprocessing-token)
- (match unwrapped-preprocessing-token
- (`(string-literal ,s)
- (format #f "~s" s))
- (`(header-name (q-string ,s))
- (format #f "~s" s))
- (`(header-name (h-string ,s))
- (format #f "<~a>" s))
- (`(identifier ,id) id)
- (`(pp-number ,n) n)
- (`(character-constant ,c)
- (format #f "'~a'" c))
- (`(punctuator ,p) p)))
-
-;; takes a token list, and return a single string literal token
-(define (stringify-tokens tokens)
- `(preprocessing-token (string-literal ,(unlex tokens))))
-
-;; takes a list of preprocessing-token's, and return a "source" string
-(define (unlex tokens)
- (typecheck tokens (list-of token?))
- (string-concatenate
- (map (match-lambda (`(preprocessing-token ,body) (stringify-token body))
- (`(whitespace ,_) " "))
- (squeeze-whitespace tokens))))
+(define parameter-map? (of-type? (alist-of string? (list-of lexeme?))))
;; Expand ## tokens
;; TODO
-(define (expand-join macro tokens)
+;; Tokens is the body of the macro
+(define (expand## macro tokens)
(typecheck macro macro?)
- (typecheck tokens (list-of token?))
+ (typecheck tokens (list-of lexeme?))
tokens)
;; parameters is a lexeme list, as returned by parse-parameter-list
(define (build-parameter-map macro parameters)
(typecheck macro macro?)
- (typecheck parameters (list-of (list-of token?)))
+ (typecheck parameters (list-of (list-of lexeme?)))
(if (macro-variadic? macro)
(let ((head rest (split-at parameters (length (macro-identifier-list macro)))))
(cons (cons "__VA_ARGS__" (concatenate (intersperse
- '((preprocessing-token (punctuator ",")))
+ (lex ",")
rest)))
(map cons (macro-identifier-list macro) head)))
(map cons
@@ -160,68 +61,57 @@
body: body)))
param-map))
-;; Drop leading whitespace tokens
-(define (drop-whitespace tokens)
- (typecheck tokens (list-of token?))
- (drop-while whitespace-token? tokens))
-
-(define (drop-whitespace-right tokens)
- (typecheck tokens (list-of token?))
- (-> tokens reverse drop-whitespace reverse))
-
-(define (drop-whitespace-both tokens)
- (typecheck tokens (list-of token?))
- (-> tokens
- drop-whitespace
- drop-whitespace-right))
-(define (expand-stringifiers macro parameter-map)
+(define (expand# macro parameter-map)
(typecheck macro macro?)
(typecheck parameter-map parameter-map?)
(let loop ((tokens (macro-body macro)))
- (match tokens
- (('(preprocessing-token (punctuator "#"))
- rest ...)
- (match (drop-whitespace rest)
- ((`(preprocessing-token (identifier ,x)) rest ...)
- (unless (member x (macro-identifier-list macro))
- (scm-error 'macro-expand-error "expand-stringifiers"
- "'#' is not followed by a macro parameter: ~s"
- (list x) #f))
- (cons (stringify-tokens (assoc-ref parameter-map x))
- (loop rest)))))
- ('() '())
- ((token rest ...)
- (cons token (loop rest))))))
+ (cond ((null? tokens) '())
+ ((equal? '(punctuator "#")
+ (lexeme-body (car tokens)))
+ (let ((trimmed (drop-whitespace (cdr tokens))))
+ (let ((x (identifier-token? (car trimmed)))
+ (rest (cdr trimmed)))
+ (unless (member x (macro-identifier-list macro))
+ (scm-error 'macro-expand-error "expand#"
+ "'#' is not followed by a macro parameter: ~s"
+ (list x) #f))
+ (cons (stringify-tokens (assoc-ref parameter-map x))
+ (loop rest)))))
+ (else (cons (car tokens) (loop (cdr tokens)))))))
;; expand function like macro
+;; parameter is a list of lexeme-lists, each "top level" element matching one
+;; argument to the macro
(define (apply-macro environment macro parameters)
(typecheck environment cpp-environment?)
;; Each element should be the lexeme list for that argument
- (typecheck parameters (list-of (list-of token?)))
+ (typecheck parameters (list-of (list-of lexeme?)))
(typecheck macro macro?)
(when (or (and (variadic? macro)
- (> (length (identifier-list macro))
+ (> (length (macro-identifier-list macro))
(length parameters)))
(and (not (variadic? macro))
- (not (= (length (identifier-list macro))
+ (not (= (length (macro-identifier-list macro))
(length parameters)))))
(scm-error 'cpp-arity-error "apply-macro"
;; TODO better error message for variadic macros
"Wrong number of arguments to macro ~s, expected ~s, got ~s"
(list (macro-identifier macro)
- (length (identifier-list macro))
+ (length (macro-identifier-list macro))
(length parameters))
(list macro)))
(let ()
(define parameter-map (build-parameter-map macro parameters))
- (define stringify-resolved (expand-stringifiers macro parameter-map))
+ (define stringify-resolved (expand# macro parameter-map))
;; TODO resolve ##
- (define resulting-body stringify-resolved #; (expand-join macro stringify-resolved))
+ (define resulting-body stringify-resolved #; (expand## macro stringify-resolved))
+
(define (bound-identifier? id)
- (member id (if (variadic? macro)
- (cons "__VA_ARGS__" (identifier-list macro))
- (identifier-list macro))))
+ (and (string? id)
+ (or (and (variadic? macro) (string=? id "__VA_ARGS__"))
+ (member id (macro-identifier-list macro)))))
+
(let loop ((tokens resulting-body))
(cond ((null? tokens) '())
;; TODO the parameters should be macro-expanded before being inserted
@@ -229,12 +119,7 @@
bound-identifier?
=> (lambda (id) (append (assoc-ref parameter-map id)
(loop (cdr tokens)))))
- (else (cons (car tokens)
- (loop (cdr tokens))))))
- #;
- (let ((env (extend-environment environment
- (parameter-map->macro-list parameter-map))))
- (resolve-token-stream env resulting-body))))
+ (else (cons (car tokens) (loop (cdr tokens))))))))
@@ -246,33 +131,40 @@
;; OTHER
;; ⇒ "VALUE"
-;; token should be the token stream just after the name of the macro
-(define (expand-macro environment macro remaining-tokens)
+;; remaining-tokens should be the token stream just after the name of the macro
+(define (expand-macro environment macro noexpand-list remaining-tokens)
(typecheck environment cpp-environment?)
- (typecheck remaining-tokens (list-of token?))
+ (typecheck macro macro?)
+ (typecheck remaining-tokens (list-of lexeme?))
+ (typecheck noexpand-list (list-of string?))
+
(let ((name (macro-identifier macro)))
- (cond ((object-macro? macro)
- (values environment (append (mark-noexpand (macro-body macro) name)
- remaining-tokens)))
-
- ((function-macro? macro)
- (let ((containing remaining newlines (parse-parameter-list remaining-tokens)))
- (values (bump-line environment newlines)
- (append (mark-noexpand (apply-macro environment macro containing)
- name)
- remaining))))
-
- ((internal-macro? macro)
- (let ((containing remaining newlines (parse-parameter-list remaining-tokens)))
- (values (bump-line environment newlines)
- (append (mark-noexpand ((macro-body macro) environment containing)
- name)
- remaining))))
-
- (else
- (scm-error 'wrong-type-arg "expand-macro"
- "Macro isn't a macro: ~s"
- (list macro) #f)))))
+ (cond ((object-macro? macro)
+ (values environment (append (fold (swap mark-noexpand)
+ (macro-body macro)
+ (cons name noexpand-list))
+ remaining-tokens)))
+
+ ((function-macro? macro)
+ (let ((containing remaining newlines (parse-parameter-list remaining-tokens)))
+ (values (bump-line environment newlines)
+ (append (fold (swap mark-noexpand)
+ (apply-macro environment macro containing)
+ (cons name noexpand-list))
+ remaining))))
+
+ ((internal-macro? macro)
+ (let ((containing remaining newlines (parse-parameter-list remaining-tokens)))
+ (values (bump-line environment newlines)
+ (append (fold (swap mark-noexpand)
+ ((macro-body macro) environment containing)
+ (cons name noexpand-list))
+ remaining))))
+
+ (else
+ (scm-error 'wrong-type-arg "expand-macro"
+ "Macro isn't a macro: ~s"
+ (list macro) #f)))))
;; Takes a list of preprocessing tokens, and returns two values
;; if the last token was '...'
@@ -280,43 +172,25 @@
;; Note that this is ONLY #define f(x) forms
;; not usage forms
(define (parse-identifier-list tokens)
- (typecheck tokens (list-of token?))
+ (typecheck tokens (list-of lexeme?))
(let loop ((tokens (remove whitespace-token? tokens)) (done '()))
- (match tokens
- ('() (values #f (reverse done)))
-
- ((`(preprocessing-token (identifier ,id)) rest ...)
- (loop rest (cons id done)))
+ (cond ((null? tokens) (values #f (reverse done)))
+ ((identifier-token? (car tokens))
+ => (lambda (id) (loop (cdr tokens) (cons id done))))
+ ((equal? '(punctuator "...") (lexeme-body (car tokens)))
+ (unless (null? (cdr tokens))
+ (scm-error 'cpp-error "parse-identifier-list"
+ "'...' only allowed as last argument in identifier list. Rest: ~s"
+ (list (cdr tokens)) #f))
+ (values #t (reverse done)))
+ ((equal? '(punctuator ",") (lexeme-body (car tokens)))
+ (loop (cdr tokens) done))
+ (else (scm-error 'cpp-error "parse-identifier-list"
+ "Unexpected preprocessing-token in identifier list: ~s"
+ (list (car tokens)) #f)))))
- ((`(preprocessing-token (punctuator "...")))
- (values #t (reverse done)))
- ((`(preprocessing-token (punctuator "...")) rest ...)
- (scm-error 'cpp-error "parse-identifier-list"
- "'...' only allowed as last argument in identifier list. Rest: ~s"
- (list rest) #f))
- ((`(preprocessing-token (punctuator ",")) rest ...)
- (loop rest done))
-
- ((`(preprocessing-token ,other) rest ...)
- (scm-error 'cpp-error "parse-identifier-list"
- "Unexpected preprocessing-token in identifier list: ~s"
- (list other) #f)))))
-
-
-
-;; helper procedure to parse-parameter-list.
-;; If a parameter is complex then whitespace is kept, but squeezed to single spaces. Surounding whitespace is removed.
-;; Example:
-;; #define str(x, y) #y
-;; str(x, ( 2, 4 ) )
-;; expands to:
-;; "( 2, 4 )"
-;; 6.10.3.2 p 2
-(define (cleanup-whitespace tokens)
- (typecheck tokens (list-of token?))
- (-> tokens drop-whitespace-both squeeze-whitespace))
;; returns three values:
;; - a list of tokens where each is a parameter to the function like macro
@@ -324,7 +198,7 @@
;; - how many newlines were encountered
;; The standard might call these "replacement lists"
(define (parse-parameter-list tokens*)
- (typecheck tokens* (list-of token?))
+ (typecheck tokens* (list-of lexeme?))
(let %loop ((depth 0) (newlines 0) (current '())
(parameters '()) (tokens tokens*) (%first-iteration? #t))
(define* (loop tokens key:
@@ -334,49 +208,50 @@
(let ((current* (if (zero? depth)
current
(cons (car tokens) current))))
- (match tokens
- (() (scm-error 'misc-error "parse-parameter-list"
- "Ran out of tokens while parsing: ~s"
- (list tokens*) #f))
- (('(whitespace "\n") rest ...)
- (loop rest newlines: (1+ newlines) current: current*))
- ((`(whitespace ,_) rest ...)
- (loop rest current: current*))
- (('(preprocessing-token (punctuator "(")) rest ...)
- (loop rest depth: (1+ depth) current: current*))
- (('(preprocessing-token (punctuator ")")) rest ...)
- (if (= 1 depth)
- ;; return value
- (values
- (if (null? parameters)
- (cond ((null? current) '())
- ((every whitespace-token? current) '())
- (else (reverse
- (cons (cleanup-whitespace (reverse current))
- parameters))))
- (reverse
- (cond ((null? current) parameters)
- ((every whitespace-token? current) parameters)
- (else (cons (cleanup-whitespace (reverse current))
- parameters)))))
-
- rest
- newlines)
- (loop rest
- depth: (1- depth)
- current: current*)))
- (('(preprocessing-token (punctuator ",")) rest ...)
- (if (= 1 depth)
- (loop rest
- current: '()
- parameters:
- (cons (cond ((null? current) '())
- ((every whitespace-token? current) '())
- (else (cleanup-whitespace (reverse current))))
- parameters))
- (loop rest current: current*)))
- ((_ rest ...)
- (loop rest current: current*))))))
+ (cond ((null? tokens)
+ (scm-error 'misc-error "parse-parameter-list"
+ "Ran out of tokens while parsing: ~s"
+ (list tokens*) #f))
+ ((newline-token? (car tokens))
+ (loop (cdr tokens) newlines: (1+ newlines) current: current*))
+ ((whitespace-token? (car tokens))
+ (loop (cdr tokens) current: current*))
+
+ ((equal? '(punctuator "(") (lexeme-body (car tokens)))
+ (loop (cdr tokens) depth: (1+ depth) current: current*))
+ ((equal? '(punctuator ")") (lexeme-body (car tokens)))
+ (if (= 1 depth)
+ ;; return value
+ (values
+ (if (null? parameters)
+ (cond ((null? current) '())
+ ((every whitespace-token? current) '())
+ (else (reverse
+ (cons (cleanup-whitespace (reverse current))
+ parameters))))
+ (reverse
+ (cond ((null? current) parameters)
+ ((every whitespace-token? current) parameters)
+ (else (cons (cleanup-whitespace (reverse current))
+ parameters)))))
+
+ (cdr tokens)
+ newlines)
+ (loop (cdr tokens)
+ depth: (1- depth)
+ current: current*)))
+ ((equal? '(punctuator ",") (lexeme-body (car tokens)))
+ (if (= 1 depth)
+ (loop (cdr tokens)
+ current: '()
+ parameters:
+ (cons (cond ((null? current) '())
+ ((every whitespace-token? current) '())
+ (else (cleanup-whitespace (reverse current))))
+ parameters))
+ (loop (cdr tokens) current: current*)))
+ (else
+ (loop (cdr tokens) current: current*))))))
;; Add __FILE__ and __LINE__ object macros to the environment
@@ -389,10 +264,10 @@
(list
(object-like-macro
identifier: "__FILE__"
- body: `((preprocessing-token (string-literal ,file))))
+ body: (lex (format #f "~s" file)))
(object-like-macro
identifier: "__LINE__"
- body: `((preprocessing-token (pp-number ,(number->string line))))))))
+ body: (lex (number->string line))))))
(define (c-search-path) (make-parameter (list "." "/usr/include")))
@@ -420,14 +295,15 @@
(define defined-macro
(internal-macro
identifier: "defined"
- body: (lambda (environment tokens)
- (match tokens
- (`(((preprocessing-token (identifier ,id))))
- (let ((in-env (boolean->c-boolean (in-environment? environment id))))
- (lex (number->string in-env))))
- (_ (scm-error 'cpp-error "defined"
+ body: (lambda (environment arguments)
+ (typecheck arguments (and (list-of (list-of lexeme?))
+ (not null?)))
+ (aif (identifier-token? (car (list-ref arguments 0)))
+ (let ((in-env (boolean->c-boolean (in-environment? environment it))))
+ (lex (number->string in-env)))
+ (scm-error 'cpp-error "defined"
"Invalid parameter list to `defined': ~s"
- (list tokens) #f))))))
+ (list tokens) #f)))))
;; environment, tokens → environment
(define (handle-pragma environment tokens)
@@ -453,7 +329,7 @@
;; TODO
(define (resolve-constant-expression tokens)
- (typecheck tokens (list-of token?))
+ (typecheck tokens (list-of lexeme?))
'TODO
)
@@ -468,7 +344,10 @@
((function-macro? x)
(format p "#define ~a(~a) ~a"
(macro-identifier x)
- (string-join (macro-identifier-list x) "," 'infix)
+ (string-join (append (macro-identifier-list x)
+ (if (variadic? x)
+ '("...") '()))
+ "," 'infix)
(unlex (macro-body x))))))
(define* (pprint-environment environment optional: (port (current-error-port)))
@@ -478,38 +357,26 @@
(newline port))
(cpp-variables environment)))
-(define noexpand (make-object-property))
+(define (mark-noexpand1 token name)
+ (modify token lexeme-noexpand xcons name))
(define (mark-noexpand tokens name)
- (typecheck tokens (list-of token?))
- (typecheck name string?)
- (let ((tokens tokens))
- (for-each (lambda (token) (set! (noexpand token) (cons name (noexpand token)))) tokens)
- tokens))
-
-(define (list-like->list x)
- (if (not (pair? x))
- (list x)
- (cons (car x) (list-like->list (cdr x)))))
+ ;; (typecheck tokens (list-of lexeme?))
+ ;; (typecheck name string?)
+ (map (lambda (token) (mark-noexpand1 token name)) tokens))
(define (marked-noexpand? token)
(cond ((identifier-token? token)
- => (lambda (id) (member id (list-like->list (noexpand token)))))
+ => (lambda (id) (member id (lexeme-noexpand token))))
(else #f)))
;; Expands a token-stream which contains no pre-processing directives (#if:s, ...)
(define (resolve-token-stream environment tokens)
(typecheck environment cpp-environment?)
- (typecheck tokens (list-of token?))
+ (typecheck tokens (list-of lexeme?))
;; (pprint-environment environment)
;; (format (current-error-port) "~a~%~%" (unlex tokens))
(let loop ((environment environment) (tokens tokens))
- (unless (null? tokens)
- (format (current-error-port) "~s [~a] [~a]~%"
- (car tokens)
- (noexpand (car tokens))
- (marked-noexpand? (car tokens))))
- (format (current-error-port) "~a~%" (unlex tokens))
(cond ((null? tokens) '())
((car tokens)
(lambda (x) (and (identifier-token? x)
@@ -518,28 +385,18 @@
(call-with-values
(lambda () (maybe-extend-identifier environment
(identifier-token? token)
+ (lexeme-noexpand token)
(cdr tokens)))
loop)))
(else (cons (car tokens)
- (loop environment (cdr tokens))))
- )
- #;
- (match tokens
- ('() '())
- ((`(preprocessing-token (identifier ,id)) rest ...)
- (call-with-values (lambda () (maybe-extend-identifier environment id rest))
- loop))
- ;; ((`(whitespace ,_) rest ...)
- ;; (loop environment rest))
- ((token rest ...)
- (cons token (loop environment rest))))))
+ (loop environment (cdr tokens)))))))
;; returns a new environment
;; handle body of #if
;; environment, (list token) → environment
(define (resolve-for-if environment tokens)
(typecheck environment cpp-environment?)
- (typecheck tokens (list-of token?))
+ (typecheck tokens (list-of lexeme?))
(-> (extend-environment environment defined-macro)
(resolve-token-stream tokens)
@@ -549,25 +406,27 @@
(enter-inactive-if environment))))
;; environment, string, (list token) → environment, (list token)
-(define (maybe-extend-identifier environment identifier remaining-tokens)
+(define (maybe-extend-identifier environment identifier noexpand-list remaining-tokens)
(typecheck environment cpp-environment?)
(typecheck identifier string?)
- (typecheck remaining-tokens (list-of token?))
- ;; (typecheck continuation procedure?) ; TODO arity?
+ (typecheck remaining-tokens (list-of lexeme?))
+ (typecheck noexpand-list (list-of string?))
(cond ((get-identifier environment identifier)
=> (lambda (value) (expand-macro (join-file-line environment)
value
+ noexpand-list
remaining-tokens)))
- (else ; It wasn't an identifier, leave it as is
+ (else ; It wasn't an identifier, leave it as is
(values environment
- (append (mark-noexpand `((preprocessing-token (identifier ,identifier)))
+ (append (mark-noexpand (lex identifier)
identifier)
remaining-tokens)))))
(define (resolve-and-include-header environment tokens)
(typecheck environment cpp-environment?)
- (typecheck tokens (list-of token?))
+ (typecheck tokens (list-of lexeme?))
+ ;; TODO rewrite without match
(let loop ((%first-time #t) (tokens tokens))
(match (drop-whitespace tokens)
((`(header-name (h-string ,str)) rest ...)
@@ -603,8 +462,9 @@
;; environment, tokens → environment
(define (handle-line-directive environment tokens*)
(typecheck environment cpp-environment?)
- (typecheck tokens* (list-of token?))
+ (typecheck tokens* (list-of lexeme?))
+ ;; TODO rewrite without match
(let loop ((%first-time #t) (tokens tokens*))
(match tokens
(`((preprocessing-token (pp-number ,line)) (whitespace ,_) ... rest ...)
@@ -629,33 +489,34 @@
;; environment, tokens → environment
(define (resolve-define environment tokens)
(typecheck environment cpp-environment?)
- (typecheck tokens (list-of token?))
+ (typecheck tokens (list-of lexeme?))
+
+ (let ((identifier (identifier-token? (car tokens)))
+ (tail (cdr tokens)))
+ (-> environment
+ bump-line
+ (add-identifier!
+ identifier
+ (cond ((and (not (null? tail))
+ (equal? '(punctuator "(") (lexeme-body (car tail))))
+ ;; function like macro
+ (let ((identifier-list
+ replacement-list
+ (break (lambda (token) (equal? '(punctuator ")") (lexeme-body token) ))
+ (cdr tail))))
+ (let ((variadic? identifiers (parse-identifier-list identifier-list)))
+ (function-like-macro
+ identifier: identifier
+ variadic?: variadic?
+ identifier-list: identifiers
+ ;; NOTE 6.10.3 states that there needs to be at least on whitespace here
+ ;; cdr drops the end parenthesis of the definition
+ ;; surrounding whitespace is not part of the replacement list (6.10.3 p.7)
+ body: (drop-whitespace-both (cdr replacement-list))))))
+ (else (object-like-macro
+ identifier: identifier
+ body: (drop-whitespace-both tail))))))))
- (match tokens
- ((`(preprocessing-token (identifier ,identifier)) tail ...)
- (-> environment
- bump-line
- (add-identifier!
- identifier
- (match tail
- (('(preprocessing-token (punctuator "(")) rest ...)
- ;; function like macro
- (let ((identifier-list
- replacement-list
- (break (lambda (token) (equal? token '(preprocessing-token (punctuator ")"))))
- rest)))
- (let ((variadic? identifiers (parse-identifier-list identifier-list)))
- (function-like-macro
- identifier: identifier
- variadic?: variadic?
- identifier-list: identifiers
- ;; NOTE 6.10.3 states that there needs to be at least on whitespace here
- ;; cdr drops the end parenthesis of the definition
- ;; surrounding whitespace is not part of the replacement list (6.10.3 p.7)
- body: (drop-whitespace-both (cdr replacement-list))))))
- (_ (object-like-macro
- identifier: identifier
- body: (drop-whitespace-both tail)))))))))
@@ -765,9 +626,9 @@
(call-with-input-file path (@ (ice-9 rdelim) read-string)))
(define (comment->whitespace token)
- (match token
- (`(comment ,_) '(whitespace " "))
- (other other)))
+ (if (comment-token? token)
+ (car (lex " "))
+ token))
(define (comments->whitespace tokens)
(map comment->whitespace tokens))
diff --git a/module/c/unlex.scm b/module/c/unlex.scm
new file mode 100644
index 00000000..9f4b25b9
--- /dev/null
+++ b/module/c/unlex.scm
@@ -0,0 +1,37 @@
+(define-module (c unlex)
+ :use-module (hnh util type)
+ :use-module (ice-9 match)
+ :use-module (c lex2)
+ :use-module (c cpp-types)
+ :use-module (c cpp-util)
+ :export (unlex
+ stringify-token
+ stringify-tokens))
+
+;; takes a list of preprocessing-token's, and return a "source" string
+(define (unlex tokens)
+ (typecheck tokens (list-of lexeme?))
+ (string-concatenate
+ (map (lambda (x)
+ (cond ((preprocessing-token? x) (stringify-token x))
+ ((whitespace-token? x) " ")))
+ (squeeze-whitespace tokens))))
+
+;; Returns the "source" of the token, as a preprocessing string literal token
+(define (stringify-token preprocessing-token)
+ (match (lexeme-body preprocessing-token)
+ (`(string-literal ,s)
+ (format #f "~s" s))
+ (`(header-name (q-string ,s))
+ (format #f "~s" s))
+ (`(header-name (h-string ,s))
+ (format #f "<~a>" s))
+ (`(identifier ,id) id)
+ (`(pp-number ,n) n)
+ (`(character-constant ,c)
+ (format #f "'~a'" c))
+ (`(punctuator ,p) p)))
+
+;; takes a token list, and return a single string literal token
+(define (stringify-tokens tokens)
+ (lexeme type: 'preprocessing-token body: `(string-literal ,(unlex tokens))))