From ccc7848110b06479cf8a38ee843d4d3adc01a27c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20H=C3=B6rnquist?= Date: Tue, 12 Jul 2022 02:37:49 +0200 Subject: work --- module/c/cpp-environment/function-like-macro.scm | 2 +- module/c/cpp-types.scm | 14 +- module/c/lex2.scm | 19 ++- module/c/preprocessor2.scm | 163 ++++++++++++++--------- module/c/unlex.scm | 12 +- 5 files changed, 138 insertions(+), 72 deletions(-) (limited to 'module') diff --git a/module/c/cpp-environment/function-like-macro.scm b/module/c/cpp-environment/function-like-macro.scm index a4b58487..59b47c9c 100644 --- a/module/c/cpp-environment/function-like-macro.scm +++ b/module/c/cpp-environment/function-like-macro.scm @@ -12,7 +12,7 @@ (define-type (function-like-macro printer: (lambda (r p) - (format p "#<#define ~a(~a) ~a>" + (format p "#<#define ~a~a ~a>" (identifier r) (append (identifier-list r) (if (variadic? r) diff --git a/module/c/cpp-types.scm b/module/c/cpp-types.scm index e5e73d32..555120d6 100644 --- a/module/c/cpp-types.scm +++ b/module/c/cpp-types.scm @@ -11,19 +11,25 @@ punctuator-token? number-token? string-token? + h-string-token? + q-string-token? )) (define (whitespace-token? x) - (eq? 'whitespace (lexeme-type x))) + (and (lexeme? x) + (eq? 'whitespace (lexeme-type x)))) (define (comment-token? x) - (eq? 'comment (lexeme-type x))) + (and (lexeme? x) + (eq? 'comment (lexeme-type x)))) (define (preprocessing-token? x) - (eq? 'preprocessing-token (lexeme-type x))) + (and (lexeme? x) + (eq? 'preprocessing-token (lexeme-type x)))) (define (placemaker-token? x) - (eq? 'placemaker (lexeme-type x))) + (and (lexeme? x) + (eq? 'placemaker (lexeme-type x)))) (define (newline-token? x) (and (whitespace-token? x) diff --git a/module/c/lex2.scm b/module/c/lex2.scm index c00a029c..50cf56e3 100644 --- a/module/c/lex2.scm +++ b/module/c/lex2.scm @@ -285,12 +285,12 @@ ;; (6.4.7) (define-peg-pattern h-char body - (or (and (not-followed-by (or ">" "\n")) peg-any) + (or (and (not-followed-by (or ">" "\n")) peg-any) escape-sequence)) ;; (6.4.7) (define-peg-pattern q-char body - (or (and (not-followed-by (or "\"" "\n")) peg-any) + (or (and (not-followed-by (or "\"" "\n")) peg-any) escape-sequence)) ;;; A.1.9 Preprocessing numbers @@ -347,9 +347,18 @@ (`(comment ,body) (lexeme body: body type: 'comment )) (`(preprocessing-token ,body) - (lexeme body: body type: 'preprocessing-token)))) + (case body + ;; "unflatten" + ((string-literal) + (lexeme body: '(string-literal "") type: 'preprocessing-token)) + (else + (lexeme body: body type: 'preprocessing-token)))) + ;; "unflatten" + ('comment (lexeme body: "" type: 'comment)))) ;; returns a list of lexemes (define (lex string) - (map lex-output->lexeme-object - (cdr (peg:tree (match-pattern preprocessing-tokens string))))) + (if (string-null? string) + '() + (map lex-output->lexeme-object + (cdr (peg:tree (match-pattern preprocessing-tokens string)))))) diff --git a/module/c/preprocessor2.scm b/module/c/preprocessor2.scm index 7e6de2e1..44931b68 100644 --- a/module/c/preprocessor2.scm +++ b/module/c/preprocessor2.scm @@ -7,7 +7,7 @@ :use-module (c eval2) :use-module ((c cpp-environment function-like-macro) :select (function-like-macro variadic? identifier-list)) - :use-module ((c cpp-environment object-like-macro) :select (object-like-macro)) + :use-module ((c cpp-environment object-like-macro) :select (object-like-macro object-like-macro?)) :use-module ((c cpp-environment internal-macro) :select (internal-macro)) :use-module ((hnh util) :select (-> intersperse aif swap unless)) :use-module ((hnh util lens) :select (set modify cdr*)) @@ -26,6 +26,13 @@ (define parameter-map? (of-type? (alist-of string? (list-of lexeme?)))) +(define (concat-token? token) (equal? "##" (punctuator-token? token))) +(define (stringify-token? token) (equal? "#" (punctuator-token? token))) +(define (left-parenthesis-token? token) (equal? "(" (punctuator-token? token))) +(define (right-parenthesis-token? token) (equal? ")" (punctuator-token? token))) +(define (comma-token? token) (equal? "," (punctuator-token? token))) +(define (ellipsis-token? token) (equal? "..." (punctuator-token? token))) + ;; parameters is a lexeme list, as returned by parse-parameter-list (define (build-parameter-map macro parameters) (typecheck macro macro?) @@ -46,7 +53,7 @@ (typecheck parameter-map parameter-map?) (let loop ((tokens (macro-body macro))) (cond ((null? tokens) '()) - ((equal? "#" (punctuator-token? (car tokens))) + ((stringify-token? (car tokens)) (let* ((head rest (car+cdr (drop-whitespace (cdr tokens)))) (x (identifier-token? head))) (cond ((assoc-ref parameter-map x) @@ -69,7 +76,7 @@ (right tokens)) (cond ((null? right) (reverse left)) - ((equal? "##" (punctuator-token? (car right))) + ((concat-token? (car right)) (let ((l (drop-whitespace left)) (r (drop-whitespace (cdr right)))) (cond ((or (null? l) (null? r)) @@ -86,11 +93,32 @@ (else (loop (cdr l) (cons (concatenate-tokens (car l) (car r)) (cdr r))))))) (else - (let ((pre post (break (lambda (token) (equal? "##" (punctuator-token? token))) - right))) + (let ((pre post (break concat-token? right))) (loop (append left (reverse pre)) post)))))) +(define (check-arity macro parameters) + (if (variadic? macro) + (unless (>= (length parameters) + (length (macro-identifier-list macro))) + (scm-error 'cpp-arity-error "apply-macro" + "Too few arguments to variadic macro ~s, expected at least ~s, got ~s" + (list (macro-identifier macro) + (length (macro-identifier-list macro)) + (length parameters)) + (list macro))) + (unless (or (and (= 0 (length (macro-identifier-list macro))) + (= 1 (length parameters)) + (null? (car parameters))) + (= (length (macro-identifier-list macro)) + (length parameters))) + (scm-error 'cpp-arity-error "apply-macro" + "Wrong number of arguments to macro ~s, expected ~s, got ~s" + (list (macro-identifier macro) + (length (macro-identifier-list macro)) + (length parameters)) + (list macro))))) + ;; expand function like macro ;; parameter is a list of lexeme-lists, each "top level" element matching one ;; argument to the macro @@ -99,24 +127,8 @@ ;; Each element should be the lexeme list for that argument (typecheck parameters (list-of (list-of lexeme?))) (typecheck macro macro?) - (when (and (variadic? macro) - (> (length (macro-identifier-list macro)) - (length parameters))) - (scm-error 'cpp-arity-error "apply-macro" - "Too few arguments to variadic macro ~s, expected at least ~s, got ~s" - (list (macro-identifier macro) - (length (macro-identifier-list macro)) - (length parameters)) - (list macro))) - (when (and (not (variadic? macro)) - (not (= (length (macro-identifier-list macro)) - (length parameters)))) - (scm-error 'cpp-arity-error "apply-macro" - "Wrong number of arguments to macro ~s, expected ~s, got ~s" - (list (macro-identifier macro) - (length (macro-identifier-list macro)) - (length parameters)) - (list macro))) + (check-arity macro parameters) + (let () (define (resolve-cpp-variables tokens parameter-map) @@ -124,16 +136,24 @@ (assoc-ref parameter-map id)) ;; expand parameters, and place placemaker tokens - (let loop ((tokens tokens)) + (let loop ((tokens tokens) (last #f)) (cond ((null? tokens) '()) ((identifier-token? (car tokens)) bound-identifier? - => (lambda (id) (let ((replacement (assoc-ref parameter-map id))) - (if (null? replacement) - (cons (placemaker) (loop (cdr tokens))) - ;; TODO macroexpand replacement here? - (append replacement (loop (cdr tokens))))))) - (else (cons (car tokens) (loop (cdr tokens))))))) + => (lambda (id) + (let ((replacement (assoc-ref parameter-map id))) + (if (null? replacement) + (cons (placemaker) (loop (cdr tokens) #f)) + ;; macroexpand replacement here! But only if the token isn't used with ## (or #) + (append + (if (or (concat-token? last) + (next-token-matches? concat-token? tokens)) + replacement + (resolve-token-stream environment replacement once?: #t)) + (loop (cdr tokens) #f)))))) + ((whitespace-token? (car tokens)) + (cons (car tokens) (loop (cdr tokens) last))) + (else (cons (car tokens) (loop (cdr tokens) (car tokens))))))) (define parameter-map (build-parameter-map macro parameters)) @@ -162,26 +182,41 @@ (let ((name (macro-identifier macro))) (cond ((object-macro? macro) + ;; #define f(a) f(x * (a)) + ;; #define w 0,1 + ;; #define m(a) a(w) + ;; m(f) + ;; ⇒ f(0,1) + ;; instead of expected + ;; f(2 * (0,1)) (values environment (append (fold (swap mark-noexpand) (expand## (macro-body macro)) (cons name noexpand-list)) remaining-tokens))) ((function-macro? macro) - (let ((containing remaining newlines (parse-parameter-list remaining-tokens))) - (values (bump-line environment newlines) - (append (fold (swap mark-noexpand) - (apply-macro environment macro containing) - (cons name noexpand-list)) - remaining)))) + (if (next-token-matches? left-parenthesis-token? remaining-tokens) + (let ((containing remaining newlines (parse-parameter-list remaining-tokens))) + (values (bump-line environment newlines) + (append (fold (swap mark-noexpand) + (apply-macro environment macro containing) + (cons name noexpand-list)) + remaining))) + (values environment + (append (mark-noexpand (lex (macro-identifier macro)) (macro-identifier macro)) + remaining-tokens)))) ((internal-macro? macro) - (let ((containing remaining newlines (parse-parameter-list remaining-tokens))) - (values (bump-line environment newlines) - (append (fold (swap mark-noexpand) - ((macro-body macro) environment containing) - (cons name noexpand-list)) - remaining)))) + (if (next-token-matches? left-parenthesis-token? remaining-tokens) + (let ((containing remaining newlines (parse-parameter-list remaining-tokens))) + (values (bump-line environment newlines) + (append (fold (swap mark-noexpand) + ((macro-body macro) environment containing) + (cons name noexpand-list)) + remaining))) + (values environment + (append (mark-noexpand (lex (macro-identifier macro)) (macro-identifier macro)) + remaining-tokens)))) (else (scm-error 'wrong-type-arg "expand-macro" @@ -199,13 +234,13 @@ (cond ((null? tokens) (values #f (reverse done))) ((identifier-token? (car tokens)) => (lambda (id) (loop (cdr tokens) (cons id done)))) - ((equal? '(punctuator "...") (lexeme-body (car tokens))) + ((ellipsis-token? (car tokens)) (unless (null? (cdr tokens)) (scm-error 'cpp-error "parse-identifier-list" "'...' only allowed as last argument in identifier list. Rest: ~s" (list (cdr tokens)) #f)) (values #t (reverse done))) - ((equal? '(punctuator ",") (lexeme-body (car tokens))) + ((comma-token? (car tokens)) (loop (cdr tokens) done)) (else (scm-error 'cpp-error "parse-identifier-list" "Unexpected preprocessing-token in identifier list: ~s" @@ -213,6 +248,12 @@ +(define (next-token-matches? predicate tokens) + (let ((tokens (drop-whitespace tokens))) + (if (null? tokens) + #f + (predicate (car tokens))))) + ;; returns three values: ;; - a list of tokens where each is a parameter to the function like macro @@ -241,21 +282,19 @@ ((whitespace-token? (car tokens)) (loop (cdr tokens) current: current*)) - ((equal? '(punctuator "(") (lexeme-body (car tokens))) + ((left-parenthesis-token? (car tokens)) (loop (cdr tokens) depth: (1+ depth) current: current*)) - ((equal? '(punctuator ")") (lexeme-body (car tokens))) + ((right-parenthesis-token? (car tokens)) (if (= 1 depth) ;; return value (values - (reverse (if (null? current) - parameters - (cons (reverse current) parameters))) + (reverse (cons (reverse current) parameters)) (cdr tokens) newlines) (loop (cdr tokens) depth: (1- depth) current: current*))) - ((equal? '(punctuator ",") (lexeme-body (car tokens))) + ((comma-token? (car tokens)) (if (= 1 depth) (loop (cdr tokens) current: '() @@ -376,7 +415,8 @@ (else #f))) ;; Expands a token-stream which contains no pre-processing directives (#if:s, ...) -(define (resolve-token-stream environment tokens) +;; If @var{once?} is true then the resulting body won't be scanned again for tokens to expand +(define* (resolve-token-stream environment tokens key: once?) (typecheck environment cpp-environment?) (typecheck tokens (list-of lexeme?)) ;; (pprint-environment environment) @@ -392,7 +432,8 @@ (identifier-token? token) (lexeme-noexpand token) (cdr tokens))) - loop))) + ;; Here is the after expansion + (if once? (lambda (_ t) t) loop)))) (else (cons (car tokens) (loop environment (cdr tokens))))))) @@ -440,14 +481,14 @@ (cond ((null? tokens) '()) ((h-string-token? (car tokens)) => (lambda (str) - (unless (null? (remove-whitespace (cdr tokens))) + (unless (null? (drop-whitespace (cdr tokens))) (err "Unexpected tokens after #include <>")) (handle-preprocessing-tokens environment (-> str resolve-h-file read-file tokenize)))) ((q-string-token? (car tokens)) => (lambda (str) - (unless (null? (remove-whitespace (cdr tokens))) + (unless (null? (drop-whitespace (cdr tokens))) (err "Unexpected tokens after #include \"\"")) (handle-preprocessing-tokens environment @@ -493,12 +534,11 @@ (add-identifier identifier (cond ((and (not (null? tail)) - (equal? '(punctuator "(") (lexeme-body (car tail)))) + (left-parenthesis-token? (car tail))) ;; function like macro (let ((identifier-list replacement-list - (break (lambda (token) (equal? '(punctuator ")") (lexeme-body token))) - (cdr tail)))) + (break right-parenthesis-token? (cdr tail)))) (let ((variadic? identifiers (parse-identifier-list identifier-list))) (function-like-macro identifier: identifier @@ -535,7 +575,7 @@ (let ((environment (bump-line environment)) (tokens* (drop-whitespace (cdr tokens)))) (cond ((null? tokens*) (values environment '())) - ((equal? '(punctuator "#") (lexeme-body (car tokens*))) + ((equal? "#" (punctuator-token? (car tokens*))) (let ((line-tokens remaining-tokens (tokens-until-eol (cdr tokens*)))) ;; drop whitespace after to not "eat" the next newline token (let ((line-tokens (drop-whitespace line-tokens))) @@ -564,7 +604,7 @@ (lambda (environment tokens) (loop environment (append tokens remaining-tokens)))) - (let ((operation + (let ((operation ; (environment, list token) → environment (case directive ((if) resolve-for-if) ((ifdef) @@ -591,7 +631,10 @@ (values env* (append (unless (in-comment-block? environment) (resolve-token-stream environment line-tokens)) - tokens*)))))))) + ;; The initial newline is presreved here, for better output, + ;; and to keep at least one whitespace token when there was one previously. + ;; possibly also keep a newline for line-directives. + (append (unless (null? remaining-tokens) (lex "\n")) tokens*))))))))) (else (err "Unexpected middle of line"))))) diff --git a/module/c/unlex.scm b/module/c/unlex.scm index 9f4b25b9..18e800d9 100644 --- a/module/c/unlex.scm +++ b/module/c/unlex.scm @@ -8,8 +8,15 @@ stringify-token stringify-tokens)) -;; takes a list of preprocessing-token's, and return a "source" string (define (unlex tokens) + (typecheck tokens (list-of lexeme?)) + (string-concatenate + (map (lambda (x) (cond (x preprocessing-token? => stringify-token) + ((whitespace-token? x) (lexeme-body x)))) + tokens))) + +;; takes a list of preprocessing-token's, and return a "source" string +(define (unlex-aggressive tokens) (typecheck tokens (list-of lexeme?)) (string-concatenate (map (lambda (x) @@ -34,4 +41,5 @@ ;; takes a token list, and return a single string literal token (define (stringify-tokens tokens) - (lexeme type: 'preprocessing-token body: `(string-literal ,(unlex tokens)))) + (lexeme type: 'preprocessing-token + body: `(string-literal ,(unlex-aggressive tokens)))) -- cgit v1.2.3