From 473ee634ea07e988b7074e5b323e02d72e9c9c60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20H=C3=B6rnquist?= Date: Sat, 16 Jul 2022 23:44:52 +0200 Subject: Cleanup in preprocessor2. --- module/c/preprocessor2.scm | 175 +++++++++++++++++++++++---------------- tests/test/cpp/preprocessor2.scm | 45 +++++++--- 2 files changed, 137 insertions(+), 83 deletions(-) diff --git a/module/c/preprocessor2.scm b/module/c/preprocessor2.scm index 1c73187d..c6be3936 100644 --- a/module/c/preprocessor2.scm +++ b/module/c/preprocessor2.scm @@ -9,10 +9,11 @@ :select (function-like-macro variadic? identifier-list)) :use-module ((c cpp-environment object-like-macro) :select (object-like-macro object-like-macro?)) :use-module ((c cpp-environment internal-macro) :select (internal-macro)) - :use-module ((hnh util) :select (-> ->> intersperse aif swap unless unval)) + :use-module ((hnh util) :select (-> ->> intersperse aif swap unless unval break/all)) :use-module ((hnh util lens) :select (set modify cdr*)) :use-module (hnh util path) :use-module (hnh util type) + :use-module (hnh util object) :use-module ((hnh util values) :select (abort* on-fst on-snd apply/values)) :use-module ((c lex2) :select (lex @@ -40,6 +41,9 @@ (define-syntax-rule (alist-of variable key-type value-type) (build-validator-body variable (list-of (pair-of key-type value-type)))) +(define (list-of-length lst n) + (= n (length lst))) + (define parameter-map? (of-type? (alist-of string? (list-of lexeme?)))) (define (concat-token? token) (and (equal? "##" (punctuator-token? token)) @@ -255,29 +259,92 @@ "Macro isn't a macro: ~s" (list macro) #f))))) -;; Takes a list of preprocessing tokens, and returns two values -;; if the last token was '...' -;; and a list of strings of all token names + + +(define-type (parenthesis-group) + (parenthesis-group-tokens + type: (list-of (or lexeme? parenthesis-group?)))) + +(define (make-parenthesis-group tokens) + (parenthesis-group parenthesis-group-tokens: tokens)) + + +(define (flatten-group tokens) + (cond ((null? tokens) '()) + ((lexeme? (car tokens)) + (cons (car tokens) (flatten-group (cdr tokens)))) + ((parenthesis-group? (car tokens)) + (append (lex "(") + (flatten-group (parenthesis-group-tokens (car tokens))) + (lex ")") + (flatten-group (cdr tokens)))))) + + +;; Takes a list of preprocessing tokens, and returns three values +;; - if the last token was '...' +;; - a list of strings of all token names +;; - the remaining tokens ;; Note that this is ONLY #define f(x) forms ;; not usage forms (define (parse-identifier-list tokens) ;; (typecheck tokens (list-of lexeme?)) - (let loop ((tokens (remove whitespace-token? tokens)) (done '())) - (cond ((null? tokens) (values #f (reverse done))) - ((identifier-token? (car tokens)) - => (lambda (id) (loop (cdr tokens) (cons id done)))) - ((ellipsis-token? (car tokens)) - (unless (null? (cdr tokens)) - (scm-error 'cpp-error "parse-identifier-list" - "'...' only allowed as last argument in identifier list. Rest: ~s" - (list (cdr tokens)) #f)) - (values #t (reverse done))) - ((comma-token? (car tokens)) - (loop (cdr tokens) done)) - (else (scm-error 'cpp-error "parse-identifier-list" - "Unexpected preprocessing-token in identifier list: ~s" - (list (car tokens)) #f))))) - + (let* ((group remaining (parse-group (drop-whitespace tokens))) + (groups (reverse (map drop-whitespace-both + (break/all comma-token? (parenthesis-group-tokens group)))))) + ;; Checks that there where no nested parenthesis + (cond ((equal? '(()) groups) + (values #f '() remaining)) + (else + (typecheck groups (list-of (and (list-of-length 1) + (list-of lexeme?)))) + + (let ((variadic? groups (if (ellipsis-token? (caar groups)) + (values #t (cdr groups)) + (values #f groups)))) + (values + variadic? + (map (lambda (x) (or (identifier-token? x) + (scm-error 'cpp-error "parse-identifier-list" + "Unexpected preprocessing-token in identifier list: ~s" + (list x) #f))) + (map car (reverse groups))) + remaining)))))) + + + +(define (newline-count group) + (let loop ((tokens (parenthesis-group-tokens group))) + (fold (lambda (item nls) + (+ nls + (cond ((newline-token? item) 1) + ((parenthesis-group? item) (newline-count item)) + (else 0)))) + 0 tokens))) + +;; tokens ⇒ parenthesis-group, remaining-tokens +(define (parse-group tokens) + (typecheck tokens (not null?)) + (typecheck (car tokens) left-parenthesis-token?) + + (let loop ((stack '()) (remaining tokens)) + (cond ((and (not (null? stack)) + (null? (cdr stack)) + (car stack)) + parenthesis-group? + => (lambda (item) (values item remaining))) + ((null? remaining) + (scm-error 'misc-error "parse-group" + "Ran out of tokens while parsing: ~s (stack: ~s)" + (list (unlex tokens) stack) #f)) + (else + (let ((token remaining (car+cdr remaining))) + (loop (cond ((right-parenthesis-token? token) + (let ((group rest (break left-parenthesis-token? stack))) + (cons (make-parenthesis-group (reverse group)) + ;; Remove left-parenthesis + (cdr rest)))) + (else (cons token stack))) + remaining)))))) ;; returns three values: @@ -287,46 +354,14 @@ ;; The standard might call these "replacement lists" ;; Note that each returned token-list might have padding whitespace which should be trimmed. ;; It's kept to allow __VA_ARGS__ to "remember" its whitespace -(define (parse-parameter-list tokens*) - ;; (typecheck tokens* (list-of lexeme?)) - (let %loop ((depth 0) (newlines 0) (current '()) - (parameters '()) (tokens tokens*) (%first-iteration? #t)) - (define* (loop tokens key: - (depth depth) (newlines newlines) - (current current) (parameters parameters)) - (%loop depth newlines current parameters tokens #f)) - (let ((current* (if (zero? depth) - current - (cons (car tokens) current)))) - (cond ((null? tokens) - (scm-error 'misc-error "parse-parameter-list" - "Ran out of tokens while parsing: ~s" - (list tokens*) #f)) - ((newline-token? (car tokens)) - (loop (cdr tokens) newlines: (1+ newlines) current: current*)) - ((whitespace-token? (car tokens)) - (loop (cdr tokens) current: current*)) - - ((left-parenthesis-token? (car tokens)) - (loop (cdr tokens) depth: (1+ depth) current: current*)) - ((right-parenthesis-token? (car tokens)) - (if (= 1 depth) - ;; return value - (values - (reverse (cons (reverse current) parameters)) - (cdr tokens) - newlines) - (loop (cdr tokens) - depth: (1- depth) - current: current*))) - ((comma-token? (car tokens)) - (if (= 1 depth) - (loop (cdr tokens) - current: '() - parameters: (cons (reverse current) parameters)) - (loop (cdr tokens) current: current*))) - (else - (loop (cdr tokens) current: current*)))))) +(define (parse-parameter-list tokens) + (let ((group remaining (parse-group (drop-whitespace tokens)))) + ;; Checks that no inner groups where here + ;; (typecheck tokens (list-of lexeme?)) + (values (map flatten-group + (break/all comma-token? (parenthesis-group-tokens group))) + remaining + (newline-count group)))) ;; Add __FILE__ and __LINE__ object macros to the environment @@ -581,17 +616,15 @@ (cond ((and (not (null? tail)) (left-parenthesis-token? (car tail))) ;; function like macro - (let ((identifier-list - replacement-list - (break right-parenthesis-token? (cdr tail)))) - (let ((variadic? identifiers (parse-identifier-list identifier-list))) - (function-like-macro - identifier: identifier - variadic?: variadic? - identifier-list: identifiers - ;; cdr drops the end parenthesis of the definition - ;; surrounding whitespace is not part of the replacement list (6.10.3 p.7) - body: (drop-whitespace-both (cdr replacement-list)))))) + (let ((variadic? identifiers replacement-list + (parse-identifier-list tail))) + (function-like-macro + identifier: identifier + variadic?: variadic? + identifier-list: identifiers + ;; surrounding whitespace is not part of the replacement list + ;; (6.10.3 p.7) + body: (drop-whitespace-both replacement-list)))) (else (object-like-macro identifier: identifier body: (drop-whitespace-both tail)))))))) diff --git a/tests/test/cpp/preprocessor2.scm b/tests/test/cpp/preprocessor2.scm index 27332894..1df1a621 100644 --- a/tests/test/cpp/preprocessor2.scm +++ b/tests/test/cpp/preprocessor2.scm @@ -119,35 +119,44 @@ (test-group "Parse identifier list" (test-group "Single argument" - (let ((rest args (parse-identifier-list (lex "x")))) + (let ((rest args remaining (parse-identifier-list (lex "(x)")))) (test-assert (not rest)) - (test-equal '("x") args))) + (test-equal '("x") args) + (test-equal '() remaining))) (test-group "Multiple parameters" - (let ((rest args (parse-identifier-list (lex "x, y")))) + (let ((rest args remaining (parse-identifier-list (lex "(x, y)")))) (test-assert (not rest)) - (test-equal '("x" "y") args))) + (test-equal '("x" "y") args) + (test-equal '() remaining))) + (test-group "Zero parameters" + (let ((rest args remaining (parse-identifier-list (lex "()")))) + (test-assert (not rest)) + (test-equal '() args) + (test-equal '() remaining))) (test-group "Rest args after regular" - (let ((rest args (parse-identifier-list (lex "x, ...")))) + (let ((rest args remaining (parse-identifier-list (lex "(x, ...)")))) (test-assert rest) - (test-equal '("x") args))) + (test-equal '("x") args) + (test-equal '() remaining))) (test-group "Only rest args" - (let ((rest args (parse-identifier-list (lex "...")))) + (let ((rest args remaining (parse-identifier-list (lex "(...)")))) (test-assert rest) - (test-equal '() args))) + (test-equal '() args) + (test-equal '() remaining))) (test-group "Errors" (test-error "Compound forms are invalid" - 'cpp-error (parse-identifier-list (lex "(y)"))) + 'wrong-type-arg (parse-identifier-list (lex "((y))"))) (test-error "Non-identifier atoms are invalid" - 'cpp-error (parse-identifier-list (lex "1"))) + 'cpp-error (parse-identifier-list (lex "(1)"))) (test-error "Rest args not at end is invalid" - 'cpp-error (parse-identifier-list (lex "..., y"))))) + 'cpp-error (parse-identifier-list (lex "(..., y)"))))) @@ -188,6 +197,12 @@ (test-equal '() remaining) (test-equal 0 nls))) + (test-group "Two empty parameters" + (let ((containing remaining nls (parse-parameter-list (lex "(,)")))) + (test-equal (list (lex "") (lex "")) containing) + (test-equal '() remaining) + (test-equal 0 nls))) + (test-group "Numeric parameter" (let ((containing remaining nls (parse-parameter-list (lex "(1)")))) (test-equal (list (lex "1")) containing) @@ -201,7 +216,13 @@ (test-equal (list (lex "x") (lex " (y, z)")) containing) (test-equal '() remaining) - (test-equal 0 nls)))) + (test-equal 0 nls))) + + (test-group "Newline in parameters" + (let ((containing remaining nls (parse-parameter-list (lex "(\n1\n)")))) + (test-equal (list (lex "\n1\n")) containing) + (test-equal '() remaining) + (test-equal 2 nls)))) (test-group "Build parameter map" (test-equal "Simplest case, zero arguments" -- cgit v1.2.3