From 39a878d1130bd33d7d1b1380617747187e01fb73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20H=C3=B6rnquist?= Date: Sat, 23 Jul 2022 21:42:19 +0200 Subject: Only parse h-strings where applicable. --- module/c/cpp-types.scm | 2 +- module/c/lex2.scm | 111 +++++++++++++++++++++++---------------- module/c/preprocessor2.scm | 2 +- tests/test/cpp/lex2.scm | 23 +++++++- tests/test/cpp/preprocessor2.scm | 2 +- 5 files changed, 90 insertions(+), 50 deletions(-) diff --git a/module/c/cpp-types.scm b/module/c/cpp-types.scm index bf51d009..a6299996 100644 --- a/module/c/cpp-types.scm +++ b/module/c/cpp-types.scm @@ -80,7 +80,7 @@ (define (h-string-token? token) (and (preprocessing-token? token) (match (lexeme-body token) - (`(header-name (h-string ,x)) x) + (`(h-string ,x) x) (_ #f)))) ;; NOTE q-string tokens are never produced by the lexer, diff --git a/module/c/lex2.scm b/module/c/lex2.scm index af90dcce..6c13f0f9 100644 --- a/module/c/lex2.scm +++ b/module/c/lex2.scm @@ -4,7 +4,7 @@ :use-module ((hnh util) :select (->)) :use-module (hnh util object) :use-module (hnh util type) - :use-module ((srfi srfi-1) :select (fold)) + :use-module ((srfi srfi-1) :select (fold append-map!)) :use-module (srfi srfi-88) :use-module ((c trigraph) :select (replace-trigraphs)) :use-module ((c line-fold) :select (fold-lines)) @@ -37,7 +37,8 @@ ;; string literal moved before header-name since string literals ;; otherwise became q-strings (or string-literal - header-name + ;; header-name + h-string-compound character-constant identifier pp-number @@ -288,25 +289,40 @@ ;;; A.1.8 Header names +(define-peg-pattern h-string-leadup none + (and "#" (* inline-whitespace) + "include" (* inline-whitespace) + "<")) + +;; Weird case which handles that '<' and '>' change semantics when after #include +;; 6.4.7 p. 4 +;; NOTE this is techincally incorrect, since this form is only valid at start of line. +;; Consider the case +;; #define s(x) #x +;; s(#include ) +;; That can't however easily be added since PEG has no start-of-line operator +(define-peg-pattern h-string-compound all + (and h-string-leadup h-string (ignore ">"))) + (define-peg-pattern h-string all (+ h-char)) -(define-peg-pattern q-string all (+ q-char)) +;; (define-peg-pattern q-string all (+ q-char)) -;; (6.4.7) -(define-peg-pattern header-name all - (or (and (ignore "<") h-string (ignore ">")) - ;; NOTE this case will never be reached, since it's treated as a regular - ;; string instead - (and (ignore "\"") q-string (ignore "\"")))) +;; ;; (6.4.7) +;; (define-peg-pattern header-name all +;; (or (and (ignore "<") h-string (ignore ">")) +;; ;; NOTE this case will never be reached, since it's treated as a regular +;; ;; string instead +;; (and (ignore "\"") q-string (ignore "\"")))) ;; (6.4.7) (define-peg-pattern h-char body (or (and (not-followed-by (or ">" "\n")) peg-any) escape-sequence)) -;; (6.4.7) -(define-peg-pattern q-char body - (or (and (not-followed-by (or "\"" "\n")) peg-any) - escape-sequence)) +;; ;; (6.4.7) +;; (define-peg-pattern q-char body +;; (or (and (not-followed-by (or "\"" "\n")) peg-any) +;; escape-sequence)) ;;; A.1.9 Preprocessing numbers @@ -321,10 +337,11 @@ +(define-peg-pattern inline-whitespace body + (or "\t" "\v" "\f" " ")) + (define-peg-pattern whitespace all - (or "\t" "\n" "\v" "\f" " " - ;; "\r" - )) + (or "\n" inline-whitespace)) (define-peg-pattern block-comment body (and (ignore "/*") @@ -366,34 +383,38 @@ (define (placemaker) (lexeme type: 'placemaker body: '())) -(define (lex-output->lexeme-object x) +(define (lex-output->lexeme-objects x) (match x (`(non-whitespace ,body) - (lexeme body: body type: 'other)) + (list (lexeme body: body type: 'other))) (`(whitespace ,body) - (lexeme body: body type: 'whitespace )) + (list (lexeme body: body type: 'whitespace))) (`(comment ,body) - (lexeme body: body type: 'comment )) + (list (lexeme body: body type: 'comment))) (`(preprocessing-token ,body) (match body ('string-literal ;; Unflatten case - (lexeme body: '(string-literal (encoding-prefix) "") - type: 'preprocessing-token)) + (list (lexeme body: '(string-literal (encoding-prefix) "") + type: 'preprocessing-token))) (('string-literal `(encoding-prefix ,px) args ...) - (lexeme body: `(string-literal (encoding-prefix . ,px) ,@args) - type: 'preprocessing-token)) + (list (lexeme body: `(string-literal (encoding-prefix . ,px) ,@args) + type: 'preprocessing-token))) (('string-literal args ...) - (lexeme body: `(string-literal (encoding-prefix) ,@args) - type: 'preprocessing-token)) + (list (lexeme body: `(string-literal (encoding-prefix) ,@args) + type: 'preprocessing-token))) (('character-constant `(character-prefix ,px) args ...) - (lexeme body: `(character-constant (character-prefix . ,px) - ,@args) - type: 'preprocessing-token)) + (list (lexeme body: `(character-constant (character-prefix . ,px) + ,@args) + type: 'preprocessing-token))) (('character-constant args ...) - (lexeme body: `(character-constant (character-prefix) ,@args) - type: 'preprocessing-token)) - (body (lexeme body: body type: 'preprocessing-token)))) + (list (lexeme body: `(character-constant (character-prefix) ,@args) + type: 'preprocessing-token))) + (`(h-string-compound ,body) + (list (lexeme type: 'preprocessing-token body: '(punctuator "#")) + (lexeme type: 'preprocessing-token body: '(identifier "include")) + (lexeme type: 'preprocessing-token body: body))) + (body (list (lexeme body: body type: 'preprocessing-token))))) ;; "unflatten" ('comment (lexeme body: "" type: 'comment)))) @@ -409,19 +430,19 @@ (define (lex string) (if (string-null? string) '() - (map lex-output->lexeme-object - (let ((result (match-pattern preprocessing-tokens string))) - (let ((trailing (substring (peg:string result) - (peg:end result)))) - (unless (string-null? trailing) - (scm-error 'cpp-lex-error "lex" - "Failed to lex string, remaining trailing characters: ~s" - (list trailing) #f))) - (unless (list? (peg:tree result)) - (scm-error 'cpp-lex-error "lex" - "Parsing just failed. Chars: ~s" - (list (peg:string result)) #f)) - (cdr (peg:tree result)))))) + (append-map! lex-output->lexeme-objects + (let ((result (match-pattern preprocessing-tokens string))) + (let ((trailing (substring (peg:string result) + (peg:end result)))) + (unless (string-null? trailing) + (scm-error 'cpp-lex-error "lex" + "Failed to lex string, remaining trailing characters: ~s" + (list trailing) #f))) + (unless (list? (peg:tree result)) + (scm-error 'cpp-lex-error "lex" + "Parsing just failed. Chars: ~s" + (list (peg:string result)) #f)) + (cdr (peg:tree result)))))) diff --git a/module/c/preprocessor2.scm b/module/c/preprocessor2.scm index f4451e84..e4590d41 100644 --- a/module/c/preprocessor2.scm +++ b/module/c/preprocessor2.scm @@ -535,7 +535,7 @@ (map (lambda (path-prefix) (path-append path-prefix string)) (c-search-path))) (scm-error 'cpp-error "resolve-h-file" - "Can't resolve file: ~s" + "Can't find file: <~a>" (list string) #f))))) ;; #include "myheader.h" diff --git a/tests/test/cpp/lex2.scm b/tests/test/cpp/lex2.scm index 54bc0aff..e30aac31 100644 --- a/tests/test/cpp/lex2.scm +++ b/tests/test/cpp/lex2.scm @@ -9,6 +9,9 @@ (define (ls . xs) (map l xs)) +;; See comment on h-string-compound in (c lex2) +(test-expect-fail "H-string looking as argument to macro") + (test-equal "Integer literal" (ls '(pp-number "10")) (lex "10")) @@ -108,17 +111,33 @@ (test-equal "Propper H-string" (list (l '(punctuator "#")) (l '(identifier "include")) - (lexeme type: 'whitespace body: " ") + ;; (lexeme type: 'whitespace body: " ") (l '(h-string "a"))) (lex "#include ")) - (test-equal "Not a H string" (ls '(punctuator "<") '(identifier "a") '(punctuator ">")) (lex "")) + ;; Assume that s is defined as + ;; #define s(x) #x + ;; then the following expression would expand to + ;; "#include" + (test-equal "H-string looking as argument to macro" + (ls '(identifier "s") + '(punctuator "(") + '(punctuator "#") + '(identifier "include") + '(punctuator "<") + '(identifier "a") + '(punctuator ">") + '(punctuator ")")) + (lex "s(#include)")) + + ;; The standard says this case is undefined + ;; 6.4.7 p. 3 (test-equal "Quotation mark inside h-string" (ls '(punctuator "#") '(identifier "include") diff --git a/tests/test/cpp/preprocessor2.scm b/tests/test/cpp/preprocessor2.scm index fd18ddce..be5ba94c 100644 --- a/tests/test/cpp/preprocessor2.scm +++ b/tests/test/cpp/preprocessor2.scm @@ -591,7 +591,7 @@ (test-equal "H-string" "/usr/include/stdio.h" (resolve-header (make-environment) - (lex ""))) + (cddr (lex "#include ")))) (test-equal "Q-string" "/usr/include/stdio.h" (resolve-header (make-environment) -- cgit v1.2.3