aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHugo Hörnquist <hugo@lysator.liu.se>2022-07-23 21:42:19 +0200
committerHugo Hörnquist <hugo@lysator.liu.se>2022-08-18 16:38:37 +0200
commit39a878d1130bd33d7d1b1380617747187e01fb73 (patch)
tree5ea885ebe8eac148c6967fa7a24f6821ca24a8de
parentRewrote H-string tests to be what they should. (diff)
downloadcalp-39a878d1130bd33d7d1b1380617747187e01fb73.tar.gz
calp-39a878d1130bd33d7d1b1380617747187e01fb73.tar.xz
Only parse h-strings where applicable.
-rw-r--r--module/c/cpp-types.scm2
-rw-r--r--module/c/lex2.scm111
-rw-r--r--module/c/preprocessor2.scm2
-rw-r--r--tests/test/cpp/lex2.scm23
-rw-r--r--tests/test/cpp/preprocessor2.scm2
5 files changed, 90 insertions, 50 deletions
diff --git a/module/c/cpp-types.scm b/module/c/cpp-types.scm
index bf51d009..a6299996 100644
--- a/module/c/cpp-types.scm
+++ b/module/c/cpp-types.scm
@@ -80,7 +80,7 @@
(define (h-string-token? token)
(and (preprocessing-token? token)
(match (lexeme-body token)
- (`(header-name (h-string ,x)) x)
+ (`(h-string ,x) x)
(_ #f))))
;; NOTE q-string tokens are never produced by the lexer,
diff --git a/module/c/lex2.scm b/module/c/lex2.scm
index af90dcce..6c13f0f9 100644
--- a/module/c/lex2.scm
+++ b/module/c/lex2.scm
@@ -4,7 +4,7 @@
:use-module ((hnh util) :select (->))
:use-module (hnh util object)
:use-module (hnh util type)
- :use-module ((srfi srfi-1) :select (fold))
+ :use-module ((srfi srfi-1) :select (fold append-map!))
:use-module (srfi srfi-88)
:use-module ((c trigraph) :select (replace-trigraphs))
:use-module ((c line-fold) :select (fold-lines))
@@ -37,7 +37,8 @@
;; string literal moved before header-name since string literals
;; otherwise became q-strings
(or string-literal
- header-name
+ ;; header-name
+ h-string-compound
character-constant
identifier
pp-number
@@ -288,25 +289,40 @@
;;; A.1.8 Header names
+(define-peg-pattern h-string-leadup none
+ (and "#" (* inline-whitespace)
+ "include" (* inline-whitespace)
+ "<"))
+
+;; Weird case which handles that '<' and '>' change semantics when after #include
+;; 6.4.7 p. 4
+;; NOTE this is techincally incorrect, since this form is only valid at start of line.
+;; Consider the case
+;; #define s(x) #x
+;; s(#include <test>)
+;; That can't however easily be added since PEG has no start-of-line operator
+(define-peg-pattern h-string-compound all
+ (and h-string-leadup h-string (ignore ">")))
+
(define-peg-pattern h-string all (+ h-char))
-(define-peg-pattern q-string all (+ q-char))
+;; (define-peg-pattern q-string all (+ q-char))
-;; (6.4.7)
-(define-peg-pattern header-name all
- (or (and (ignore "<") h-string (ignore ">"))
- ;; NOTE this case will never be reached, since it's treated as a regular
- ;; string instead
- (and (ignore "\"") q-string (ignore "\""))))
+;; ;; (6.4.7)
+;; (define-peg-pattern header-name all
+;; (or (and (ignore "<") h-string (ignore ">"))
+;; ;; NOTE this case will never be reached, since it's treated as a regular
+;; ;; string instead
+;; (and (ignore "\"") q-string (ignore "\""))))
;; (6.4.7)
(define-peg-pattern h-char body
(or (and (not-followed-by (or ">" "\n")) peg-any)
escape-sequence))
-;; (6.4.7)
-(define-peg-pattern q-char body
- (or (and (not-followed-by (or "\"" "\n")) peg-any)
- escape-sequence))
+;; ;; (6.4.7)
+;; (define-peg-pattern q-char body
+;; (or (and (not-followed-by (or "\"" "\n")) peg-any)
+;; escape-sequence))
;;; A.1.9 Preprocessing numbers
@@ -321,10 +337,11 @@
+(define-peg-pattern inline-whitespace body
+ (or "\t" "\v" "\f" " "))
+
(define-peg-pattern whitespace all
- (or "\t" "\n" "\v" "\f" " "
- ;; "\r"
- ))
+ (or "\n" inline-whitespace))
(define-peg-pattern block-comment body
(and (ignore "/*")
@@ -366,34 +383,38 @@
(define (placemaker)
(lexeme type: 'placemaker body: '()))
-(define (lex-output->lexeme-object x)
+(define (lex-output->lexeme-objects x)
(match x
(`(non-whitespace ,body)
- (lexeme body: body type: 'other))
+ (list (lexeme body: body type: 'other)))
(`(whitespace ,body)
- (lexeme body: body type: 'whitespace ))
+ (list (lexeme body: body type: 'whitespace)))
(`(comment ,body)
- (lexeme body: body type: 'comment ))
+ (list (lexeme body: body type: 'comment)))
(`(preprocessing-token ,body)
(match body
('string-literal
;; Unflatten case
- (lexeme body: '(string-literal (encoding-prefix) "")
- type: 'preprocessing-token))
+ (list (lexeme body: '(string-literal (encoding-prefix) "")
+ type: 'preprocessing-token)))
(('string-literal `(encoding-prefix ,px) args ...)
- (lexeme body: `(string-literal (encoding-prefix . ,px) ,@args)
- type: 'preprocessing-token))
+ (list (lexeme body: `(string-literal (encoding-prefix . ,px) ,@args)
+ type: 'preprocessing-token)))
(('string-literal args ...)
- (lexeme body: `(string-literal (encoding-prefix) ,@args)
- type: 'preprocessing-token))
+ (list (lexeme body: `(string-literal (encoding-prefix) ,@args)
+ type: 'preprocessing-token)))
(('character-constant `(character-prefix ,px) args ...)
- (lexeme body: `(character-constant (character-prefix . ,px)
- ,@args)
- type: 'preprocessing-token))
+ (list (lexeme body: `(character-constant (character-prefix . ,px)
+ ,@args)
+ type: 'preprocessing-token)))
(('character-constant args ...)
- (lexeme body: `(character-constant (character-prefix) ,@args)
- type: 'preprocessing-token))
- (body (lexeme body: body type: 'preprocessing-token))))
+ (list (lexeme body: `(character-constant (character-prefix) ,@args)
+ type: 'preprocessing-token)))
+ (`(h-string-compound ,body)
+ (list (lexeme type: 'preprocessing-token body: '(punctuator "#"))
+ (lexeme type: 'preprocessing-token body: '(identifier "include"))
+ (lexeme type: 'preprocessing-token body: body)))
+ (body (list (lexeme body: body type: 'preprocessing-token)))))
;; "unflatten"
('comment (lexeme body: "" type: 'comment))))
@@ -409,19 +430,19 @@
(define (lex string)
(if (string-null? string)
'()
- (map lex-output->lexeme-object
- (let ((result (match-pattern preprocessing-tokens string)))
- (let ((trailing (substring (peg:string result)
- (peg:end result))))
- (unless (string-null? trailing)
- (scm-error 'cpp-lex-error "lex"
- "Failed to lex string, remaining trailing characters: ~s"
- (list trailing) #f)))
- (unless (list? (peg:tree result))
- (scm-error 'cpp-lex-error "lex"
- "Parsing just failed. Chars: ~s"
- (list (peg:string result)) #f))
- (cdr (peg:tree result))))))
+ (append-map! lex-output->lexeme-objects
+ (let ((result (match-pattern preprocessing-tokens string)))
+ (let ((trailing (substring (peg:string result)
+ (peg:end result))))
+ (unless (string-null? trailing)
+ (scm-error 'cpp-lex-error "lex"
+ "Failed to lex string, remaining trailing characters: ~s"
+ (list trailing) #f)))
+ (unless (list? (peg:tree result))
+ (scm-error 'cpp-lex-error "lex"
+ "Parsing just failed. Chars: ~s"
+ (list (peg:string result)) #f))
+ (cdr (peg:tree result))))))
diff --git a/module/c/preprocessor2.scm b/module/c/preprocessor2.scm
index f4451e84..e4590d41 100644
--- a/module/c/preprocessor2.scm
+++ b/module/c/preprocessor2.scm
@@ -535,7 +535,7 @@
(map (lambda (path-prefix) (path-append path-prefix string))
(c-search-path)))
(scm-error 'cpp-error "resolve-h-file"
- "Can't resolve file: ~s"
+ "Can't find file: <~a>"
(list string) #f)))))
;; #include "myheader.h"
diff --git a/tests/test/cpp/lex2.scm b/tests/test/cpp/lex2.scm
index 54bc0aff..e30aac31 100644
--- a/tests/test/cpp/lex2.scm
+++ b/tests/test/cpp/lex2.scm
@@ -9,6 +9,9 @@
(define (ls . xs)
(map l xs))
+;; See comment on h-string-compound in (c lex2)
+(test-expect-fail "H-string looking as argument to macro")
+
(test-equal "Integer literal"
(ls '(pp-number "10"))
(lex "10"))
@@ -108,17 +111,33 @@
(test-equal "Propper H-string"
(list (l '(punctuator "#"))
(l '(identifier "include"))
- (lexeme type: 'whitespace body: " ")
+ ;; (lexeme type: 'whitespace body: " ")
(l '(h-string "a")))
(lex "#include <a>"))
-
(test-equal "Not a H string"
(ls '(punctuator "<")
'(identifier "a")
'(punctuator ">"))
(lex "<a>"))
+ ;; Assume that s is defined as
+ ;; #define s(x) #x
+ ;; then the following expression would expand to
+ ;; "#include<a>"
+ (test-equal "H-string looking as argument to macro"
+ (ls '(identifier "s")
+ '(punctuator "(")
+ '(punctuator "#")
+ '(identifier "include")
+ '(punctuator "<")
+ '(identifier "a")
+ '(punctuator ">")
+ '(punctuator ")"))
+ (lex "s(#include<a>)"))
+
+ ;; The standard says this case is undefined
+ ;; 6.4.7 p. 3
(test-equal "Quotation mark inside h-string"
(ls '(punctuator "#")
'(identifier "include")
diff --git a/tests/test/cpp/preprocessor2.scm b/tests/test/cpp/preprocessor2.scm
index fd18ddce..be5ba94c 100644
--- a/tests/test/cpp/preprocessor2.scm
+++ b/tests/test/cpp/preprocessor2.scm
@@ -591,7 +591,7 @@
(test-equal "H-string"
"/usr/include/stdio.h"
(resolve-header (make-environment)
- (lex "<stdio.h>")))
+ (cddr (lex "#include <stdio.h>"))))
(test-equal "Q-string"
"/usr/include/stdio.h"
(resolve-header (make-environment)