aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHugo Hörnquist <hugo@lysator.liu.se>2022-07-13 11:35:39 +0200
committerHugo Hörnquist <hugo@lysator.liu.se>2022-07-13 11:35:39 +0200
commit7b78cf68200118fac395592da3d78b22b4be0cd7 (patch)
tree9cd8411147b6732a7288e4ba40ade220658d73c3
parentEnsure #error works. (diff)
downloadcalp-7b78cf68200118fac395592da3d78b22b4be0cd7.tar.gz
calp-7b78cf68200118fac395592da3d78b22b4be0cd7.tar.xz
Add support for "other" in preprocessing-tokens.
-rw-r--r--module/c/cpp-types.scm7
-rw-r--r--module/c/lex2.scm16
-rw-r--r--module/c/unlex.scm6
-rw-r--r--tests/test/cpp/lex2.scm12
4 files changed, 36 insertions, 5 deletions
diff --git a/module/c/cpp-types.scm b/module/c/cpp-types.scm
index 1df70594..82ebb922 100644
--- a/module/c/cpp-types.scm
+++ b/module/c/cpp-types.scm
@@ -5,8 +5,9 @@
:export (whitespace-token?
comment-token?
preprocessing-token?
- newline-token?
+ other-token?
placemaker-token?
+ newline-token?
identifier-token?
punctuator-token?
number-token?
@@ -27,6 +28,10 @@
(and (lexeme? x)
(eq? 'preprocessing-token (lexeme-type x))))
+(define (other-token? x)
+ (and (lexeme? x)
+ (eq? 'other (lexeme-type x))))
+
(define (placemaker-token? x)
(and (lexeme? x)
(eq? 'placemaker (lexeme-type x))))
diff --git a/module/c/lex2.scm b/module/c/lex2.scm
index fcddcdc4..72f79f55 100644
--- a/module/c/lex2.scm
+++ b/module/c/lex2.scm
@@ -325,15 +325,25 @@
(define-peg-pattern comment all
(or line-comment block-comment))
+(define-peg-pattern non-whitespace all
+ (and (not-followed-by whitespace)
+ peg-any))
+
(define-peg-pattern preprocessing-tokens all
(* (or whitespace
comment
- preprocessing-token)))
+ preprocessing-token
+ non-whitespace)))
+;; comment could be merged with whitespace, but then unlex would have to know that
+
+;; other is the "each non-white-space character that cannot be one of the above"
+;; clause from 6.4 p. 1
+
(define-type (lexeme)
- (type type: (memv '(whitespace comment preprocessing-token placemaker)))
+ (type type: (memv '(whitespace comment preprocessing-token other placemaker)))
(body type: (or string? list?))
(noexpand type: (list-of string?)
default: '()))
@@ -343,6 +353,8 @@
(define (lex-output->lexeme-object x)
(match x
+ (`(non-whitespace ,body)
+ (lexeme body: body type: 'other))
(`(whitespace ,body)
(lexeme body: body type: 'whitespace ))
(`(comment ,body)
diff --git a/module/c/unlex.scm b/module/c/unlex.scm
index e3d36f86..e714816d 100644
--- a/module/c/unlex.scm
+++ b/module/c/unlex.scm
@@ -13,7 +13,8 @@
(typecheck tokens (list-of lexeme?))
(string-concatenate
(map (lambda (x) (cond (x preprocessing-token? => stringify-token)
- ((whitespace-token? x) (lexeme-body x))))
+ ((whitespace-token? x) (lexeme-body x))
+ ((other-token? x) (lexeme-body x))))
tokens)))
;; takes a list of preprocessing-token's, and return a "source" string
@@ -22,7 +23,8 @@
(string-concatenate
(map (lambda (x)
(cond ((preprocessing-token? x) (stringify-token x))
- ((whitespace-token? x) " ")))
+ ((whitespace-token? x) " ")
+ ((other-token? x) (lexeme-body x))))
(squeeze-whitespace tokens))))
(define (stringify-escape-sequence sub-token)
diff --git a/tests/test/cpp/lex2.scm b/tests/test/cpp/lex2.scm
index b80bcf37..47bb4a16 100644
--- a/tests/test/cpp/lex2.scm
+++ b/tests/test/cpp/lex2.scm
@@ -80,3 +80,15 @@
(lexeme type: 'whitespace body: "\n")
(lexeme type: 'whitespace body: " "))
(lex " \n "))
+
+
+;; Refer to 6.4 p.1 for the syntax requirement
+;; 6.10.9 p. 2 for the sample string
+(test-equal "each non-white-space character that cannot be one of the above"
+ (list (lexeme type: 'preprocessing-token body: '(punctuator "."))
+ (lexeme type: 'preprocessing-token body: '(punctuator "."))
+ (lexeme type: 'other body: "\\") ; <- Interesting part
+ (lexeme type: 'preprocessing-token body: '(identifier "listing"))
+ (lexeme type: 'preprocessing-token body: '(punctuator "."))
+ (lexeme type: 'preprocessing-token body: '(identifier "dir")))
+ (lex "..\\listing.dir"))