aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHugo Hörnquist <hugo@lysator.liu.se>2022-07-20 16:25:35 +0200
committerHugo Hörnquist <hugo@lysator.liu.se>2022-07-21 14:27:23 +0200
commit37dc6cc5ab804da964f22787561d030898115809 (patch)
treeb8d0acd2fcc8f0c2c51b2d76a4d1e5d942143a2b
parentCleanup in lex2 test. (diff)
downloadcalp-37dc6cc5ab804da964f22787561d030898115809.tar.gz
calp-37dc6cc5ab804da964f22787561d030898115809.tar.xz
Acknowledge string prefixes.
-rw-r--r--module/c/cpp-util.scm10
-rw-r--r--module/c/lex2.scm26
-rw-r--r--module/c/preprocessor2.scm20
-rw-r--r--module/c/unlex.scm15
-rw-r--r--tests/test/cpp/lex2.scm35
-rw-r--r--tests/test/cpp/util.scm14
6 files changed, 93 insertions, 27 deletions
diff --git a/module/c/cpp-util.scm b/module/c/cpp-util.scm
index 633c5a0c..9674317b 100644
--- a/module/c/cpp-util.scm
+++ b/module/c/cpp-util.scm
@@ -117,12 +117,14 @@
((null? (cdr tokens)) tokens)
((string-token? (car tokens))
(lambda (a . _) a)
- => (lambda parts-a
+ => (lambda (prefix-a . parts-a)
(cond ((string-token? (cadr tokens))
(lambda (a . _) a)
- => (lambda parts-b (merge-string-literals
- (cons (make-string-literal (append parts-a parts-b))
- (cddr tokens)))))
+ => (lambda (prefix-b . parts-b)
+ (merge-string-literals
+ ;; TODO check validity of prefixes
+ (cons (make-string-literal (cons prefix-a (append parts-a parts-b)))
+ (cddr tokens)))))
(else (cons (car tokens)
(merge-string-literals (cdr tokens)))))))
(else (cons (car tokens) (merge-string-literals (cdr tokens))))))
diff --git a/module/c/lex2.scm b/module/c/lex2.scm
index c09f6423..85c9be19 100644
--- a/module/c/lex2.scm
+++ b/module/c/lex2.scm
@@ -374,12 +374,26 @@
(`(comment ,body)
(lexeme body: body type: 'comment ))
(`(preprocessing-token ,body)
- (case body
- ;; "unflatten"
- ((string-literal)
- (lexeme body: '(string-literal "") type: 'preprocessing-token))
- (else
- (lexeme body: body type: 'preprocessing-token))))
+ (match body
+ ('string-literal
+ ;; Unflatten case
+ (lexeme body: '(string-literal (encoding-prefix) "")
+ type: 'preprocessing-token))
+ (('string-literal `(encoding-prefix ,px) args ...)
+ (lexeme body: `(string-literal (encoding-prefix . ,px) ,@args)
+ type: 'preprocessing-token))
+ (('string-literal args ...)
+ (lexeme body: `(string-literal (encoding-prefix) ,@args)
+ type: 'preprocessing-token))
+ (('character-constant `(character-prefix ,px) args ...)
+ (lexeme body: `(character-constant (character-prefix . ,px)
+ ,@args)
+ type: 'preprocessing-token))
+ (('character-constant args ...)
+ (lexeme body: `(character-constant (character-prefix) ,@args)
+ type: 'preprocessing-token))
+ (body (lexeme body: body type: 'preprocessing-token))))
+
;; "unflatten"
('comment (lexeme body: "" type: 'comment))))
diff --git a/module/c/preprocessor2.scm b/module/c/preprocessor2.scm
index c6be3936..3f9552c5 100644
--- a/module/c/preprocessor2.scm
+++ b/module/c/preprocessor2.scm
@@ -397,11 +397,15 @@
body: (lambda (environment arguments)
(typecheck arguments (and (list-of (list-of lexeme?))
(not null?)))
- (aif (string-token? (caar arguments))
- (values (handle-pragma environment (lex it)) '())
- (scm-error 'cpp-pragma-error "_Pragma"
- "Invalid argument to _Pragma: ~s"
- (list (car arguments)) #f)))))
+ (cond ((string-token? (caar arguments))
+ (lambda (a . _) a)
+ ;; TODO handle rest
+ => (lambda (encoding it . rest)
+ (values (handle-pragma environment (lex it))
+ '())))
+ (else (scm-error 'cpp-pragma-error "_Pragma"
+ "Invalid argument to _Pragma: ~s"
+ (list (car arguments)) #f))))))
@@ -591,10 +595,12 @@
(remaining (drop-whitespace (cdr tokens))))
(cond ((null? remaining) (set environment current-line (1- line)))
((string-token? (car remaining))
- => (lambda (file)
+ (lambda (a . _) a)
+ => (lambda (encoding . fragments)
(-> environment
(set current-line (1- line))
- (set current-file file))))
+ ;; TODO properly join string
+ (set current-file (car fragments)))))
;; no newlines in #line
(%first-time (loop #f ((unval resolve-token-stream 1) environment tokens)))
(else (err))))))
diff --git a/module/c/unlex.scm b/module/c/unlex.scm
index 1f8ba0a1..e467a50f 100644
--- a/module/c/unlex.scm
+++ b/module/c/unlex.scm
@@ -59,21 +59,26 @@
;; Returns the "source" of the token, as a preprocessing string literal token
(define (stringify-token preprocessing-token)
(match (lexeme-body preprocessing-token)
- (('string-literal `(encoding-prefix ,prefix) parts ...)
- (stringify-string-tokens parts))
- (('string-literal parts ...)
+ (('string-literal `(encoding-prefix . ,prefix) parts ...)
(stringify-string-tokens parts))
+
(`(header-name (q-string ,s))
(format #f "~s" s))
+
(`(header-name (h-string ,s))
(format #f "<~a>" s))
+
(`(identifier ,id) id)
+
(`(pp-number ,n) n)
- (`(character-constant ,c)
+
+ ;; TODO remaining parts
+ (('character-constant `(character-encoding . ,x) c parts ...)
(format #f "'~a'" c))
+
(`(punctuator ,p) p)))
;; takes a token list, and return a single string literal token
(define (stringify-tokens tokens)
(lexeme type: 'preprocessing-token
- body: `(string-literal ,(unlex-aggressive tokens))))
+ body: `(string-literal (encoding-prefix) ,(unlex-aggressive tokens))))
diff --git a/tests/test/cpp/lex2.scm b/tests/test/cpp/lex2.scm
index af6163e8..f4f9b857 100644
--- a/tests/test/cpp/lex2.scm
+++ b/tests/test/cpp/lex2.scm
@@ -14,7 +14,7 @@
(lex "10"))
(test-equal "String literal"
- (ls `(string-literal "Hello"))
+ (ls `(string-literal (encoding-prefix) "Hello"))
(lex "\"Hello\""))
@@ -31,7 +31,7 @@
(test-equal "Comment inside string"
- (ls `(string-literal "Hel/*lo"))
+ (ls `(string-literal (encoding-prefix) "Hel/*lo"))
(lex "\"Hel/*lo\""))
(test-equal "#define line"
@@ -117,7 +117,7 @@
(test-equal "Interaction of h-strings and regular strings"
(test-equal "Less than string, not h-string"
(ls '(pp-number "1")
- '(string-literal "<")
+ '(string-literal (encoding-prefix) "<")
'(punctuator ">"))
(lex "1\"<\">"))
@@ -131,7 +131,7 @@
(list (l '(punctuator "#"))
(l '(identifier "include"))
(lexeme type: 'whitespace body: " ")
- (l '(string-literal "test")))
+ (l '(string-literal (encoding-prefix) "test")))
;; # include here, since generated tokens could possible depend on that context,
;; and the reason regular strings are returned is since the lexer doesn't check
;; that context
@@ -142,7 +142,7 @@
(test-group "Unicode"
(test-equal "In string literals"
- (ls '(string-literal "åäö"))
+ (ls '(string-literal (encoding-prefix) "åäö"))
(lex "\"åäö\""))
(test-equal "Outside string literals"
@@ -150,3 +150,28 @@
(lexeme type: 'other body: "ä")
(lexeme type: 'other body: "ö"))
(lex "åäö")))
+
+
+
+
+(test-group "Characters with prefixes"
+ (test-equal (ls '(character-constant (character-prefix . "u")
+ "a"))
+ (lex "u'a'"))
+ (test-equal (ls '(character-constant (character-prefix . "U")
+ "a"))
+ (lex "U'a'"))
+ (test-equal (ls '(character-constant (character-prefix . "L")
+ "a"))
+ (lex "L'a'")))
+
+;; Note that these strings have 0 "data" components
+(test-group "Strings with prefixes"
+ (test-equal (ls '(string-literal (encoding-prefix . "u8")))
+ (lex "u8\"\""))
+ (test-equal (ls '(string-literal (encoding-prefix . "u")))
+ (lex "u\"\""))
+ (test-equal (ls '(string-literal (encoding-prefix . "U")))
+ (lex "U\"\""))
+ (test-equal (ls '(string-literal (encoding-prefix . "L")))
+ (lex "L\"\"")))
diff --git a/tests/test/cpp/util.scm b/tests/test/cpp/util.scm
new file mode 100644
index 00000000..8329294a
--- /dev/null
+++ b/tests/test/cpp/util.scm
@@ -0,0 +1,14 @@
+(define-module (test cpp util)
+ :use-module (srfi srfi-64)
+ :use-module (srfi srfi-88)
+ :use-module (c cpp-util)
+ :use-module ((c lex2) :select (lex lexeme)))
+
+(test-group "Merge string literals"
+ (test-equal "To simple strings"
+ (list (lexeme type: 'preprocessing-token
+ body: '(string-literal (encoding-prefix) "Hello" "World")))
+ (merge-string-literals (lex "\"Hello\"\"World\"")))
+
+ ;; TODO tests with prefixes
+ )