aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHugo Hörnquist <hugo@lysator.liu.se>2022-06-30 02:33:01 +0200
committerHugo Hörnquist <hugo@lysator.liu.se>2022-07-07 21:17:22 +0200
commitfc9f889b376ccec4a795ca87874f8efa22321726 (patch)
treef5bd1aabbe5af107e797c628ab6ab61e378864d3
parentC parser add basic float support. (diff)
downloadcalp-fc9f889b376ccec4a795ca87874f8efa22321726.tar.gz
calp-fc9f889b376ccec4a795ca87874f8efa22321726.tar.xz
Fix escape sequences in chars and strings.
-rw-r--r--module/c/lex.scm2
-rw-r--r--module/c/parse.scm95
-rw-r--r--tests/test/cpp.scm25
3 files changed, 89 insertions, 33 deletions
diff --git a/module/c/lex.scm b/module/c/lex.scm
index b6523a87..b3c82001 100644
--- a/module/c/lex.scm
+++ b/module/c/lex.scm
@@ -72,7 +72,7 @@
(define-peg-pattern base-16-char all
(and (ignore "x") base-16-digit (? base-16-digit)))
-(define-peg-pattern escaped-char body
+(define-peg-pattern escaped-char all
(and (ignore "\\") (or base-16-char
base-8-char
peg-any)))
diff --git a/module/c/parse.scm b/module/c/parse.scm
index 09ede544..d8cfd7cd 100644
--- a/module/c/parse.scm
+++ b/module/c/parse.scm
@@ -60,13 +60,38 @@
(list else vars) #f)))
vars)))
-;; Converts string to a null-terminated bytevector
-(define* (string->c-string str optional: (transcoder (make-transcoder (utf-8-codec))))
- (let* ((bv* (string->bytevector str transcoder))
- (bv (make-bytevector (1+ (bytevector-length bv*)))))
- (bytevector-copy! bv* 0 bv 0 (bytevector-length bv*))
- (bytevector-u8-set! bv (bytevector-length bv*) 0)
- bv))
+;; Takes a list of strings and integers, and merges it all into a single
+;; bytevector representing a c string
+(define* (string-fragments->c-string
+ fragments optional: (transcoder (make-transcoder (utf-8-codec))))
+
+ (define fragments-fixed
+ (map (lambda (frag)
+ (if (string? frag)
+ (string->bytevector frag transcoder)
+ frag))
+ fragments))
+
+ (define bv-length
+ (fold (lambda (item sum) (+ sum (if (bytevector? item)
+ (bytevector-length item)
+ 1)))
+ 0 fragments-fixed))
+
+ (define bv (make-bytevector (1+ bv-length)))
+ ;; trailing null byte
+ (bytevector-u8-set! bv bv-length 0)
+ (fold (lambda (item idx)
+ (cond ((bytevector? item)
+ (bytevector-copy! item 0
+ bv idx
+ (bytevector-length item))
+ (+ idx (bytevector-length item)))
+ (else (bytevector-u8-set! bv idx item)
+ (+ idx 1))))
+ 0
+ fragments-fixed)
+ bv)
(define (parse-float-form float-form)
(let ((float-string
@@ -91,6 +116,33 @@
(list `(float ,@args)) #f)))))
+(define (resolve-escaped-char form)
+ (match form
+ (('base-8-char n) (string->number n 8))
+ (('base-16-char n) (string->number n 16))
+ (c (char->integer
+ (case (string-ref c 0)
+ ((#\a) #\alarm)
+ ((#\b) #\backspace)
+ ((#\e) #\esc) ;; non-standard
+ ((#\f) #\page)
+ ((#\n) #\newline)
+ ((#\r) #\return)
+ ((#\t) #\tab)
+ ((#\v) #\vtab)
+ ((#\\) #\\)
+ ;; These are valid in both strings and chars
+ ((#\') #\')
+ ((#\") #\"))))))
+
+;; Takes a list of strings and escaped-char form
+;; and returns a list of strings and integers
+(define (resolve-string-fragment fragment)
+ (match fragment
+ (('escaped-char c)
+ (resolve-escaped-char c))
+ (fargment fragment)))
+
(define (parse-lexeme-tree tree)
(match tree
['() '()]
@@ -118,23 +170,7 @@
;; Character literals, stored as raw integers
;; so mathematical operations keep working on them.
- [('char ('escaped-char ('base-8-char n)))
- (-> n (string->number 8) #; integer->char)]
- [('char ('escaped-char ('base-16-char n)))
- (-> n (string->number 16) #; integer->char)]
- [('char ('escaped-char c))
- (char->integer
- (case (string-ref c 0)
- ((#\a) #\alarm)
- ((#\b) #\backspace)
- ((#\e) #\esc)
- ((#\f) #\page)
- ((#\n) #\newline)
- ((#\r) #\return)
- ((#\t) #\tab)
- ((#\v) #\vtab)
- ((#\\) #\\)
- ((#\') #\')))]
+ [('char ('escaped-char c)) (resolve-escaped-char c)]
[('char c) (char->integer (string-ref c 0))]
@@ -205,9 +241,14 @@
['string #vu8(0)]
- [('string str) (string->c-string str)]
- [(('string str) ...)
- (string->c-string (string-concatenate str))]
+ [('string str ...)
+ (-> (map resolve-string-fragment str)
+ string-fragments->c-string)]
+
+ ;; implicit concatenation of string literals
+ [(('string str ...) ...)
+ (-> (map resolve-string-fragment (concatenate str))
+ string-fragments->c-string)]
[('infix args ...)
(let ((r (resolve-order-of-operations
diff --git a/tests/test/cpp.scm b/tests/test/cpp.scm
index 32072932..fa767fe3 100644
--- a/tests/test/cpp.scm
+++ b/tests/test/cpp.scm
@@ -514,23 +514,38 @@
(test-group "String with only escape"
(let ((form (string #\" #\\ #\" #\")))
- (test-equal `(string "\"") (lex form))
+ (test-equal `(string (escaped-char "\"")) (lex form))
(test-equal #vu8(34 0) (run form))))
(test-group "String with escape at start"
(let ((form (string #\" #\\ #\" #\a #\")))
- (test-equal `(string "\"a") (lex form))
+ (test-equal `(string (escaped-char "\"") "a") (lex form))
(test-equal #vu8(34 #x61 0) (run form))))
(test-group "String with escape at end"
(let ((form (string #\" #\a #\\ #\" #\")))
- (test-equal `(string "a\"") (lex form))
+ (test-equal `(string "a" (escaped-char "\"")) (lex form))
(test-equal #vu8(#x61 34 0) (run form))))
(test-group "String with escape in middle"
(let ((form (string #\" #\a #\\ #\" #\b #\")))
- (test-equal `(string "a\"b") (lex form))
- (test-equal #vu8(#x61 34 #x62 0) (run form)))))
+ (test-equal `(string "a" (escaped-char "\"") "b") (lex form))
+ (test-equal #vu8(#x61 34 #x62 0) (run form))))
+
+ ;; \e is semi non-standard
+ (test-group "String with bakslash-e esacpe"
+ (let ((form "\"\\e\""))
+ (test-equal '(string (escaped-char "e")) (lex form))
+ (test-equal #vu8(#x1b 0) (run form))))
+
+ (test-group "String with byte secquence escape"
+ (let ((form "\"\\xf0\\x9f\\x92\\xa9\""))
+ (test-equal '(string (escaped-char (base-16-char "f0"))
+ (escaped-char (base-16-char "9f"))
+ (escaped-char (base-16-char "92"))
+ (escaped-char (base-16-char "a9")))
+ (lex form))
+ (test-equal #vu8(#xf0 #x9f #x92 #xa9 0) (run form)))))
(test-group "__asm__"
(let ((form "__asm__(\".globl \" __XSTRING(sym))"))