diff options
Diffstat (limited to '')
-rw-r--r-- | module/c/lex.scm | 2 | ||||
-rw-r--r-- | module/c/parse.scm | 95 | ||||
-rw-r--r-- | tests/test/cpp.scm | 25 |
3 files changed, 89 insertions, 33 deletions
diff --git a/module/c/lex.scm b/module/c/lex.scm index b6523a87..b3c82001 100644 --- a/module/c/lex.scm +++ b/module/c/lex.scm @@ -72,7 +72,7 @@ (define-peg-pattern base-16-char all (and (ignore "x") base-16-digit (? base-16-digit))) -(define-peg-pattern escaped-char body +(define-peg-pattern escaped-char all (and (ignore "\\") (or base-16-char base-8-char peg-any))) diff --git a/module/c/parse.scm b/module/c/parse.scm index 09ede544..d8cfd7cd 100644 --- a/module/c/parse.scm +++ b/module/c/parse.scm @@ -60,13 +60,38 @@ (list else vars) #f))) vars))) -;; Converts string to a null-terminated bytevector -(define* (string->c-string str optional: (transcoder (make-transcoder (utf-8-codec)))) - (let* ((bv* (string->bytevector str transcoder)) - (bv (make-bytevector (1+ (bytevector-length bv*))))) - (bytevector-copy! bv* 0 bv 0 (bytevector-length bv*)) - (bytevector-u8-set! bv (bytevector-length bv*) 0) - bv)) +;; Takes a list of strings and integers, and merges it all into a single +;; bytevector representing a c string +(define* (string-fragments->c-string + fragments optional: (transcoder (make-transcoder (utf-8-codec)))) + + (define fragments-fixed + (map (lambda (frag) + (if (string? frag) + (string->bytevector frag transcoder) + frag)) + fragments)) + + (define bv-length + (fold (lambda (item sum) (+ sum (if (bytevector? item) + (bytevector-length item) + 1))) + 0 fragments-fixed)) + + (define bv (make-bytevector (1+ bv-length))) + ;; trailing null byte + (bytevector-u8-set! bv bv-length 0) + (fold (lambda (item idx) + (cond ((bytevector? item) + (bytevector-copy! item 0 + bv idx + (bytevector-length item)) + (+ idx (bytevector-length item))) + (else (bytevector-u8-set! bv idx item) + (+ idx 1)))) + 0 + fragments-fixed) + bv) (define (parse-float-form float-form) (let ((float-string @@ -91,6 +116,33 @@ (list `(float ,@args)) #f))))) +(define (resolve-escaped-char form) + (match form + (('base-8-char n) (string->number n 8)) + (('base-16-char n) (string->number n 16)) + (c (char->integer + (case (string-ref c 0) + ((#\a) #\alarm) + ((#\b) #\backspace) + ((#\e) #\esc) ;; non-standard + ((#\f) #\page) + ((#\n) #\newline) + ((#\r) #\return) + ((#\t) #\tab) + ((#\v) #\vtab) + ((#\\) #\\) + ;; These are valid in both strings and chars + ((#\') #\') + ((#\") #\")))))) + +;; Takes a list of strings and escaped-char form +;; and returns a list of strings and integers +(define (resolve-string-fragment fragment) + (match fragment + (('escaped-char c) + (resolve-escaped-char c)) + (fargment fragment))) + (define (parse-lexeme-tree tree) (match tree ['() '()] @@ -118,23 +170,7 @@ ;; Character literals, stored as raw integers ;; so mathematical operations keep working on them. - [('char ('escaped-char ('base-8-char n))) - (-> n (string->number 8) #; integer->char)] - [('char ('escaped-char ('base-16-char n))) - (-> n (string->number 16) #; integer->char)] - [('char ('escaped-char c)) - (char->integer - (case (string-ref c 0) - ((#\a) #\alarm) - ((#\b) #\backspace) - ((#\e) #\esc) - ((#\f) #\page) - ((#\n) #\newline) - ((#\r) #\return) - ((#\t) #\tab) - ((#\v) #\vtab) - ((#\\) #\\) - ((#\') #\')))] + [('char ('escaped-char c)) (resolve-escaped-char c)] [('char c) (char->integer (string-ref c 0))] @@ -205,9 +241,14 @@ ['string #vu8(0)] - [('string str) (string->c-string str)] - [(('string str) ...) - (string->c-string (string-concatenate str))] + [('string str ...) + (-> (map resolve-string-fragment str) + string-fragments->c-string)] + + ;; implicit concatenation of string literals + [(('string str ...) ...) + (-> (map resolve-string-fragment (concatenate str)) + string-fragments->c-string)] [('infix args ...) (let ((r (resolve-order-of-operations diff --git a/tests/test/cpp.scm b/tests/test/cpp.scm index 32072932..fa767fe3 100644 --- a/tests/test/cpp.scm +++ b/tests/test/cpp.scm @@ -514,23 +514,38 @@ (test-group "String with only escape" (let ((form (string #\" #\\ #\" #\"))) - (test-equal `(string "\"") (lex form)) + (test-equal `(string (escaped-char "\"")) (lex form)) (test-equal #vu8(34 0) (run form)))) (test-group "String with escape at start" (let ((form (string #\" #\\ #\" #\a #\"))) - (test-equal `(string "\"a") (lex form)) + (test-equal `(string (escaped-char "\"") "a") (lex form)) (test-equal #vu8(34 #x61 0) (run form)))) (test-group "String with escape at end" (let ((form (string #\" #\a #\\ #\" #\"))) - (test-equal `(string "a\"") (lex form)) + (test-equal `(string "a" (escaped-char "\"")) (lex form)) (test-equal #vu8(#x61 34 0) (run form)))) (test-group "String with escape in middle" (let ((form (string #\" #\a #\\ #\" #\b #\"))) - (test-equal `(string "a\"b") (lex form)) - (test-equal #vu8(#x61 34 #x62 0) (run form))))) + (test-equal `(string "a" (escaped-char "\"") "b") (lex form)) + (test-equal #vu8(#x61 34 #x62 0) (run form)))) + + ;; \e is semi non-standard + (test-group "String with bakslash-e esacpe" + (let ((form "\"\\e\"")) + (test-equal '(string (escaped-char "e")) (lex form)) + (test-equal #vu8(#x1b 0) (run form)))) + + (test-group "String with byte secquence escape" + (let ((form "\"\\xf0\\x9f\\x92\\xa9\"")) + (test-equal '(string (escaped-char (base-16-char "f0")) + (escaped-char (base-16-char "9f")) + (escaped-char (base-16-char "92")) + (escaped-char (base-16-char "a9"))) + (lex form)) + (test-equal #vu8(#xf0 #x9f #x92 #xa9 0) (run form))))) (test-group "__asm__" (let ((form "__asm__(\".globl \" __XSTRING(sym))")) |