From fc9f889b376ccec4a795ca87874f8efa22321726 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20H=C3=B6rnquist?= Date: Thu, 30 Jun 2022 02:33:01 +0200 Subject: Fix escape sequences in chars and strings. --- module/c/parse.scm | 95 ++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 68 insertions(+), 27 deletions(-) (limited to 'module/c/parse.scm') diff --git a/module/c/parse.scm b/module/c/parse.scm index 09ede544..d8cfd7cd 100644 --- a/module/c/parse.scm +++ b/module/c/parse.scm @@ -60,13 +60,38 @@ (list else vars) #f))) vars))) -;; Converts string to a null-terminated bytevector -(define* (string->c-string str optional: (transcoder (make-transcoder (utf-8-codec)))) - (let* ((bv* (string->bytevector str transcoder)) - (bv (make-bytevector (1+ (bytevector-length bv*))))) - (bytevector-copy! bv* 0 bv 0 (bytevector-length bv*)) - (bytevector-u8-set! bv (bytevector-length bv*) 0) - bv)) +;; Takes a list of strings and integers, and merges it all into a single +;; bytevector representing a c string +(define* (string-fragments->c-string + fragments optional: (transcoder (make-transcoder (utf-8-codec)))) + + (define fragments-fixed + (map (lambda (frag) + (if (string? frag) + (string->bytevector frag transcoder) + frag)) + fragments)) + + (define bv-length + (fold (lambda (item sum) (+ sum (if (bytevector? item) + (bytevector-length item) + 1))) + 0 fragments-fixed)) + + (define bv (make-bytevector (1+ bv-length))) + ;; trailing null byte + (bytevector-u8-set! bv bv-length 0) + (fold (lambda (item idx) + (cond ((bytevector? item) + (bytevector-copy! item 0 + bv idx + (bytevector-length item)) + (+ idx (bytevector-length item))) + (else (bytevector-u8-set! bv idx item) + (+ idx 1)))) + 0 + fragments-fixed) + bv) (define (parse-float-form float-form) (let ((float-string @@ -91,6 +116,33 @@ (list `(float ,@args)) #f))))) +(define (resolve-escaped-char form) + (match form + (('base-8-char n) (string->number n 8)) + (('base-16-char n) (string->number n 16)) + (c (char->integer + (case (string-ref c 0) + ((#\a) #\alarm) + ((#\b) #\backspace) + ((#\e) #\esc) ;; non-standard + ((#\f) #\page) + ((#\n) #\newline) + ((#\r) #\return) + ((#\t) #\tab) + ((#\v) #\vtab) + ((#\\) #\\) + ;; These are valid in both strings and chars + ((#\') #\') + ((#\") #\")))))) + +;; Takes a list of strings and escaped-char form +;; and returns a list of strings and integers +(define (resolve-string-fragment fragment) + (match fragment + (('escaped-char c) + (resolve-escaped-char c)) + (fargment fragment))) + (define (parse-lexeme-tree tree) (match tree ['() '()] @@ -118,23 +170,7 @@ ;; Character literals, stored as raw integers ;; so mathematical operations keep working on them. - [('char ('escaped-char ('base-8-char n))) - (-> n (string->number 8) #; integer->char)] - [('char ('escaped-char ('base-16-char n))) - (-> n (string->number 16) #; integer->char)] - [('char ('escaped-char c)) - (char->integer - (case (string-ref c 0) - ((#\a) #\alarm) - ((#\b) #\backspace) - ((#\e) #\esc) - ((#\f) #\page) - ((#\n) #\newline) - ((#\r) #\return) - ((#\t) #\tab) - ((#\v) #\vtab) - ((#\\) #\\) - ((#\') #\')))] + [('char ('escaped-char c)) (resolve-escaped-char c)] [('char c) (char->integer (string-ref c 0))] @@ -205,9 +241,14 @@ ['string #vu8(0)] - [('string str) (string->c-string str)] - [(('string str) ...) - (string->c-string (string-concatenate str))] + [('string str ...) + (-> (map resolve-string-fragment str) + string-fragments->c-string)] + + ;; implicit concatenation of string literals + [(('string str ...) ...) + (-> (map resolve-string-fragment (concatenate str)) + string-fragments->c-string)] [('infix args ...) (let ((r (resolve-order-of-operations -- cgit v1.2.3