From 37dc6cc5ab804da964f22787561d030898115809 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Hugo=20H=C3=B6rnquist?= <hugo@lysator.liu.se>
Date: Wed, 20 Jul 2022 16:25:35 +0200
Subject: Acknowledge string prefixes.

---
 module/c/cpp-util.scm      | 10 ++++++----
 module/c/lex2.scm          | 26 ++++++++++++++++++++------
 module/c/preprocessor2.scm | 20 +++++++++++++-------
 module/c/unlex.scm         | 15 ++++++++++-----
 4 files changed, 49 insertions(+), 22 deletions(-)

(limited to 'module')

diff --git a/module/c/cpp-util.scm b/module/c/cpp-util.scm
index 633c5a0c..9674317b 100644
--- a/module/c/cpp-util.scm
+++ b/module/c/cpp-util.scm
@@ -117,12 +117,14 @@
         ((null? (cdr tokens)) tokens)
         ((string-token? (car tokens))
          (lambda (a . _) a)
-         => (lambda parts-a
+         => (lambda (prefix-a . parts-a)
               (cond ((string-token? (cadr tokens))
                      (lambda (a . _) a)
-                     => (lambda parts-b (merge-string-literals
-                                    (cons (make-string-literal (append parts-a parts-b))
-                                          (cddr tokens)))))
+                     => (lambda (prefix-b . parts-b)
+                          (merge-string-literals
+                           ;; TODO check validity of prefixes
+                           (cons (make-string-literal (cons prefix-a (append parts-a parts-b)))
+                                 (cddr tokens)))))
                     (else (cons (car tokens)
                                 (merge-string-literals (cdr tokens)))))))
         (else (cons (car tokens) (merge-string-literals (cdr tokens))))))
diff --git a/module/c/lex2.scm b/module/c/lex2.scm
index c09f6423..85c9be19 100644
--- a/module/c/lex2.scm
+++ b/module/c/lex2.scm
@@ -374,12 +374,26 @@
     (`(comment ,body)
      (lexeme body: body type: 'comment ))
     (`(preprocessing-token ,body)
-     (case body
-       ;; "unflatten"
-       ((string-literal)
-        (lexeme body: '(string-literal "") type: 'preprocessing-token))
-       (else
-        (lexeme body: body type: 'preprocessing-token))))
+     (match body
+       ('string-literal
+        ;; Unflatten case
+        (lexeme body: '(string-literal (encoding-prefix) "")
+                type: 'preprocessing-token))
+       (('string-literal `(encoding-prefix ,px) args ...)
+        (lexeme body: `(string-literal (encoding-prefix . ,px) ,@args)
+                type: 'preprocessing-token))
+       (('string-literal args ...)
+        (lexeme body: `(string-literal (encoding-prefix) ,@args)
+                type: 'preprocessing-token))
+       (('character-constant `(character-prefix ,px) args ...)
+        (lexeme body: `(character-constant (character-prefix . ,px)
+                                           ,@args)
+                type: 'preprocessing-token))
+       (('character-constant args ...)
+        (lexeme body: `(character-constant (character-prefix) ,@args)
+                type: 'preprocessing-token))
+       (body (lexeme body: body type: 'preprocessing-token))))
+
     ;; "unflatten"
     ('comment (lexeme body: "" type: 'comment))))
 
diff --git a/module/c/preprocessor2.scm b/module/c/preprocessor2.scm
index c6be3936..3f9552c5 100644
--- a/module/c/preprocessor2.scm
+++ b/module/c/preprocessor2.scm
@@ -397,11 +397,15 @@
    body: (lambda (environment arguments)
            (typecheck arguments (and (list-of (list-of lexeme?))
                                      (not null?)))
-           (aif (string-token? (caar arguments))
-                (values (handle-pragma environment (lex it)) '())
-                (scm-error 'cpp-pragma-error "_Pragma"
-                           "Invalid argument to _Pragma: ~s"
-                           (list (car arguments)) #f)))))
+           (cond ((string-token? (caar arguments))
+                  (lambda (a . _) a)
+                  ;; TODO handle rest
+                  => (lambda (encoding it . rest)
+                       (values (handle-pragma environment (lex it))
+                               '())))
+                 (else (scm-error 'cpp-pragma-error "_Pragma"
+                                  "Invalid argument to _Pragma: ~s"
+                                  (list (car arguments)) #f))))))
 
 
 
@@ -591,10 +595,12 @@
                         (remaining (drop-whitespace (cdr tokens))))
                     (cond ((null? remaining) (set environment current-line (1- line)))
                           ((string-token? (car remaining))
-                           => (lambda (file)
+                           (lambda (a . _) a)
+                           => (lambda (encoding . fragments)
                                 (-> environment
                                     (set current-line (1- line))
-                                    (set current-file file))))
+                                    ;; TODO properly join string
+                                    (set current-file (car fragments)))))
                           ;; no newlines in #line
                           (%first-time (loop #f ((unval resolve-token-stream 1) environment tokens)))
                           (else (err))))))
diff --git a/module/c/unlex.scm b/module/c/unlex.scm
index 1f8ba0a1..e467a50f 100644
--- a/module/c/unlex.scm
+++ b/module/c/unlex.scm
@@ -59,21 +59,26 @@
 ;; Returns the "source" of the token, as a preprocessing string literal token
 (define (stringify-token preprocessing-token)
   (match (lexeme-body preprocessing-token)
-    (('string-literal `(encoding-prefix ,prefix) parts ...)
-     (stringify-string-tokens parts))
-    (('string-literal parts ...)
+    (('string-literal `(encoding-prefix . ,prefix) parts ...)
      (stringify-string-tokens parts))
+
     (`(header-name (q-string ,s))
      (format #f "~s" s))
+
     (`(header-name (h-string ,s))
      (format #f "<~a>" s))
+
     (`(identifier ,id) id)
+
     (`(pp-number ,n) n)
-    (`(character-constant ,c)
+
+    ;; TODO remaining parts
+    (('character-constant `(character-encoding . ,x) c parts ...)
      (format #f "'~a'" c))
+
     (`(punctuator ,p) p)))
 
 ;; takes a token list, and return a single string literal token
 (define (stringify-tokens tokens)
   (lexeme type: 'preprocessing-token
-          body: `(string-literal ,(unlex-aggressive tokens))))
+          body: `(string-literal (encoding-prefix) ,(unlex-aggressive tokens))))
-- 
cgit v1.2.3