1 files changed, 66 insertions, 45 deletions
diff --git a/module/c/lex2.scm b/module/c/lex2.scm
index af90dcce..6c13f0f9 100644
--- a/module/c/lex2.scm
+++ b/module/c/lex2.scm
@@ -4,7 +4,7 @@
   :use-module ((hnh util) :select (->))
   :use-module (hnh util object)
   :use-module (hnh util type)
-  :use-module ((srfi srfi-1) :select (fold))
+  :use-module ((srfi srfi-1) :select (fold append-map!))
   :use-module (srfi srfi-88)
   :use-module ((c trigraph) :select (replace-trigraphs))
   :use-module ((c line-fold) :select (fold-lines))
@@ -37,7 +37,8 @@
   ;; string literal moved before header-name since string literals
   ;; otherwise became q-strings
   (or string-literal
-      header-name
+      ;; header-name
+      h-string-compound
       character-constant
       identifier
       pp-number
@@ -288,25 +289,40 @@
 
 ;;; A.1.8 Header names
 
+(define-peg-pattern h-string-leadup none
+  (and "#" (* inline-whitespace)
+       "include" (* inline-whitespace)
+       "<"))
+
+;; Weird case which handles that '<' and '>' change semantics when after #include
+;; 6.4.7 p. 4
+;; NOTE this is techincally incorrect, since this form is only valid at start of line.
+;; Consider the case
+;; #define s(x) #x
+;; s(#include <test>)
+;; That can't however easily be added since PEG has no start-of-line operator
+(define-peg-pattern h-string-compound all
+  (and h-string-leadup h-string (ignore ">")))
+
 (define-peg-pattern h-string all (+ h-char))
-(define-peg-pattern q-string all (+ q-char))
+;; (define-peg-pattern q-string all (+ q-char))
 
-;; (6.4.7)
-(define-peg-pattern header-name all
-  (or (and (ignore "<")  h-string (ignore ">"))
-      ;; NOTE this case will never be reached, since it's treated as a regular
-      ;; string instead
-      (and (ignore "\"") q-string (ignore "\""))))
+;; ;; (6.4.7)
+;; (define-peg-pattern header-name all
+;;   (or (and (ignore "<")  h-string (ignore ">"))
+;;       ;; NOTE this case will never be reached, since it's treated as a regular
+;;       ;; string instead
+;;       (and (ignore "\"") q-string (ignore "\""))))
 
 ;; (6.4.7)
 (define-peg-pattern h-char body
   (or (and (not-followed-by (or ">" "\n")) peg-any)
       escape-sequence))
 
-;; (6.4.7)
-(define-peg-pattern q-char body
-  (or (and (not-followed-by (or "\"" "\n")) peg-any)
-      escape-sequence))
+;; ;; (6.4.7)
+;; (define-peg-pattern q-char body
+;;   (or (and (not-followed-by (or "\"" "\n")) peg-any)
+;;       escape-sequence))
 
 ;;; A.1.9 Preprocessing numbers
 
@@ -321,10 +337,11 @@
 
 
 
+(define-peg-pattern inline-whitespace body
+  (or "\t" "\v" "\f" " "))
+
 (define-peg-pattern whitespace all
-  (or "\t" "\n" "\v" "\f" " "
-      ;; "\r"
-      ))
+  (or "\n" inline-whitespace))
 
 (define-peg-pattern block-comment body
   (and (ignore "/*")
@@ -366,34 +383,38 @@
 (define (placemaker)
   (lexeme type: 'placemaker body: '()))
 
-(define (lex-output->lexeme-object x)
+(define (lex-output->lexeme-objects x)
   (match x
     (`(non-whitespace ,body)
-     (lexeme body: body type: 'other))
+     (list (lexeme body: body type: 'other)))
     (`(whitespace ,body)
-     (lexeme body: body type: 'whitespace ))
+     (list (lexeme body: body type: 'whitespace)))
     (`(comment ,body)
-     (lexeme body: body type: 'comment ))
+     (list (lexeme body: body type: 'comment)))
     (`(preprocessing-token ,body)
      (match body
        ('string-literal
         ;; Unflatten case
-        (lexeme body: '(string-literal (encoding-prefix) "")
-                type: 'preprocessing-token))
+        (list (lexeme body: '(string-literal (encoding-prefix) "")
+                      type: 'preprocessing-token)))
        (('string-literal `(encoding-prefix ,px) args ...)
-        (lexeme body: `(string-literal (encoding-prefix . ,px) ,@args)
-                type: 'preprocessing-token))
+        (list (lexeme body: `(string-literal (encoding-prefix . ,px) ,@args)
+                      type: 'preprocessing-token)))
        (('string-literal args ...)
-        (lexeme body: `(string-literal (encoding-prefix) ,@args)
-                type: 'preprocessing-token))
+        (list (lexeme body: `(string-literal (encoding-prefix) ,@args)
+                      type: 'preprocessing-token)))
        (('character-constant `(character-prefix ,px) args ...)
-        (lexeme body: `(character-constant (character-prefix . ,px)
-                                           ,@args)
-                type: 'preprocessing-token))
+        (list (lexeme body: `(character-constant (character-prefix . ,px)
+                                                 ,@args)
+                      type: 'preprocessing-token)))
        (('character-constant args ...)
-        (lexeme body: `(character-constant (character-prefix) ,@args)
-                type: 'preprocessing-token))
-       (body (lexeme body: body type: 'preprocessing-token))))
+        (list (lexeme body: `(character-constant (character-prefix) ,@args)
+                      type: 'preprocessing-token)))
+       (`(h-string-compound ,body)
+        (list (lexeme type: 'preprocessing-token body: '(punctuator "#"))
+              (lexeme type: 'preprocessing-token body: '(identifier "include"))
+              (lexeme type: 'preprocessing-token body: body)))
+       (body (list (lexeme body: body type: 'preprocessing-token)))))
 
     ;; "unflatten"
     ('comment (lexeme body: "" type: 'comment))))
@@ -409,19 +430,19 @@
 (define (lex string)
   (if (string-null? string)
       '()
-      (map lex-output->lexeme-object
-           (let ((result (match-pattern preprocessing-tokens string)))
-             (let ((trailing (substring (peg:string result)
-                                        (peg:end result))))
-               (unless (string-null? trailing)
-                 (scm-error 'cpp-lex-error "lex"
-                            "Failed to lex string, remaining trailing characters: ~s"
-                            (list trailing) #f)))
-             (unless (list? (peg:tree result))
-               (scm-error 'cpp-lex-error "lex"
-                          "Parsing just failed. Chars: ~s"
-                          (list (peg:string result)) #f))
-             (cdr (peg:tree result))))))
+      (append-map! lex-output->lexeme-objects
+                   (let ((result (match-pattern preprocessing-tokens string)))
+                     (let ((trailing (substring (peg:string result)
+                                                (peg:end result))))
+                       (unless (string-null? trailing)
+                         (scm-error 'cpp-lex-error "lex"
+                                    "Failed to lex string, remaining trailing characters: ~s"
+                                    (list trailing) #f)))
+                     (unless (list? (peg:tree result))
+                       (scm-error 'cpp-lex-error "lex"
+                                  "Parsing just failed. Chars: ~s"
+                                  (list (peg:string result)) #f))
+                     (cdr (peg:tree result))))))