aboutsummaryrefslogtreecommitdiff
path: root/module/c/lex2.scm
diff options
context:
space:
mode:
Diffstat (limited to 'module/c/lex2.scm')
-rw-r--r--module/c/lex2.scm111
1 files changed, 66 insertions, 45 deletions
diff --git a/module/c/lex2.scm b/module/c/lex2.scm
index af90dcce..6c13f0f9 100644
--- a/module/c/lex2.scm
+++ b/module/c/lex2.scm
@@ -4,7 +4,7 @@
:use-module ((hnh util) :select (->))
:use-module (hnh util object)
:use-module (hnh util type)
- :use-module ((srfi srfi-1) :select (fold))
+ :use-module ((srfi srfi-1) :select (fold append-map!))
:use-module (srfi srfi-88)
:use-module ((c trigraph) :select (replace-trigraphs))
:use-module ((c line-fold) :select (fold-lines))
@@ -37,7 +37,8 @@
;; string literal moved before header-name since string literals
;; otherwise became q-strings
(or string-literal
- header-name
+ ;; header-name
+ h-string-compound
character-constant
identifier
pp-number
@@ -288,25 +289,40 @@
;;; A.1.8 Header names
+(define-peg-pattern h-string-leadup none
+ (and "#" (* inline-whitespace)
+ "include" (* inline-whitespace)
+ "<"))
+
+;; Weird case which handles that '<' and '>' change semantics when after #include
+;; 6.4.7 p. 4
+;; NOTE this is techincally incorrect, since this form is only valid at start of line.
+;; Consider the case
+;; #define s(x) #x
+;; s(#include <test>)
+;; That can't however easily be added since PEG has no start-of-line operator
+(define-peg-pattern h-string-compound all
+ (and h-string-leadup h-string (ignore ">")))
+
(define-peg-pattern h-string all (+ h-char))
-(define-peg-pattern q-string all (+ q-char))
+;; (define-peg-pattern q-string all (+ q-char))
-;; (6.4.7)
-(define-peg-pattern header-name all
- (or (and (ignore "<") h-string (ignore ">"))
- ;; NOTE this case will never be reached, since it's treated as a regular
- ;; string instead
- (and (ignore "\"") q-string (ignore "\""))))
+;; ;; (6.4.7)
+;; (define-peg-pattern header-name all
+;; (or (and (ignore "<") h-string (ignore ">"))
+;; ;; NOTE this case will never be reached, since it's treated as a regular
+;; ;; string instead
+;; (and (ignore "\"") q-string (ignore "\""))))
;; (6.4.7)
(define-peg-pattern h-char body
(or (and (not-followed-by (or ">" "\n")) peg-any)
escape-sequence))
-;; (6.4.7)
-(define-peg-pattern q-char body
- (or (and (not-followed-by (or "\"" "\n")) peg-any)
- escape-sequence))
+;; ;; (6.4.7)
+;; (define-peg-pattern q-char body
+;; (or (and (not-followed-by (or "\"" "\n")) peg-any)
+;; escape-sequence))
;;; A.1.9 Preprocessing numbers
@@ -321,10 +337,11 @@
+(define-peg-pattern inline-whitespace body
+ (or "\t" "\v" "\f" " "))
+
(define-peg-pattern whitespace all
- (or "\t" "\n" "\v" "\f" " "
- ;; "\r"
- ))
+ (or "\n" inline-whitespace))
(define-peg-pattern block-comment body
(and (ignore "/*")
@@ -366,34 +383,38 @@
(define (placemaker)
(lexeme type: 'placemaker body: '()))
-(define (lex-output->lexeme-object x)
+(define (lex-output->lexeme-objects x)
(match x
(`(non-whitespace ,body)
- (lexeme body: body type: 'other))
+ (list (lexeme body: body type: 'other)))
(`(whitespace ,body)
- (lexeme body: body type: 'whitespace ))
+ (list (lexeme body: body type: 'whitespace)))
(`(comment ,body)
- (lexeme body: body type: 'comment ))
+ (list (lexeme body: body type: 'comment)))
(`(preprocessing-token ,body)
(match body
('string-literal
;; Unflatten case
- (lexeme body: '(string-literal (encoding-prefix) "")
- type: 'preprocessing-token))
+ (list (lexeme body: '(string-literal (encoding-prefix) "")
+ type: 'preprocessing-token)))
(('string-literal `(encoding-prefix ,px) args ...)
- (lexeme body: `(string-literal (encoding-prefix . ,px) ,@args)
- type: 'preprocessing-token))
+ (list (lexeme body: `(string-literal (encoding-prefix . ,px) ,@args)
+ type: 'preprocessing-token)))
(('string-literal args ...)
- (lexeme body: `(string-literal (encoding-prefix) ,@args)
- type: 'preprocessing-token))
+ (list (lexeme body: `(string-literal (encoding-prefix) ,@args)
+ type: 'preprocessing-token)))
(('character-constant `(character-prefix ,px) args ...)
- (lexeme body: `(character-constant (character-prefix . ,px)
- ,@args)
- type: 'preprocessing-token))
+ (list (lexeme body: `(character-constant (character-prefix . ,px)
+ ,@args)
+ type: 'preprocessing-token)))
(('character-constant args ...)
- (lexeme body: `(character-constant (character-prefix) ,@args)
- type: 'preprocessing-token))
- (body (lexeme body: body type: 'preprocessing-token))))
+ (list (lexeme body: `(character-constant (character-prefix) ,@args)
+ type: 'preprocessing-token)))
+ (`(h-string-compound ,body)
+ (list (lexeme type: 'preprocessing-token body: '(punctuator "#"))
+ (lexeme type: 'preprocessing-token body: '(identifier "include"))
+ (lexeme type: 'preprocessing-token body: body)))
+ (body (list (lexeme body: body type: 'preprocessing-token)))))
;; "unflatten"
('comment (lexeme body: "" type: 'comment))))
@@ -409,19 +430,19 @@
(define (lex string)
(if (string-null? string)
'()
- (map lex-output->lexeme-object
- (let ((result (match-pattern preprocessing-tokens string)))
- (let ((trailing (substring (peg:string result)
- (peg:end result))))
- (unless (string-null? trailing)
- (scm-error 'cpp-lex-error "lex"
- "Failed to lex string, remaining trailing characters: ~s"
- (list trailing) #f)))
- (unless (list? (peg:tree result))
- (scm-error 'cpp-lex-error "lex"
- "Parsing just failed. Chars: ~s"
- (list (peg:string result)) #f))
- (cdr (peg:tree result))))))
+ (append-map! lex-output->lexeme-objects
+ (let ((result (match-pattern preprocessing-tokens string)))
+ (let ((trailing (substring (peg:string result)
+ (peg:end result))))
+ (unless (string-null? trailing)
+ (scm-error 'cpp-lex-error "lex"
+ "Failed to lex string, remaining trailing characters: ~s"
+ (list trailing) #f)))
+ (unless (list? (peg:tree result))
+ (scm-error 'cpp-lex-error "lex"
+ "Parsing just failed. Chars: ~s"
+ (list (peg:string result)) #f))
+ (cdr (peg:tree result))))))