From 7b78cf68200118fac395592da3d78b22b4be0cd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20H=C3=B6rnquist?= Date: Wed, 13 Jul 2022 11:35:39 +0200 Subject: Add support for "other" in preprocessing-tokens. --- module/c/cpp-types.scm | 7 ++++++- module/c/lex2.scm | 16 ++++++++++++++-- module/c/unlex.scm | 6 ++++-- 3 files changed, 24 insertions(+), 5 deletions(-) (limited to 'module/c') diff --git a/module/c/cpp-types.scm b/module/c/cpp-types.scm index 1df70594..82ebb922 100644 --- a/module/c/cpp-types.scm +++ b/module/c/cpp-types.scm @@ -5,8 +5,9 @@ :export (whitespace-token? comment-token? preprocessing-token? - newline-token? + other-token? placemaker-token? + newline-token? identifier-token? punctuator-token? number-token? @@ -27,6 +28,10 @@ (and (lexeme? x) (eq? 'preprocessing-token (lexeme-type x)))) +(define (other-token? x) + (and (lexeme? x) + (eq? 'other (lexeme-type x)))) + (define (placemaker-token? x) (and (lexeme? x) (eq? 'placemaker (lexeme-type x)))) diff --git a/module/c/lex2.scm b/module/c/lex2.scm index fcddcdc4..72f79f55 100644 --- a/module/c/lex2.scm +++ b/module/c/lex2.scm @@ -325,15 +325,25 @@ (define-peg-pattern comment all (or line-comment block-comment)) +(define-peg-pattern non-whitespace all + (and (not-followed-by whitespace) + peg-any)) + (define-peg-pattern preprocessing-tokens all (* (or whitespace comment - preprocessing-token))) + preprocessing-token + non-whitespace))) +;; comment could be merged with whitespace, but then unlex would have to know that + +;; other is the "each non-white-space character that cannot be one of the above" +;; clause from 6.4 p. 1 + (define-type (lexeme) - (type type: (memv '(whitespace comment preprocessing-token placemaker))) + (type type: (memv '(whitespace comment preprocessing-token other placemaker))) (body type: (or string? list?)) (noexpand type: (list-of string?) default: '())) @@ -343,6 +353,8 @@ (define (lex-output->lexeme-object x) (match x + (`(non-whitespace ,body) + (lexeme body: body type: 'other)) (`(whitespace ,body) (lexeme body: body type: 'whitespace )) (`(comment ,body) diff --git a/module/c/unlex.scm b/module/c/unlex.scm index e3d36f86..e714816d 100644 --- a/module/c/unlex.scm +++ b/module/c/unlex.scm @@ -13,7 +13,8 @@ (typecheck tokens (list-of lexeme?)) (string-concatenate (map (lambda (x) (cond (x preprocessing-token? => stringify-token) - ((whitespace-token? x) (lexeme-body x)))) + ((whitespace-token? x) (lexeme-body x)) + ((other-token? x) (lexeme-body x)))) tokens))) ;; takes a list of preprocessing-token's, and return a "source" string @@ -22,7 +23,8 @@ (string-concatenate (map (lambda (x) (cond ((preprocessing-token? x) (stringify-token x)) - ((whitespace-token? x) " "))) + ((whitespace-token? x) " ") + ((other-token? x) (lexeme-body x)))) (squeeze-whitespace tokens)))) (define (stringify-escape-sequence sub-token) -- cgit v1.2.3