From 37dc6cc5ab804da964f22787561d030898115809 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20H=C3=B6rnquist?= Date: Wed, 20 Jul 2022 16:25:35 +0200 Subject: Acknowledge string prefixes. --- module/c/cpp-util.scm | 10 ++++++---- module/c/lex2.scm | 26 ++++++++++++++++++++------ module/c/preprocessor2.scm | 20 +++++++++++++------- module/c/unlex.scm | 15 ++++++++++----- 4 files changed, 49 insertions(+), 22 deletions(-) (limited to 'module') diff --git a/module/c/cpp-util.scm b/module/c/cpp-util.scm index 633c5a0c..9674317b 100644 --- a/module/c/cpp-util.scm +++ b/module/c/cpp-util.scm @@ -117,12 +117,14 @@ ((null? (cdr tokens)) tokens) ((string-token? (car tokens)) (lambda (a . _) a) - => (lambda parts-a + => (lambda (prefix-a . parts-a) (cond ((string-token? (cadr tokens)) (lambda (a . _) a) - => (lambda parts-b (merge-string-literals - (cons (make-string-literal (append parts-a parts-b)) - (cddr tokens))))) + => (lambda (prefix-b . parts-b) + (merge-string-literals + ;; TODO check validity of prefixes + (cons (make-string-literal (cons prefix-a (append parts-a parts-b))) + (cddr tokens))))) (else (cons (car tokens) (merge-string-literals (cdr tokens))))))) (else (cons (car tokens) (merge-string-literals (cdr tokens)))))) diff --git a/module/c/lex2.scm b/module/c/lex2.scm index c09f6423..85c9be19 100644 --- a/module/c/lex2.scm +++ b/module/c/lex2.scm @@ -374,12 +374,26 @@ (`(comment ,body) (lexeme body: body type: 'comment )) (`(preprocessing-token ,body) - (case body - ;; "unflatten" - ((string-literal) - (lexeme body: '(string-literal "") type: 'preprocessing-token)) - (else - (lexeme body: body type: 'preprocessing-token)))) + (match body + ('string-literal + ;; Unflatten case + (lexeme body: '(string-literal (encoding-prefix) "") + type: 'preprocessing-token)) + (('string-literal `(encoding-prefix ,px) args ...) + (lexeme body: `(string-literal (encoding-prefix . ,px) ,@args) + type: 'preprocessing-token)) + (('string-literal args ...) + (lexeme body: `(string-literal (encoding-prefix) ,@args) + type: 'preprocessing-token)) + (('character-constant `(character-prefix ,px) args ...) + (lexeme body: `(character-constant (character-prefix . ,px) + ,@args) + type: 'preprocessing-token)) + (('character-constant args ...) + (lexeme body: `(character-constant (character-prefix) ,@args) + type: 'preprocessing-token)) + (body (lexeme body: body type: 'preprocessing-token)))) + ;; "unflatten" ('comment (lexeme body: "" type: 'comment)))) diff --git a/module/c/preprocessor2.scm b/module/c/preprocessor2.scm index c6be3936..3f9552c5 100644 --- a/module/c/preprocessor2.scm +++ b/module/c/preprocessor2.scm @@ -397,11 +397,15 @@ body: (lambda (environment arguments) (typecheck arguments (and (list-of (list-of lexeme?)) (not null?))) - (aif (string-token? (caar arguments)) - (values (handle-pragma environment (lex it)) '()) - (scm-error 'cpp-pragma-error "_Pragma" - "Invalid argument to _Pragma: ~s" - (list (car arguments)) #f))))) + (cond ((string-token? (caar arguments)) + (lambda (a . _) a) + ;; TODO handle rest + => (lambda (encoding it . rest) + (values (handle-pragma environment (lex it)) + '()))) + (else (scm-error 'cpp-pragma-error "_Pragma" + "Invalid argument to _Pragma: ~s" + (list (car arguments)) #f)))))) @@ -591,10 +595,12 @@ (remaining (drop-whitespace (cdr tokens)))) (cond ((null? remaining) (set environment current-line (1- line))) ((string-token? (car remaining)) - => (lambda (file) + (lambda (a . _) a) + => (lambda (encoding . fragments) (-> environment (set current-line (1- line)) - (set current-file file)))) + ;; TODO properly join string + (set current-file (car fragments))))) ;; no newlines in #line (%first-time (loop #f ((unval resolve-token-stream 1) environment tokens))) (else (err)))))) diff --git a/module/c/unlex.scm b/module/c/unlex.scm index 1f8ba0a1..e467a50f 100644 --- a/module/c/unlex.scm +++ b/module/c/unlex.scm @@ -59,21 +59,26 @@ ;; Returns the "source" of the token, as a preprocessing string literal token (define (stringify-token preprocessing-token) (match (lexeme-body preprocessing-token) - (('string-literal `(encoding-prefix ,prefix) parts ...) - (stringify-string-tokens parts)) - (('string-literal parts ...) + (('string-literal `(encoding-prefix . ,prefix) parts ...) (stringify-string-tokens parts)) + (`(header-name (q-string ,s)) (format #f "~s" s)) + (`(header-name (h-string ,s)) (format #f "<~a>" s)) + (`(identifier ,id) id) + (`(pp-number ,n) n) - (`(character-constant ,c) + + ;; TODO remaining parts + (('character-constant `(character-encoding . ,x) c parts ...) (format #f "'~a'" c)) + (`(punctuator ,p) p))) ;; takes a token list, and return a single string literal token (define (stringify-tokens tokens) (lexeme type: 'preprocessing-token - body: `(string-literal ,(unlex-aggressive tokens)))) + body: `(string-literal (encoding-prefix) ,(unlex-aggressive tokens)))) -- cgit v1.2.3