(define-module (c unlex) :use-module (hnh util type) :use-module (ice-9 match) :use-module (c lex2) :use-module (c cpp-types) :use-module (c cpp-util) :use-module ((texinfo string-utils) :select (escape-special-chars)) :export (unlex unlex-aggressive stringify-token stringify-tokens)) (define (unlex tokens) (typecheck tokens (list-of lexeme?)) (string-concatenate (map (lambda (x) (cond (x preprocessing-token? => stringify-token) ((whitespace-token? x) (lexeme-body x)) ((other-token? x) (lexeme-body x)))) tokens))) ;; takes a list of preprocessing-token's, and return a "source" string (define (unlex-aggressive tokens) (typecheck tokens (list-of lexeme?)) (string-concatenate (map (lambda (x) (cond ((preprocessing-token? x) (stringify-token x)) ((whitespace-token? x) " ") ((other-token? x) (lexeme-body x)))) (squeeze-whitespace tokens)))) (define (stringify-escape-sequence sub-token) (match sub-token (`(simple-escape-sequence ,x) (format #f "\\~a" x)) (`(octal-escape-sequence ,x) (format #f "\\~a" x)) (`(hexadecimal-escape-sequence ,x) (format #f "\\x~a" x)) (`(universal-character-name ,x) (case (string-length x) ((4) (format #f "\\u~a" x)) ((8) (format #f "\\U~a" x)))))) (define (stringify-string-tokens fragments) (with-output-to-string (lambda () (display #\") (for-each (match-lambda (`(escape-sequence ,x) (display (stringify-escape-sequence x))) ;; Backslash in source strings is usually encoded by an ;; 'escape-sequence, but literal backslashes can be in ;; "regular" string fragments as result of the stringification ;; operator (#). (s (display (escape-special-chars s "\"\\" #\\)))) fragments) (display #\")))) ;; Returns the "source" of the token, as a preprocessing string literal token (define (stringify-token preprocessing-token) (match (lexeme-body preprocessing-token) (('string-literal `(encoding-prefix ,prefix) parts ...) (stringify-string-tokens parts)) (('string-literal parts ...) (stringify-string-tokens parts)) (`(header-name (q-string ,s)) (format #f "~s" s)) (`(header-name (h-string ,s)) (format #f "<~a>" s)) (`(identifier ,id) id) (`(pp-number ,n) n) (`(character-constant ,c) (format #f "'~a'" c)) (`(punctuator ,p) p))) ;; takes a token list, and return a single string literal token (define (stringify-tokens tokens) (lexeme type: 'preprocessing-token body: `(string-literal ,(unlex-aggressive tokens))))