1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
|
(define-module (c unlex)
:use-module (hnh util type)
:use-module (ice-9 match)
:use-module (c lex2)
:use-module (c cpp-types)
:use-module (c cpp-util)
:use-module ((texinfo string-utils) :select (escape-special-chars))
:export (unlex
unlex-aggressive
stringify-token
stringify-tokens))
(define (unlex tokens)
(typecheck tokens (list-of lexeme?))
(string-concatenate
(map (lambda (x) (cond (x preprocessing-token? => stringify-token)
((whitespace-token? x) (lexeme-body x))
((other-token? x) (lexeme-body x))))
tokens)))
;; takes a list of preprocessing-token's, and return a "source" string
(define (unlex-aggressive tokens)
(typecheck tokens (list-of lexeme?))
(string-concatenate
(map (lambda (x)
(cond ((preprocessing-token? x) (stringify-token x))
((whitespace-token? x) " ")
((other-token? x) (lexeme-body x))))
(squeeze-whitespace tokens))))
(define (stringify-escape-sequence sub-token)
(match sub-token
(`(simple-escape-sequence ,x)
(format #f "\\~a" x))
(`(octal-escape-sequence ,x)
(format #f "\\~a" x))
(`(hexadecimal-escape-sequence ,x)
(format #f "\\x~a" x))
(`(universal-character-name ,x)
(case (string-length x)
((4) (format #f "\\u~a" x))
((8) (format #f "\\U~a" x))))))
(define (stringify-string-tokens fragments)
(with-output-to-string
(lambda ()
(display #\")
(for-each (match-lambda
(`(escape-sequence ,x)
(display (stringify-escape-sequence x)))
;; Backslash in source strings is usually encoded by an
;; 'escape-sequence, but literal backslashes can be in
;; "regular" string fragments as result of the stringification
;; operator (#).
(s (display (escape-special-chars s "\"\\" #\\))))
fragments)
(display #\"))))
;; Returns the "source" of the token, as a preprocessing string literal token
(define (stringify-token preprocessing-token)
(match (lexeme-body preprocessing-token)
(('string-literal `(encoding-prefix ,prefix) parts ...)
(stringify-string-tokens parts))
(('string-literal parts ...)
(stringify-string-tokens parts))
(`(header-name (q-string ,s))
(format #f "~s" s))
(`(header-name (h-string ,s))
(format #f "<~a>" s))
(`(identifier ,id) id)
(`(pp-number ,n) n)
(`(character-constant ,c)
(format #f "'~a'" c))
(`(punctuator ,p) p)))
;; takes a token list, and return a single string literal token
(define (stringify-tokens tokens)
(lexeme type: 'preprocessing-token
body: `(string-literal ,(unlex-aggressive tokens))))
|