1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
|
(define-module (c unlex)
:use-module (hnh util type)
:use-module (ice-9 match)
:use-module (c lex2)
:use-module (c cpp-types)
:use-module (c cpp-util)
:use-module ((texinfo string-utils) :select (escape-special-chars))
:export (unlex
unlex-aggressive
stringify-token
stringify-tokens))
;; Turns a list of lexemes back into one source string.
;;
;; TOKENS must satisfy (list-of lexeme?); this is enforced by `typecheck'.
;; Preprocessing tokens are rendered with `stringify-token', while
;; whitespace tokens and "other" tokens contribute their lexeme body
;; unchanged.
(define (unlex tokens)
  (typecheck tokens (list-of lexeme?))
  (let ((render
         (lambda (tok)
           (cond ((preprocessing-token? tok) (stringify-token tok))
                 ;; Whitespace and "other" tokens are both emitted verbatim.
                 ((or (whitespace-token? tok) (other-token? tok))
                  (lexeme-body tok))))))
    (string-concatenate (map render tokens))))
;; Takes a list of lexemes and returns a "source" string with normalized
;; whitespace.
;;
;; TOKENS must satisfy (list-of lexeme?).  The list is first passed
;; through `squeeze-whitespace' (defined elsewhere; presumably it
;; collapses runs of whitespace tokens -- confirm against its
;; definition).  Every whitespace token that remains is rendered as a
;; single space, preprocessing tokens are rendered with
;; `stringify-token', and "other" tokens contribute their lexeme body.
(define (unlex-aggressive tokens)
  (typecheck tokens (list-of lexeme?))
  (let* ((squeezed (squeeze-whitespace tokens))
         (pieces
          (map (lambda (tok)
                 (cond ((preprocessing-token? tok) (stringify-token tok))
                       ((whitespace-token? tok) " ")
                       ((other-token? tok) (lexeme-body tok))))
               squeezed)))
    (string-concatenate pieces)))
;; Renders the payload of an escape-sequence node as source text.
;;
;; SUB-TOKEN is a two element list (KIND VALUE), where KIND is one of
;; simple-escape-sequence, octal-escape-sequence,
;; hexadecimal-escape-sequence or universal-character-name.
;; Returns a string starting with a backslash.  A universal character
;; name is spelled \u when VALUE has 4 characters and \U when it has 8;
;; any other length yields an unspecified value.  A SUB-TOKEN of any
;; other shape makes `match' signal an error.
(define (stringify-escape-sequence sub-token)
  (match sub-token
    ;; Simple and octal escapes are both just a backslash plus payload.
    (((or 'simple-escape-sequence 'octal-escape-sequence) payload)
     (format #f "\\~a" payload))
    (('hexadecimal-escape-sequence digits)
     (format #f "\\x~a" digits))
    (('universal-character-name digits)
     (let ((len (string-length digits)))
       (cond ((= len 4) (format #f "\\u~a" digits))
             ((= len 8) (format #f "\\U~a" digits)))))))
;; Renders the fragments of a string literal as a double-quoted source
;; string.
;;
;; FRAGMENTS is a list whose elements are either
;; (escape-sequence SUB-TOKEN) nodes, rendered through
;; `stringify-escape-sequence', or anything else, which is handed to
;; `escape-special-chars' so that double quotes and backslashes get a
;; leading backslash.
;; Returns the resulting string, including the surrounding quotes.
(define (stringify-string-tokens fragments)
  (define (emit-fragment fragment)
    (match fragment
      (('escape-sequence inner)
       (display (stringify-escape-sequence inner)))
      ;; Backslashes in source strings normally arrive as
      ;; escape-sequence nodes, but the stringification operator (#)
      ;; can leave literal backslashes (and quotes) inside "regular"
      ;; fragments, so those are escaped here.
      (text
       (display (escape-special-chars text "\"\\" #\\)))))
  (with-output-to-string
    (lambda ()
      (display #\")
      (for-each emit-fragment fragments)
      (display #\"))))
;; Returns the "source" text of a single preprocessing token, as a string.
;;
;; PREPROCESSING-TOKEN is a lexeme; its body (as given by `lexeme-body')
;; must have one of the following shapes:
;;   (string-literal (encoding-prefix . PREFIX) PARTS ...)
;;   (header-name (q-string S))   rendered with ~s, i.e. double-quoted
;;   (header-name (h-string S))   rendered as <S>
;;   (identifier ID)              returns ID
;;   (pp-number N)                returns N
;;   (character-constant (character-prefix . PREFIX) C PARTS ...)
;;   (punctuator P)               returns P
;; Any other body raises a 'cpp-error through `scm-error'.
(define (stringify-token preprocessing-token)
  (match (lexeme-body preprocessing-token)
    (('string-literal `(encoding-prefix . ,prefix) parts ...)
     ;; NOTE(review): PREFIX is bound but never emitted, unlike the
     ;; character-constant case below which prints its prefix.  Confirm
     ;; whether dropping the encoding prefix here is intentional.
     (stringify-string-tokens parts))
    (`(header-name (q-string ,s))
     (format #f "~s" s))
    (`(header-name (h-string ,s))
     (format #f "<~a>" s))
    (`(identifier ,id) id)
    (`(pp-number ,n) n)
    (('character-constant `(character-prefix . ,x) c parts ...)
     (with-output-to-string
       (lambda ()
         ;; The prefix list is empty when the constant has no prefix.
         (unless (null? x)
           (display (car x)))
         (display #\')
         ;; Emit every fragment, not only the first one, so that
         ;; constants made of several fragments are not truncated.
         ;; Escape sequences go through the shared helper; this also
         ;; makes hexadecimal escapes come out as \x..., which the
         ;; previous duplicated octal pattern could never match.
         (for-each (match-lambda
                     (`(escape-sequence ,e)
                      (display (stringify-escape-sequence e)))
                     (fragment (display fragment)))
                   (cons c parts))
         (display #\'))))
    (`(punctuator ,p) p)
    (_ (scm-error 'cpp-error "stringify-token"
                  "No matching patterns for: ~s"
                  (list preprocessing-token) #f))))
;; Takes a list of tokens and returns a single string-literal
;; preprocessing token.
;;
;; The tokens are rendered to text with `unlex-aggressive'; that text
;; becomes the only fragment of a string-literal body whose encoding
;; prefix is empty.
(define (stringify-tokens tokens)
  (let ((source-text (unlex-aggressive tokens)))
    (lexeme type: 'preprocessing-token
            body: (list 'string-literal
                        (list 'encoding-prefix)
                        source-text))))
|