blob: 3ea06505385c23f4b02bf2728f2afbc1503a35e1 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
|
(define-module (c cpp-util)
:use-module ((srfi srfi-1) :select (drop-while break))
:use-module (srfi srfi-71)
:use-module ((hnh util) :select (->))
:use-module (hnh util type)
:use-module ((hnh util lens) :select (modify ref))
:use-module ((c lex2) :select (lex lexeme?))
:use-module ((c unlex) :select (unlex))
:use-module (c cpp-types)
:export (tokens-until-eol
tokens-until-cpp-directive
next-token-matches?
squeeze-whitespace
drop-whitespace
drop-whitespace-right
drop-whitespace-both
cleanup-whitespace
concatenate-tokens))
;; Does the next non-whitespace token in the stream satisfy the predicate?
(define (next-token-matches? predicate tokens)
(let ((tokens (drop-whitespace tokens)))
(if (null? tokens)
#f
(predicate (car tokens)))))
(define (next-token-matches/line? predicate tokens)
(let ((tokens (drop-whitespace/line tokens)))
(if (null? tokens)
#f
(predicate (car tokens)))))
;; Returns two values:
;; - tokens until a newline token is met
;; - (potentially the newline token) and the remaining tokens
(define (tokens-until-eol tokens)
;; (typecheck tokens (list-of lexeme?))
(break newline-token? tokens))
;; call predicate with the remaining token stream, until we run out of token, or
;; predicate matches
(define (break-lexemes predicate lex-list)
(let loop ((rem lex-list) (done '()))
(cond ((null? rem) (values (reverse done) '()))
((predicate rem) (values (reverse done) rem))
(else (loop (cdr rem) (cons (car rem) done))))))
;; Finds the next instance of "\n#" (possibly with inbetween whitespace)
;; and return the values before and after (inclusive)
(define (tokens-until-cpp-directive tokens)
(break-lexemes
(lambda (tokens)
(and (newline-token? (car tokens))
(next-token-matches/line?
(lambda (token) (equal? "#" (punctuator-token? token)))
(cdr tokens))))
tokens))
;; Replace all whitespace with single spaces.
(define (squeeze-whitespace tokens)
(cond ((null? tokens) '())
((null? (cdr tokens))
(list
(if (whitespace-token? (car tokens))
(car (lex " "))
(car tokens))))
((and (whitespace-token? (car tokens))
(whitespace-token? (cadr tokens)))
(squeeze-whitespace (cons (car (lex " "))
(cddr tokens))))
(else (cons (car tokens)
(squeeze-whitespace (cdr tokens))))))
;; Drop leading whitespace tokens
(define (drop-whitespace tokens)
;; (typecheck tokens (list-of lexeme?))
(drop-while whitespace-token? tokens))
(define (drop-whitespace/line tokens)
;; (typecheck tokens (list-of lexeme?))
(drop-while (lambda (t)
(and (whitespace-token? t)
(not (newline-token? t))))
tokens))
(define (drop-whitespace-right tokens)
;; (typecheck tokens (list-of lexeme?))
(-> tokens reverse drop-whitespace reverse))
(define (drop-whitespace-both tokens)
;; (typecheck tokens (list-of lexeme?))
(-> tokens
drop-whitespace
drop-whitespace-right))
;; helper procedure to parse-parameter-list.
;; If a parameter is complex then whitespace is kept, but squeezed to single spaces. Surounding whitespace is removed.
;; Example:
;; #define str(x, y) #y
;; str(x, ( 2, 4 ) )
;; expands to:
;; "( 2, 4 )"
;; 6.10.3.2 p 2
(define (cleanup-whitespace tokens)
;; (typecheck tokens (list-of lexeme?))
(-> tokens drop-whitespace-both squeeze-whitespace))
(define (concatenate-tokens a b)
(car (lex (string-append (unlex (list a))
(unlex (list b))))))
|