module/c/cpp-util.scm


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111

(define-module (c cpp-util)
  :use-module ((srfi srfi-1) :select (drop-while break))
  :use-module (srfi srfi-71)
  :use-module ((hnh util) :select (->))
  :use-module (hnh util type)
  :use-module ((hnh util lens) :select (modify ref))
  :use-module ((c lex2) :select (lex lexeme?))
  :use-module ((c unlex) :select (unlex))
  :use-module (c cpp-types)
  :export (tokens-until-eol
           tokens-until-cpp-directive
           next-token-matches?
           squeeze-whitespace
           drop-whitespace
           drop-whitespace-right
           drop-whitespace-both
           cleanup-whitespace
           concatenate-tokens))


;; Does the next non-whitespace token in the stream satisfy the predicate?
(define (next-token-matches? predicate tokens)
  (let ((tokens (drop-whitespace tokens)))
    (if (null? tokens)
        #f
        (predicate (car tokens)))))

(define (next-token-matches/line? predicate tokens)
  (let ((tokens (drop-whitespace/line tokens)))
    (if (null? tokens)
        #f
        (predicate (car tokens)))))

;; Returns two values:
;; - tokens until a newline token is met
;; - (potentially the newline token) and the remaining tokens
(define (tokens-until-eol tokens)
  ;; (typecheck tokens (list-of lexeme?))
  (break newline-token? tokens))

;; call predicate with the remaining token stream, until we run out of token, or
;; predicate matches
(define (break-lexemes predicate lex-list)
  (let loop ((rem lex-list) (done '()))
    (cond ((null? rem)     (values (reverse done) '()))
          ((predicate rem) (values (reverse done) rem))
          (else (loop (cdr rem) (cons (car rem) done))))))

;; Finds the next instance of "\n#" (possibly with inbetween whitespace)
;; and return the values before and after (inclusive)
(define (tokens-until-cpp-directive tokens)
  (break-lexemes
   (lambda (tokens)
     (and (newline-token? (car tokens))
          (next-token-matches/line?
           (lambda (token) (equal? "#" (punctuator-token? token)))
           (cdr tokens))))
   tokens))

;; Replace all whitespace with single spaces.
(define (squeeze-whitespace tokens)
  (cond ((null? tokens) '())
        ((null? (cdr tokens))
         (list
          (if (whitespace-token? (car tokens))
              (car (lex " "))
              (car tokens))))
        ((and (whitespace-token? (car tokens))
              (whitespace-token? (cadr tokens)))
         (squeeze-whitespace (cons (car (lex " "))
                                   (cddr tokens))))
        (else (cons (car tokens)
                    (squeeze-whitespace (cdr tokens))))))

;; Drop leading whitespace tokens
(define (drop-whitespace tokens)
  ;; (typecheck tokens (list-of lexeme?))
  (drop-while whitespace-token? tokens))

(define (drop-whitespace/line tokens)
  ;; (typecheck tokens (list-of lexeme?))
  (drop-while (lambda (t)
                (and (whitespace-token? t)
                     (not (newline-token? t))))
              tokens))

(define (drop-whitespace-right tokens)
  ;; (typecheck tokens (list-of lexeme?))
  (-> tokens reverse drop-whitespace reverse))

(define (drop-whitespace-both tokens)
  ;; (typecheck tokens (list-of lexeme?))
  (-> tokens
      drop-whitespace
      drop-whitespace-right))

;; helper procedure to parse-parameter-list.
;; If a parameter is complex then whitespace is kept, but squeezed to single spaces. Surounding whitespace is removed.
;; Example:
;; #define str(x, y) #y
;; str(x,   (   2,     4    )     )
;; expands to:
;; "( 2, 4 )"
;; 6.10.3.2 p 2
(define (cleanup-whitespace tokens)
  ;; (typecheck tokens (list-of lexeme?))
  (-> tokens drop-whitespace-both squeeze-whitespace))

(define (concatenate-tokens a b)
  (car (lex (string-append (unlex (list a))
                           (unlex (list b))))))