aboutsummaryrefslogtreecommitdiff
path: root/module/c/cpp-util.scm
blob: 9674317b475668dba682989977a7a3fd8f68e9c9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
(define-module (c cpp-util)
  :use-module ((srfi srfi-1) :select (drop-while break))
  :use-module (srfi srfi-71)
  :use-module ((hnh util) :select (->))
  :use-module (hnh util type)
  :use-module ((hnh util lens) :select (modify ref))
  :use-module ((c lex2) :select (lex lexeme?))
  :use-module ((c unlex) :select (unlex))
  :use-module (c cpp-types)
  :export (tokens-until-eol
           tokens-until-cpp-directive
           next-token-matches?
           squeeze-whitespace
           drop-whitespace
           drop-whitespace-right
           drop-whitespace-both
           cleanup-whitespace
           concatenate-tokens
           merge-string-literals))


;; Does the next non-whitespace token in the stream satisfy the predicate?
(define (next-token-matches? predicate tokens)
  (let ((tokens (drop-whitespace tokens)))
    (if (null? tokens)
        #f
        (predicate (car tokens)))))

(define (next-token-matches/line? predicate tokens)
  (let ((tokens (drop-whitespace/line tokens)))
    (if (null? tokens)
        #f
        (predicate (car tokens)))))

;; Returns two values:
;; - tokens until a newline token is met
;; - (potentially the newline token) and the remaining tokens
(define (tokens-until-eol tokens)
  ;; (typecheck tokens (list-of lexeme?))
  (break newline-token? tokens))

;; call predicate with the remaining token stream, until we run out of token, or
;; predicate matches
(define (break-lexemes predicate lex-list)
  (let loop ((rem lex-list) (done '()))
    (cond ((null? rem)     (values (reverse done) '()))
          ((predicate rem) (values (reverse done) rem))
          (else (loop (cdr rem) (cons (car rem) done))))))

;; Finds the next instance of "\n#" (possibly with inbetween whitespace)
;; and return the values before and after (inclusive)
(define (tokens-until-cpp-directive tokens)
  (break-lexemes
   (lambda (tokens)
     (and (newline-token? (car tokens))
          (next-token-matches/line?
           (lambda (token) (equal? "#" (punctuator-token? token)))
           (cdr tokens))))
   tokens))

;; Replace all whitespace with single spaces.
(define (squeeze-whitespace tokens)
  (cond ((null? tokens) '())
        ((null? (cdr tokens))
         (list
          (if (whitespace-token? (car tokens))
              (car (lex " "))
              (car tokens))))
        ((and (whitespace-token? (car tokens))
              (whitespace-token? (cadr tokens)))
         (squeeze-whitespace (cons (car (lex " "))
                                   (cddr tokens))))
        (else (cons (car tokens)
                    (squeeze-whitespace (cdr tokens))))))

;; Drop leading whitespace tokens
(define (drop-whitespace tokens)
  ;; (typecheck tokens (list-of lexeme?))
  (drop-while whitespace-token? tokens))

(define (drop-whitespace/line tokens)
  ;; (typecheck tokens (list-of lexeme?))
  (drop-while (lambda (t)
                (and (whitespace-token? t)
                     (not (newline-token? t))))
              tokens))

(define (drop-whitespace-right tokens)
  ;; (typecheck tokens (list-of lexeme?))
  (-> tokens reverse drop-whitespace reverse))

(define (drop-whitespace-both tokens)
  ;; (typecheck tokens (list-of lexeme?))
  (-> tokens
      drop-whitespace
      drop-whitespace-right))

;; helper procedure to parse-parameter-list.
;; If a parameter is complex then whitespace is kept, but squeezed to single spaces. Surounding whitespace is removed.
;; Example:
;; #define str(x, y) #y
;; str(x,   (   2,     4    )     )
;; expands to:
;; "( 2, 4 )"
;; 6.10.3.2 p 2
(define (cleanup-whitespace tokens)
  ;; (typecheck tokens (list-of lexeme?))
  (-> tokens drop-whitespace-both squeeze-whitespace))

(define (concatenate-tokens a b)
  (car (lex (string-append (unlex (list a))
                           (unlex (list b))))))


(define (merge-string-literals tokens)
  (cond ((null? tokens) '())
        ((null? (cdr tokens)) tokens)
        ((string-token? (car tokens))
         (lambda (a . _) a)
         => (lambda (prefix-a . parts-a)
              (cond ((string-token? (cadr tokens))
                     (lambda (a . _) a)
                     => (lambda (prefix-b . parts-b)
                          (merge-string-literals
                           ;; TODO check validity of prefixes
                           (cons (make-string-literal (cons prefix-a (append parts-a parts-b)))
                                 (cddr tokens)))))
                    (else (cons (car tokens)
                                (merge-string-literals (cdr tokens)))))))
        (else (cons (car tokens) (merge-string-literals (cdr tokens))))))