(define-module (c cpp-util) :use-module ((srfi srfi-1) :select (drop-while break)) :use-module (srfi srfi-71) :use-module ((hnh util) :select (->)) :use-module (hnh util type) :use-module ((hnh util lens) :select (modify ref)) :use-module ((c lex2) :select (lex lexeme?)) :use-module ((c unlex) :select (unlex)) :use-module (c cpp-types) :export (tokens-until-eol tokens-until-cpp-directive next-token-matches? squeeze-whitespace drop-whitespace drop-whitespace-right drop-whitespace-both cleanup-whitespace concatenate-tokens)) ;; Does the next non-whitespace token in the stream satisfy the predicate? (define (next-token-matches? predicate tokens) (let ((tokens (drop-whitespace tokens))) (if (null? tokens) #f (predicate (car tokens))))) (define (next-token-matches/line? predicate tokens) (let ((tokens (drop-whitespace/line tokens))) (if (null? tokens) #f (predicate (car tokens))))) ;; Returns two values: ;; - tokens until a newline token is met ;; - (potentially the newline token) and the remaining tokens (define (tokens-until-eol tokens) ;; (typecheck tokens (list-of lexeme?)) (break newline-token? tokens)) ;; call predicate with the remaining token stream, until we run out of token, or ;; predicate matches (define (break-lexemes predicate lex-list) (let loop ((rem lex-list) (done '())) (cond ((null? rem) (values (reverse done) '())) ((predicate rem) (values (reverse done) rem)) (else (loop (cdr rem) (cons (car rem) done)))))) ;; Finds the next instance of "\n#" (possibly with inbetween whitespace) ;; and return the values before and after (inclusive) (define (tokens-until-cpp-directive tokens) (break-lexemes (lambda (tokens) (and (newline-token? (car tokens)) (next-token-matches/line? (lambda (token) (equal? "#" (punctuator-token? token))) (cdr tokens)))) tokens)) ;; Replace all whitespace with single spaces. (define (squeeze-whitespace tokens) (cond ((null? tokens) '()) ((null? (cdr tokens)) (list (if (whitespace-token? (car tokens)) (car (lex " ")) (car tokens)))) ((and (whitespace-token? (car tokens)) (whitespace-token? (cadr tokens))) (squeeze-whitespace (cons (car (lex " ")) (cddr tokens)))) (else (cons (car tokens) (squeeze-whitespace (cdr tokens)))))) ;; Drop leading whitespace tokens (define (drop-whitespace tokens) ;; (typecheck tokens (list-of lexeme?)) (drop-while whitespace-token? tokens)) (define (drop-whitespace/line tokens) ;; (typecheck tokens (list-of lexeme?)) (drop-while (lambda (t) (and (whitespace-token? t) (not (newline-token? t)))) tokens)) (define (drop-whitespace-right tokens) ;; (typecheck tokens (list-of lexeme?)) (-> tokens reverse drop-whitespace reverse)) (define (drop-whitespace-both tokens) ;; (typecheck tokens (list-of lexeme?)) (-> tokens drop-whitespace drop-whitespace-right)) ;; helper procedure to parse-parameter-list. ;; If a parameter is complex then whitespace is kept, but squeezed to single spaces. Surounding whitespace is removed. ;; Example: ;; #define str(x, y) #y ;; str(x, ( 2, 4 ) ) ;; expands to: ;; "( 2, 4 )" ;; 6.10.3.2 p 2 (define (cleanup-whitespace tokens) ;; (typecheck tokens (list-of lexeme?)) (-> tokens drop-whitespace-both squeeze-whitespace)) (define (concatenate-tokens a b) (car (lex (string-append (unlex (list a)) (unlex (list b))))))