diff options
author | Hugo Hörnquist <hugo@lysator.liu.se> | 2022-07-13 04:39:14 +0200 |
---|---|---|
committer | Hugo Hörnquist <hugo@lysator.liu.se> | 2022-07-13 04:39:14 +0200 |
commit | c1cf0693982d9c1f1b871966752140fee5d76d19 (patch) | |
tree | 65cf5c501f5b2029eb08bed7fa9844b9530385f3 /module | |
parent | Resolve # ## # (diff) | |
download | calp-c1cf0693982d9c1f1b871966752140fee5d76d19.tar.gz calp-c1cf0693982d9c1f1b871966752140fee5d76d19.tar.xz |
work
Diffstat (limited to 'module')
-rw-r--r-- | module/c/cpp-types.scm | 1 | ||||
-rw-r--r-- | module/c/cpp-util.scm | 43 | ||||
-rw-r--r-- | module/c/lex2.scm | 13 | ||||
-rw-r--r-- | module/c/preprocessor2.scm | 155 | ||||
-rw-r--r-- | module/c/unlex.scm | 31 |
5 files changed, 171 insertions, 72 deletions
diff --git a/module/c/cpp-types.scm b/module/c/cpp-types.scm index 555120d6..1df70594 100644 --- a/module/c/cpp-types.scm +++ b/module/c/cpp-types.scm @@ -53,6 +53,7 @@ (`(pp-number ,x) x) (_ #f)))) +;; TODO this fails if there are multiple components in the string token (define (string-token? token) (and (preprocessing-token? token) (match (lexeme-body token) diff --git a/module/c/cpp-util.scm b/module/c/cpp-util.scm index fff3cc9e..7969ccd5 100644 --- a/module/c/cpp-util.scm +++ b/module/c/cpp-util.scm @@ -1,11 +1,15 @@ (define-module (c cpp-util) :use-module ((srfi srfi-1) :select (drop-while break)) + :use-module (srfi srfi-71) :use-module ((hnh util) :select (->)) :use-module (hnh util type) + :use-module ((hnh util lens) :select (modify ref)) :use-module ((c lex2) :select (lex lexeme?)) :use-module ((c unlex) :select (unlex)) :use-module (c cpp-types) :export (tokens-until-eol + tokens-until-cpp-directive + next-token-matches? squeeze-whitespace drop-whitespace drop-whitespace-right @@ -13,6 +17,20 @@ cleanup-whitespace concatenate-tokens)) + +;; Does the next non-whitespace token in the stream satisfy the predicate? +(define (next-token-matches? predicate tokens) + (let ((tokens (drop-whitespace tokens))) + (if (null? tokens) + #f + (predicate (car tokens))))) + +(define (next-token-matches/line? predicate tokens) + (let ((tokens (drop-whitespace/line tokens))) + (if (null? tokens) + #f + (predicate (car tokens))))) + ;; Returns two values: ;; - tokens until a newline token is met ;; - (potentially the newline token) and the remaining tokens @@ -20,6 +38,24 @@ (typecheck tokens (list-of lexeme?)) (break newline-token? tokens)) +;; call predicate with the remaining token stream, until we run out of token, or +;; predicate matches +(define (break-lexemes predicate lex-list) + (let loop ((rem lex-list) (done '())) + (cond ((null? rem) (values (reverse done) '())) + ((predicate rem) (values (reverse done) rem)) + (else (loop (cdr rem) (cons (car rem) done)))))) + +;; Finds the next instance of "\n#" (possibly with inbetween whitespace) +;; and return the values before and after (inclusive) +(define (tokens-until-cpp-directive tokens) + (break-lexemes + (lambda (tokens) + (and (newline-token? (car tokens)) + (next-token-matches/line? + (lambda (token) (equal? "#" (punctuator-token? token))) + (cdr tokens)))) + tokens)) ;; Replace all whitespace with single spaces. (define (squeeze-whitespace tokens) @@ -41,6 +77,13 @@ (typecheck tokens (list-of lexeme?)) (drop-while whitespace-token? tokens)) +(define (drop-whitespace/line tokens) + (typecheck tokens (list-of lexeme?)) + (drop-while (lambda (t) + (and (whitespace-token? t) + (not (newline-token? t)))) + tokens)) + (define (drop-whitespace-right tokens) (typecheck tokens (list-of lexeme?)) (-> tokens reverse drop-whitespace reverse)) diff --git a/module/c/lex2.scm b/module/c/lex2.scm index 652aa6c1..fcddcdc4 100644 --- a/module/c/lex2.scm +++ b/module/c/lex2.scm @@ -368,4 +368,15 @@ (if (string-null? string) '() (map lex-output->lexeme-object - (cdr (peg:tree (match-pattern preprocessing-tokens string)))))) + (let ((result (match-pattern preprocessing-tokens string))) + (let ((trailing (substring (peg:string result) + (peg:end result)))) + (unless (string-null? trailing) + (scm-error 'cpp-lex-error "lex" + "Failed to lex string, remaining trailing characters: ~s" + (list trailing) #f))) + (unless (list? (peg:tree result)) + (scm-error 'cpp-lex-error "lex" + "Parsing just failed. Chars: ~s" + (list (peg:string result)) #f)) + (cdr (peg:tree result)))))) diff --git a/module/c/preprocessor2.scm b/module/c/preprocessor2.scm index 4678ded7..d7bf3b64 100644 --- a/module/c/preprocessor2.scm +++ b/module/c/preprocessor2.scm @@ -9,7 +9,7 @@ :select (function-like-macro variadic? identifier-list)) :use-module ((c cpp-environment object-like-macro) :select (object-like-macro object-like-macro?)) :use-module ((c cpp-environment internal-macro) :select (internal-macro)) - :use-module ((hnh util) :select (-> intersperse aif swap unless)) + :use-module ((hnh util) :select (-> intersperse aif swap unless unval)) :use-module ((hnh util lens) :select (set modify cdr*)) :use-module (hnh util path) :use-module (hnh util type) @@ -19,7 +19,8 @@ :use-module (c unlex) :use-module (c cpp-types) :use-module (c cpp-util) - :export (defined-macro)) + :use-module (ice-9 control) + :export (defined-macro _Pragma-macro)) (define-syntax-rule (alist-of variable key-type value-type) (build-validator-body variable (list-of (pair-of key-type value-type)))) @@ -34,6 +35,21 @@ (define (comma-token? token) (equal? "," (punctuator-token? token))) (define (ellipsis-token? token) (equal? "..." (punctuator-token? token))) + +(define-syntax-rule (abort* form) + (call-with-values (lambda () form) abort)) + +(define-syntax-rule (on-fst form) + (% form + (lambda (prompt fst . rest) + (apply values (prompt fst) rest)))) + +(define-syntax-rule (on-snd form) + (% form + (lambda (prompt fst snd . rest) + (apply values fst (prompt snd) rest)))) + + ;; parameters is a lexeme list, as returned by parse-parameter-list (define (build-parameter-map macro parameters) (typecheck macro cpp-macro?) @@ -157,7 +173,9 @@ (if (or (concat-token? last) (next-token-matches? concat-token? tokens)) replacement - (resolve-token-stream environment replacement once?: #t)) + ;; resolve-token-stream only modifies environment by updating current line + ;; that can't happen in a macro body + ((unval resolve-token-stream 1) environment replacement once?: #t)) (loop (cdr tokens) #f)))))) ((whitespace-token? (car tokens)) (cons (car tokens) (loop (cdr tokens) last))) @@ -190,13 +208,6 @@ (let ((name (macro-identifier macro))) (cond ((object-macro? macro) - ;; #define f(a) f(x * (a)) - ;; #define w 0,1 - ;; #define m(a) a(w) - ;; m(f) - ;; ⇒ f(0,1) - ;; instead of expected - ;; f(2 * (0,1)) (values environment (append (fold (swap mark-noexpand) (expand## (macro-body macro)) (cons name noexpand-list)) @@ -211,17 +222,23 @@ (cons name noexpand-list)) remaining))) (values environment + ;; TODO#1 the token shouldn't be expanded here, but it should neither be marked no-expand? + ;; Consider the case + ;; #define m(a) a(0,1) + ;; #define f(a) f(2 * (a)) + ;; m(f) (append (mark-noexpand (lex (macro-identifier macro)) (macro-identifier macro)) remaining-tokens)))) ((internal-macro? macro) (if (next-token-matches? left-parenthesis-token? remaining-tokens) (let ((containing remaining newlines (parse-parameter-list remaining-tokens))) - (values (bump-line environment newlines) - (append (fold (swap mark-noexpand) - ((macro-body macro) environment containing) - (cons name noexpand-list)) - remaining))) + (let ((env* tokens* ((macro-body macro) environment containing))) + (values (bump-line env* newlines) + (append (fold (swap mark-noexpand) + tokens* + (cons name noexpand-list)) + remaining)))) (values environment (append (mark-noexpand (lex (macro-identifier macro)) (macro-identifier macro)) remaining-tokens)))) @@ -256,13 +273,6 @@ -(define (next-token-matches? predicate tokens) - (let ((tokens (drop-whitespace tokens))) - (if (null? tokens) - #f - (predicate (car tokens))))) - - ;; returns three values: ;; - a list of tokens where each is a parameter to the function like macro ;; - the remaining tokenstream @@ -356,17 +366,24 @@ (not null?))) (aif (identifier-token? (car (list-ref arguments 0))) (let ((in-env (boolean->c-boolean (in-environment? environment it)))) - (lex (number->string in-env))) + (values environment (lex (number->string in-env)))) (scm-error 'cpp-error "defined" "Invalid parameter list to `defined': ~s" (list arguments) #f))))) -;; (lex "STDC FP_CONTRACT ON") -;; (#<<lexeme> type: preprocessing-token body: (identifier "STDC") noexpand: ()> -;; #<<lexeme> type: whitespace body: " " noexpand: ()> -;; #<<lexeme> type: preprocessing-token body: (identifier "FP_CONTRACT") noexpand: ()> -;; #<<lexeme> type: whitespace body: " " noexpand: ()> -;; #<<lexeme> type: preprocessing-token body: (identifier "ON") noexpand: ()>) +(define _Pragma-macro + (internal-macro + identifier: "_Pragma" + body: (lambda (environment arguments) + (typecheck arguments (and (list-of (list-of lexeme?)) + (not null?))) + (aif (string-token? (caar arguments)) + (values (handle-pragma environment (lex it)) '()) + (scm-error 'cpp-pragma-error "_Pragma" + "Invalid argument to _Pragma: ~s" + (list (car arguments)) #f))))) + + ;; environment, tokens → environment (define (handle-pragma environment tokens) @@ -395,13 +412,6 @@ environment)))) -;; TODO -;; (define _Pragma-macro -;; (internal-macro -;; identifier: "_Pragma" -;; body: (lambda (environment tokens) -;; ))) - (define (resolve-constant-expression tokens) (typecheck tokens (list-of lexeme?)) 'TODO @@ -424,26 +434,28 @@ ;; Expands a token-stream which contains no pre-processing directives (#if:s, ...) ;; If @var{once?} is true then the resulting body won't be scanned again for tokens to expand +;; environment, tokens, [boolean] → environment, tokens (define* (resolve-token-stream environment tokens key: once?) (typecheck environment cpp-environment?) (typecheck tokens (list-of lexeme?)) ;; (pprint-environment environment) ;; (format (current-error-port) "~a~%~%" (unlex tokens)) (let loop ((environment environment) (tokens tokens)) - (cond ((null? tokens) '()) - ((car tokens) - (lambda (x) (and (identifier-token? x) - (not (marked-noexpand? x)))) - => (lambda (token) - (call-with-values - (lambda () (maybe-extend-identifier environment - (identifier-token? token) - (lexeme-noexpand token) - (cdr tokens))) - ;; Here is the after expansion - (if once? (lambda (_ t) t) loop)))) - (else (cons (car tokens) - (loop environment (cdr tokens))))))) + (cond ((null? tokens) (values environment '())) + ((newline-token? (car tokens)) + (on-snd (cons (car tokens) (abort* (loop (bump-line environment) (cdr tokens)))))) + ((and (identifier-token? (car tokens)) + (not (marked-noexpand? (car tokens)))) + (call-with-values + (lambda () (maybe-extend-identifier environment + (identifier-token? (car tokens)) + (lexeme-noexpand (car tokens)) + (cdr tokens))) + ;; Here is the after expansion + (if once? values loop))) + (else (on-snd (cons (car tokens) (abort* (loop environment (cdr tokens))))))))) + + ;; returns a new environment ;; handle body of #if @@ -453,7 +465,8 @@ (typecheck tokens (list-of lexeme?)) (-> (extend-environment environment defined-macro) - (resolve-token-stream tokens) + ;; no newlines in #if line + ((unval resolve-token-stream 1) tokens) resolve-constant-expression c-boolean->boolean (if (enter-active-if environment) @@ -503,7 +516,8 @@ (-> str resolve-q-file read-file tokenize)))) (else (unless %first-time (err "Failed parsing tokens")) - (loop #f (resolve-token-stream environment tokens))))))) + ;; No newlines in #include + (loop #f ((unval resolve-token-stream 1) environment tokens))))))) ;; environment, tokens → environment (define (handle-line-directive environment tokens*) @@ -517,17 +531,19 @@ (cond ((null? tokens)) ((number-token? (car tokens)) => (lambda (line) - (let ((line (string->number line))) - (let ((remaining (drop-whitespace (cdr tokens)))) - (cond ((null? remaining) (set environment current-line (1- line))) - ((string-token? (car remaining)) - => (lambda (file) - (-> environment - (set current-line (1- line)) - (set current-file file)))) - (%first-time (loop #f (resolve-token-stream environment tokens))) - (else (err))))))) - (%first-time (loop #f (resolve-token-stream environment tokens))) + (let ((line (string->number line)) + (remaining (drop-whitespace (cdr tokens)))) + (cond ((null? remaining) (set environment current-line (1- line))) + ((string-token? (car remaining)) + => (lambda (file) + (-> environment + (set current-line (1- line)) + (set current-file file)))) + ;; no newlines in #line + (%first-time (loop #f ((unval resolve-token-stream 1) environment tokens))) + (else (err)))))) + ;; no newlines in #line + (%first-time (loop #f ((unval resolve-token-stream 1) environment tokens))) (else (err)))))) ;; environment, tokens → environment @@ -634,15 +650,16 @@ remaining-tokens))))))))) ;; Line is not a pre-processing directive - (else (let ((line-tokens remaining-tokens (tokens-until-eol (cdr tokens)))) - (let ((env* tokens* (loop environment remaining-tokens))) - (values env* - (append (unless (in-comment-block? environment) - (resolve-token-stream environment line-tokens)) + (else (let ((preceding-tokens remaining-tokens (tokens-until-cpp-directive (cdr tokens)))) + (let* ((env* resolved-tokens (if (in-comment-block? environment) + (values environment '()) + (resolve-token-stream environment preceding-tokens)))) + (on-snd (append resolved-tokens ;; The initial newline is presreved here, for better output, ;; and to keep at least one whitespace token when there was one previously. ;; possibly also keep a newline for line-directives. - (append (unless (null? remaining-tokens) (lex "\n")) tokens*))))))))) + (unless (null? remaining-tokens) (lex "\n")) + (abort* (loop env* remaining-tokens)))))))))) (else (err "Unexpected middle of line"))))) diff --git a/module/c/unlex.scm b/module/c/unlex.scm index 18e800d9..e3d36f86 100644 --- a/module/c/unlex.scm +++ b/module/c/unlex.scm @@ -5,6 +5,7 @@ :use-module (c cpp-types) :use-module (c cpp-util) :export (unlex + unlex-aggressive stringify-token stringify-tokens)) @@ -24,11 +25,37 @@ ((whitespace-token? x) " "))) (squeeze-whitespace tokens)))) +(define (stringify-escape-sequence sub-token) + (match sub-token + (`(simple-escape-sequence ,x) + (format #f "\\~a" x)) + (`(octal-escape-sequence ,x) + (format #f "\\~a" x)) + (`(hexadecimal-escape-sequence ,x) + (format #f "\\x~a" x)) + (`(universal-character-name ,x) + (case (string-length x) + ((4) (format #f "\\u~a" x)) + ((8) (format #f "\\U~a" x)))))) + +(define (stringify-string-tokens fragments) + (with-output-to-string + (lambda () + (display #\") + (for-each (match-lambda + (`(escape-sequence ,x) + (display (stringify-escape-sequence x))) + (s (display s))) + fragments) + (display #\")))) + ;; Returns the "source" of the token, as a preprocessing string literal token (define (stringify-token preprocessing-token) (match (lexeme-body preprocessing-token) - (`(string-literal ,s) - (format #f "~s" s)) + (('string-literal `(encoding-prefix ,prefix) parts ...) + (stringify-string-tokens parts)) + (('string-literal parts ...) + (stringify-string-tokens parts)) (`(header-name (q-string ,s)) (format #f "~s" s)) (`(header-name (h-string ,s)) |