From 65a47e17747a397b3ebea1c6fead303277ebed5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20H=C3=B6rnquist?= Date: Sat, 23 Jul 2022 17:53:06 +0200 Subject: General cleanup in preprocessor. --- module/c/cpp-environment.scm | 41 ++- module/c/preprocessor2.scm | 601 +++++++++++++++++++++---------------------- module/hnh/util/type.scm | 34 ++- 3 files changed, 334 insertions(+), 342 deletions(-) (limited to 'module') diff --git a/module/c/cpp-environment.scm b/module/c/cpp-environment.scm index 39e596d1..da8e4413 100644 --- a/module/c/cpp-environment.scm +++ b/module/c/cpp-environment.scm @@ -1,7 +1,6 @@ (define-module (c cpp-environment) :use-module (srfi srfi-1) :use-module (srfi srfi-88) - :use-module (ice-9 hash-table) :use-module ((hnh util) :select (->>)) :use-module (hnh util object) :use-module (hnh util type) @@ -18,7 +17,6 @@ macro-identifier-list macro-variadic? cpp-macro? - ;; pprint-macro enter-into-if transition-to-if @@ -39,10 +37,8 @@ object-macro? internal-macro? - cpp-environment cpp-environment? cpp-if-status - ;; cpp-variables cpp-file-stack make-environment in-environment? @@ -108,10 +104,8 @@ (cpp-if-status type: (and (list-of if-status?) (not null?)) default: (list (if-status outside))) - ;; not exported since type signatures don't hold inside the hash table - ;; TODO replace hash table with something that doesn't require copying the - ;; entire structure every time - (cpp-variables type: hash-table? default: (make-hash-table)) + (cpp-variables type: (alist-of string? cpp-macro?) + default: '()) (cpp-file-stack type: (and (not null?) (list-of (pair-of string? exact-integer?))) default: '(("*outside*" . 1)))) @@ -208,32 +202,31 @@ (define (make-environment) (cpp-environment)) -(define (clone-hash-table ht) - (alist->hash-table (hash-map->list cons ht))) +;; (define (clone-hash-table ht) +;; (alist->hash-table (hash-map->list cons ht))) -(define (clone-environment environment) - (modify environment cpp-variables clone-hash-table)) +;; (define (clone-environment environment) +;; (modify environment cpp-variables clone-hash-table)) (define (in-environment? environment key) - (hash-get-handle (cpp-variables environment) key)) + (assoc key (cpp-variables environment))) (define (remove-identifier environment key) (typecheck key string?) - (let ((environment (clone-environment environment))) - (hash-remove! (cpp-variables environment) key) - environment)) + (modify environment cpp-variables + (lambda (vars) (remove (lambda (slot) (string=? key (car slot))) + vars)))) (define (add-identifier environment key value) (typecheck key string?) (typecheck value cpp-macro?) - (let ((environment (clone-environment environment))) - (hash-set! (cpp-variables environment) key value) - environment)) + (modify environment cpp-variables + (lambda (vars) (acons key value vars)))) (define (get-identifier environment key) - (hash-ref (cpp-variables environment) key)) + (assoc-ref (cpp-variables environment) key)) (define (extend-environment environment macros) @@ -250,10 +243,10 @@ (define* (pprint-environment environment optional: (port (current-error-port))) (display "== Environment ==\n" port) - (hash-for-each (lambda (key macro) - (pprint-macro macro port) - (newline port)) - (cpp-variables environment))) + (for-each (lambda (pair) + (pprint-macro (cdr pair) port) + (newline port)) + (cpp-variables environment))) (define* (pprint-macro x optional: (p (current-output-port))) (cond ((internal-macro? x) diff --git a/module/c/preprocessor2.scm b/module/c/preprocessor2.scm index a34fd2dd..d65a4ac9 100644 --- a/module/c/preprocessor2.scm +++ b/module/c/preprocessor2.scm @@ -9,22 +9,20 @@ :use-module ((c cpp-environment function-like-macro) :select (function-like-macro variadic?)) :use-module ((c cpp-environment object-like-macro) - :select (object-like-macro object-like-macro?)) + :select (object-like-macro)) :use-module ((c cpp-environment internal-macro) :select (internal-macro)) - :use-module ((hnh util) :select (-> ->> intersperse swap unless unval break/all)) + :use-module ((hnh util) :select (-> ->> intersperse unless unval break/all)) :use-module ((hnh util lens) :select (set modify cdr*)) :use-module (hnh util path) :use-module (hnh util type) :use-module (hnh util object) - :use-module ((hnh util values) :select (abort* on-fst on-snd apply/values)) + :use-module ((hnh util values) :select (abort* on-snd value-ref apply/values)) :use-module ((hnh util io) :select (read-file)) :use-module ((c lex2) :select (lex placemaker lexeme? - lexeme-body lexeme-noexpand - tokenize )) :use-module (c unlex) @@ -42,12 +40,6 @@ -(define-syntax-rule (alist-of variable key-type value-type) - (build-validator-body variable (list-of (pair-of key-type value-type)))) - -(define (list-of-length lst n) - (= n (length lst))) - (define parameter-map? (of-type? (alist-of string? (list-of lexeme?)))) (define (concat-token? token) (and (equal? "##" (punctuator-token? token)) @@ -58,21 +50,8 @@ (define (comma-token? token) (equal? "," (punctuator-token? token))) (define (ellipsis-token? token) (equal? "..." (punctuator-token? token))) + -;; parameters is a lexeme list, as returned by parse-parameter-list -(define (build-parameter-map macro parameters) - (typecheck macro cpp-macro?) - (typecheck parameters (list-of (list-of lexeme?))) - (map (lambda (pair) (modify pair cdr* drop-whitespace-both)) - (if (macro-variadic? macro) - (let ((head rest (split-at parameters (length (macro-identifier-list macro))))) - (cons (cons "__VA_ARGS__" (concatenate (intersperse - (lex ",") - rest))) - (map cons (macro-identifier-list macro) head))) - (map cons - (macro-identifier-list macro) - parameters)))) (define (expand# macro parameter-map) (typecheck macro cpp-macro?) @@ -191,7 +170,19 @@ (else (cons (car tokens) (loop (cdr tokens) (car tokens))))))) - (define parameter-map (build-parameter-map macro parameters)) + ;; parameters is a lexeme list, as returned by parse-parameter-list + (define parameter-map + (map (lambda (pair) (modify pair cdr* drop-whitespace-both)) + (if (macro-variadic? macro) + (let ((head rest (split-at parameters (length (macro-identifier-list macro))))) + (cons (cons "__VA_ARGS__" (concatenate (intersperse (lex ",") rest))) + (map cons + (macro-identifier-list macro) + head))) + (map cons + (macro-identifier-list macro) + parameters)))) + (remove placemaker-token? (-> macro (expand# parameter-map) @@ -208,40 +199,36 @@ (let ((name (macro-identifier macro))) (cond ((object-macro? macro) - (values environment (append (fold (swap mark-noexpand) - (expand## (macro-body macro)) - (cons name noexpand-list)) - remaining-tokens))) - - ((function-macro? macro) + (values environment + (append (fold mark-noexpand + (expand## (macro-body macro)) + (cons name noexpand-list)) + remaining-tokens))) + + ((or (function-macro? macro) + (internal-macro? macro)) (if (next-token-matches? left-parenthesis-token? remaining-tokens) - (let ((containing remaining newlines (parse-parameter-list remaining-tokens))) + (let* ((containing remaining newlines (parse-parameter-list remaining-tokens)) + (environment tokens* + (if (function-macro? macro) + (values environment (apply-macro environment macro containing)) + ((macro-body macro) environment containing)))) (values (bump-line environment newlines) - (append (fold (swap mark-noexpand) - (apply-macro environment macro containing) + (append (fold mark-noexpand + tokens* (cons name noexpand-list)) remaining))) + (values environment ;; TODO#1 the token shouldn't be expanded here, but it should neither be marked no-expand? ;; Consider the case ;; #define m(a) a(0,1) ;; #define f(a) f(2 * (a)) ;; m(f) - (append (mark-noexpand (lex (macro-identifier macro)) (macro-identifier macro)) + (append (mark-noexpand (macro-identifier macro) + (lex (macro-identifier macro))) remaining-tokens)))) - ((internal-macro? macro) - (if (next-token-matches? left-parenthesis-token? remaining-tokens) - (let ((containing remaining newlines (parse-parameter-list remaining-tokens))) - (let ((env* tokens* ((macro-body macro) environment containing))) - (values (bump-line env* newlines) - (append (fold (swap mark-noexpand) - tokens* - (cons name noexpand-list)) - remaining)))) - (values environment - (append (mark-noexpand (lex (macro-identifier macro)) (macro-identifier macro)) - remaining-tokens)))) (else (scm-error 'wrong-type-arg "expand-macro" @@ -302,22 +289,21 @@ (define (newline-count group) - (let loop ((tokens (parenthesis-group-tokens group))) - (fold (lambda (item nls) - (+ nls - (cond ((newline-token? item) 1) - ((parenthesis-group? item) (newline-count item)) - (else 0)))) - 0 tokens))) + (count newline-token? (flatten-group (parenthesis-group-tokens group)))) + ;; tokens ⇒ parenthesis-group, remaining-tokens (define (parse-group tokens) (typecheck tokens (not null?)) (typecheck (car tokens) left-parenthesis-token?) + ;; Push each found symbol onto a stack. + ;; If the given symbol is a right parenthesis, pop elements from the stack + ;; until a left parenthesis is found, construct a group of these elements, + ;; and push it back onto the stack + (let loop ((stack '()) (remaining tokens)) - (cond ((and (not (null? stack)) - (null? (cdr stack)) + (cond ((and (of-type? stack (list-of-length 1)) (car stack)) parenthesis-group? => (lambda (item) (values item remaining))) @@ -326,14 +312,14 @@ "Ran out of tokens while parsing: ~s (stack: ~s)" (list (unlex tokens) stack) #f)) (else - (let ((token remaining (car+cdr remaining))) - (loop (cond ((right-parenthesis-token? token) - (let ((group rest (break left-parenthesis-token? stack))) - (cons (make-parenthesis-group (reverse group)) - ;; Remove left-parenthesis - (cdr rest)))) - (else (cons token stack))) - remaining)))))) + (loop (cond ((right-parenthesis-token? (car remaining)) + (let ((group rest (break left-parenthesis-token? stack))) + (cons (make-parenthesis-group (reverse group)) + ;; Remove left-parenthesis + (cdr rest)))) + (else (cons (car remaining) stack))) + (cdr remaining)))))) + ;; returns three values: @@ -387,29 +373,29 @@ ;; environment, tokens → environment (define (handle-pragma environment tokens) + (define (err) + (scm-error 'cpp-pragma-error "handle-pragma" + "Invalid pragma directive: ~a" + (list (unlex tokens)) #f)) + (typecheck environment cpp-environment?) ;; (typecheck tokens (list-of lexeme?)) - (let ((err (lambda () - (scm-error 'cpp-pragma-error "handle-pragma" - "Invalid pragma directive: ~a" - (list (unlex tokens)) #f)))) - - (cond ((null? tokens) (err)) - ((equal? "STDC" (identifier-token? (car tokens))) - (call-with-values (lambda () (apply values (filter identifier-token? (cdr tokens)))) - (case-lambda ((identifier on-off-switch) - (format (current-output-port) - "#Pragma STDC ~a ~a" - (unlex (list identifier)) - (unlex (list on-off-switch))) - environment) - (_ (err))))) - (else - (format (current-output-port) - "Non-standard #Pragma: ~a" - (unlex tokens)) - environment)))) + (cond ((null? tokens) (err)) + ((equal? "STDC" (identifier-token? (car tokens))) + (call-with-values (lambda () (apply values (filter identifier-token? (cdr tokens)))) + (case-lambda ((identifier on-off-switch) + (format (current-output-port) + "#Pragma STDC ~a ~a" + (unlex (list identifier)) + (unlex (list on-off-switch))) + environment) + (_ (err))))) + (else + (format (current-output-port) + "Non-standard #Pragma: ~a" + (unlex tokens)) + environment))) ;; (next-token-or-group (lex " x y") @@ -421,15 +407,13 @@ (define (next-token-or-group tokens) (let loop ((tokens (drop-whitespace tokens))) (cond ((null? tokens) - ;; TODO error here? - '()) + (scm-error 'misc-error "next-token-or-group" "Out of tokens" '() #f)) ((left-parenthesis-token? (car tokens)) (parse-group tokens)) ((preprocessing-token? (car tokens)) (car+cdr tokens)) - (else - (loop (cdr tokens)))))) - + (else (scm-error 'misc-error "next-token-or-group" + "This should be impossible" '() #f))))) (define (parse-if-line environment cpp-tokens) @@ -443,49 +427,44 @@ (drop-identifiers (let ((environment (join-file-line environment))) - (let loop ((tokens cpp-tokens)) - (cond ((null? tokens) '()) - ((identifier-token? (car tokens)) - (lambda (s) (and s (string=? s "defined"))) - => (lambda _ - (let ((next rest (next-token-or-group (cdr tokens)))) - (cons (if (and=> (identifier-token? (if (parenthesis-group? next) - ;; TODO empty group - (car (drop-whitespace (parenthesis-group-tokens next))) - next)) - (lambda (it) (in-environment? environment it))) - one zero) - (loop rest))))) - - ((and (identifier-token? (car tokens)) - (not (marked-noexpand? (car tokens)))) - (let ((_ tokens - (maybe-extend-identifier environment - (identifier-token? (car tokens)) - (lexeme-noexpand (car tokens)) - (cdr tokens)))) - (loop tokens))) - - (else (cons (car tokens) - (loop (cdr tokens))))))))) - - - - + (let loop ((tokens cpp-tokens)) + (cond ((null? tokens) '()) + ((identifier-token? (car tokens)) + (lambda (s) (and s (string=? s "defined"))) + => (lambda _ + (let ((next rest (next-token-or-group (cdr tokens)))) + (cons (if (and=> (identifier-token? (if (parenthesis-group? next) + ;; TODO empty group + (car (drop-whitespace (parenthesis-group-tokens next))) + next)) + (lambda (it) (in-environment? environment it))) + one zero) + (loop rest))))) + + ((and (identifier-token? (car tokens)) + (not (marked-noexpand? (car tokens)))) + (-> (maybe-extend-identifier environment + (identifier-token? (car tokens)) + (lexeme-noexpand (car tokens)) + (cdr tokens)) + (value-ref 1) + loop)) + + (else (cons (car tokens) + (loop (cdr tokens))))))))) (define (mark-noexpand1 token name) (modify token lexeme-noexpand xcons name)) -(define (mark-noexpand tokens name) +(define (mark-noexpand name tokens) ;; (typecheck tokens (list-of lexeme?)) ;; (typecheck name string?) (map (lambda (token) (mark-noexpand1 token name)) tokens)) (define (marked-noexpand? token) - (cond ((identifier-token? token) - => (lambda (id) (member id (lexeme-noexpand token)))) - (else #f))) + (and=> (identifier-token? token) + (lambda (id) (member id (lexeme-noexpand token))))) ;; Expands a token-stream which contains no pre-processing directives (#if:s, ...) ;; If @var{once?} is true then the resulting body won't be scanned again for tokens to expand @@ -493,8 +472,7 @@ (define* (resolve-token-stream environment tokens key: once?) (typecheck environment cpp-environment?) ;; (typecheck tokens (list-of lexeme?)) - ;; (pprint-environment environment) - ;; (format (current-error-port) "~a~%~%" (unlex tokens)) + (let loop ((environment environment) (tokens tokens)) (cond ((null? tokens) (values environment '())) ((newline-token? (car tokens)) @@ -514,7 +492,7 @@ ;; returns a new environment ;; handle body of #if ;; environment, (list token) → environment -(define (resolve-for-if environment tokens) +(define (handle-if-directive environment tokens) (typecheck environment cpp-environment?) ;; (typecheck tokens (list-of lexeme?)) (enter-into-if @@ -536,6 +514,7 @@ (typecheck identifier string?) ;; (typecheck remaining-tokens (list-of lexeme?)) (typecheck noexpand-list (list-of string?)) + (cond ((get-identifier (join-file-line environment) identifier) => (lambda (value) (expand-macro (join-file-line environment) @@ -544,8 +523,7 @@ remaining-tokens))) (else ; It wasn't an identifier, leave it as is (values environment - (append (mark-noexpand (lex identifier) - identifier) + (append (mark-noexpand identifier (lex identifier)) remaining-tokens))))) ;; 'gcc -xc -E -v /dev/null' prints GCC:s search path @@ -556,23 +534,18 @@ ;; #include (define (resolve-h-file string) (typecheck string string?) - (cond - ;; NOTE do I want this case? - ;; GCC has it - ((path-absolute? string) string) - (else - (or - (find file-exists? - (map (lambda (path-prefix) - (path-append path-prefix string)) - (c-search-path))) - (scm-error 'cpp-error "resolve-h-file" - "Can't resolve file: ~s" - (list string) #f))))) + (cond ((path-absolute? string) string) + (else (or (find file-exists? + (map (lambda (path-prefix) (path-append path-prefix string)) + (c-search-path))) + (scm-error 'cpp-error "resolve-h-file" + "Can't resolve file: ~s" + (list string) #f))))) ;; #include "myheader.h" (define (resolve-q-file string) (typecheck string string?) + (cond ((file-exists? string) string) ;; This should always be a fallback (6.10.2, p. 3) (else (resolve-h-file string)))) @@ -586,22 +559,24 @@ (scm-error 'cpp-error "resolve-and-include-header" (string-append msg ", tokens: ~s") (append args (list (unlex tokens))) #f)))) - (let loop ((%first-time #t) (tokens tokens)) - (cond ((null? tokens) (err "Invalid #include line")) - ((h-string-token? (car tokens)) - => (lambda (str) - (unless (null? (drop-whitespace (cdr tokens))) - (err "Unexpected tokens after #include <>")) - (resolve-h-file str))) - ((q-string-token? (car tokens)) - => (lambda (str) - (unless (null? (drop-whitespace (cdr tokens))) - (err "Unexpected tokens after #include \"\"")) - (resolve-q-file str))) - (else - (unless %first-time (err "Failed parsing tokens")) - ;; No newlines in #include - (loop #f ((unval resolve-token-stream 1) environment tokens))))))) + (let retry% ((%first-time #t) (tokens tokens)) + (let ((retry (lambda () (retry% #f ((unval resolve-token-stream 1) environment tokens))))) + (cond ((null? tokens) (err "Invalid #include line")) + ((h-string-token? (car tokens)) + => (lambda (str) + (unless (null? (drop-whitespace (cdr tokens))) + (err "Unexpected tokens after #include <>")) + (resolve-h-file str))) + ((q-string-token? (car tokens)) + => (lambda (str) + (unless (null? (drop-whitespace (cdr tokens))) + (err "Unexpected tokens after #include \"\"")) + (resolve-q-file str))) + (else + (unless %first-time (err "Failed parsing tokens")) + (retry))))))) + + ;; environment, tokens → environment (define (handle-line-directive environment tokens*) @@ -611,29 +586,31 @@ (let ((err (lambda () (scm-error 'cpp-error "handle-line-directive" "Invalid line directive: ~s" (list tokens*) #f)))) - (let loop ((%first-time #t) (tokens tokens*)) - (cond ((null? tokens)) - ((pp-number? (car tokens)) - => (lambda (line) - (let ((line (string->number line)) - (remaining (drop-whitespace (cdr tokens)))) - (cond ((null? remaining) (set environment current-line (1- line))) - ((string-token? (car remaining)) - (lambda (a . _) a) - => (lambda (encoding . fragments) - (-> environment - (set current-line (1- line)) - ;; TODO properly join string - (set current-file (car fragments))))) - ;; no newlines in #line - (%first-time (loop #f ((unval resolve-token-stream 1) environment tokens))) - (else (err)))))) - ;; no newlines in #line - (%first-time (loop #f ((unval resolve-token-stream 1) environment tokens))) - (else (err)))))) + (let retry% ((%first-time #t) (tokens tokens*)) + (let ((retry (lambda () (retry% #f ((unval resolve-token-stream 1) environment tokens))))) + (cond ((null? tokens)) + ((pp-number? (car tokens)) + => (lambda (line) + (let ((line (string->number line)) + (remaining (drop-whitespace (cdr tokens)))) + (cond ((null? remaining) (set environment current-line (1- line))) + ((string-token? (car remaining)) + (lambda (a . _) a) + => (lambda (encoding . fragments) + (-> environment + (set current-line (1- line)) + ;; TODO properly join string + (set current-file (car fragments))))) + ;; no newlines in #line + (%first-time (retry)) + (else (err)))))) + ;; no newlines in #line + (%first-time (retry)) + (else (err))))))) + ;; environment, tokens → environment -(define (resolve-define environment tokens) +(define (handle-define-directive environment tokens) (typecheck environment cpp-environment?) ;; (typecheck tokens (list-of lexeme?)) @@ -660,8 +637,116 @@ body: (drop-whitespace-both tail)))))))) +(define (handle-include-directive environment body) + ;; TODO change to store source location in lexemes + ;; and rewrite the following to + ;; (loop environment + ;; (append (-> path read-file tokenize) remaining-tokens)) + ;; TODO and then transfer these source locations when we move + ;; to "real" tokens (c to-token) + (let ((path (resolve-header environment body))) + (values + ;; same hack as at start of loop + (-> environment (enter-file path) (bump-line -1)) + (->> path read-file tokenize (append (lex "\n"))))) ) + + +;; enter if depending on the status of thunk +(define (enter-depending env thunk) + (enter-into-if env + (if (thunk) + (if-status active) + (if-status inactive)))) + +;; is the next token defined in the environment? +(define (next-in-environment env body) + (in-environment? env (identifier-token? (car body)))) + +(define (handle-ifdef-directive env body) + (enter-depending env (lambda () (next-in-environment env body)))) + +(define (handle-ifndef-directive env body) + (enter-depending env (lambda () (not (next-in-environment env body))))) + +(define (handle-error-directive environment body) + (throw 'cpp-error-directive + (format #f "#error ~a" (unlex body)) + (format #f "at ~s:~a" + (current-file environment) + (current-line environment)) + (format #f "included as ~s" + (cpp-file-stack environment)))) + +;; handles a line starting with a hash (#) +;; line-tokens are the cpp-tokens between hash and EOL +;; remaining tokens are the remaining tokens in the stream +;; loop is (almost) a continuation +(define (handle-preprocessing-directive environment line-tokens remaining-tokens loop) + (if (null? line-tokens) + ;; null directive + (loop environment remaining-tokens) + + (let ((directive (string->symbol (identifier-token? (car line-tokens))))) + (cond + ((in-conditional/inactive-inactive? environment) + (-> environment + ((case directive + ((ifdef ifndef if) (lambda (e) (enter-into-if e (if-status inactive-inactive)))) + ((endif) leave-if) + ((elif else) identity) + (else identity))) + (loop remaining-tokens))) + + ((in-conditional/inactive? environment) + (-> environment + ((case directive + ((ifdef ifndef if) (lambda (e) (enter-into-if e (if-status inactive-inactive)))) + ((endif) leave-if) + ((else) (lambda (e) (transition-to-if e (if-status active)))) + ((elif) (lambda (environment) + (-> environment + leave-if + (handle-if-directive (drop-whitespace (cdr line-tokens)))))) + (else identity))) + (loop remaining-tokens))) + + ;; From here on we are not in a comment block + (else + (let ((body (drop-whitespace (cdr line-tokens)))) + (if (eq? 'include directive) + ;; include is special since it returns a token stream + (let ((environment included-tokens + (call-with-values + (lambda () (handle-include-directive environment body)) + loop))) + (on-snd (append included-tokens + (abort* (loop (leave-file environment) + remaining-tokens))))) + + (let ((op (case directive ; (environment, list token) → environment + ((if) handle-if-directive) + ((ifdef) handle-ifdef-directive) + ((ifndef) handle-ifndef-directive) + ;; NOTE possibly validate that body is empty for endif and else + ((endif) (lambda (env _) (leave-if env))) + ((else elif) (lambda (env _) (transition-to-if env (if-status inactive-inactive)))) + ((define) handle-define-directive) + ((undef) (lambda (env body) (remove-identifier env (identifier-token? (car body))))) + ((line) handle-line-directive) + ((error) handle-error-directive) + ((pragma) handle-pragma) + (else (throw 'propagate + "Unknown preprocessing directive: ~s" + (list line-tokens)))))) + (-> environment + (op body) + (loop remaining-tokens)))))))))) + + + +;; Handles an entire stream (a whole file) of cpp-tokens ;; environment, tokens -> environment, tokens (define (handle-preprocessing-tokens environment tokens) ;; Prepend a newline to ensure that the token stream always starts with a @@ -684,107 +769,24 @@ (tokens* (drop-whitespace (cdr tokens)))) (cond ((null? tokens*) (values environment '())) ((equal? "#" (punctuator-token? (car tokens*))) - (let ((line-tokens remaining-tokens (tokens-until-eol (cdr tokens*)))) - ;; drop whitespace after newline check to not "eat" the next newline token - (let ((line-tokens (drop-whitespace line-tokens))) - (cond ((null? line-tokens) - ;; null directive - (loop environment remaining-tokens)) - - ((in-conditional/inactive-inactive? environment) - (let ((op (case (string->symbol (identifier-token? (car line-tokens))) - ((ifdef ifndef if) (lambda (e) (enter-into-if e (if-status inactive-inactive)))) - ((endif) leave-if) - ((elif else) identity) - (else identity)))) - (loop (op environment) remaining-tokens))) - - ((in-conditional/inactive? environment) - (let ((op (case (string->symbol (identifier-token? (car line-tokens))) - ((ifdef ifndef if) (lambda (e) (enter-into-if e (if-status inactive-inactive)))) - ((endif) leave-if) - ((else) (lambda (e) (transition-to-if e (if-status active)))) - ((elif) (lambda (environment) - (-> environment - leave-if - (resolve-for-if (drop-whitespace (cdr line-tokens)))))) - (else identity)))) - (loop (op environment) remaining-tokens))) - - ;; From here on we are not in a comment block - (else - (let ((directive (string->symbol (identifier-token? (car line-tokens)))) - (body (drop-whitespace (cdr line-tokens)))) - (if (eq? 'include directive) - ;; include is special since it returns a token stream - (let ((path (resolve-header environment body))) - ;; TODO change to store source location in lexemes - ;; and rewrite the following to - ;; (loop environment - ;; (append (-> path read-file tokenize) remaining-tokens)) - ;; TODO and then transfer these source locations when we move - ;; to "real" tokens (c to-token) - (let ((env* tokens* - (loop - ;; same hack as at start of loop - (-> environment - (enter-file path) - (bump-line -1)) - (append (lex "\n") - (-> path read-file tokenize))))) - (on-snd (append tokens* (abort* (loop (leave-file env*) - remaining-tokens)))))) - - (let ((operation ; (environment, list token) → environment - (case directive - ((if) resolve-for-if) - ((ifdef) - (lambda (env body) - (enter-into-if env - (if (in-environment? env (identifier-token? (car body))) - (if-status active) - (if-status inactive))))) - ((ifndef) - (lambda (env body) - (enter-into-if env - (if (in-environment? env (identifier-token? (car body))) - (if-status inactive) - (if-status active))))) - ;; NOTE possibly validate that body is empty for endif and else - ;; checks that these aren't outside #if is handled internally - ((endif) (lambda (env _) (leave-if env))) - ((else elif) (lambda (env _) (transition-to-if env (if-status inactive-inactive)))) - ((define) resolve-define) - ((undef) (lambda (env body) - (remove-identifier - env (identifier-token? (car body))))) - ((line) handle-line-directive) - ((error) (lambda (_ tokens) - (throw 'cpp-error-directive - (format #f "#error ~a" (unlex tokens)) - (format #f "at ~s:~a" - (current-file environment) - (current-line environment)) - (format #f "included as ~s" - (cpp-file-stack environment)) - ))) - ((pragma) handle-pragma) - (else (err "Unknown preprocessing directive: ~s" - (list line-tokens)))))) - (loop (operation environment body) - remaining-tokens))))))))) + (let* ((line-tokens remaining-tokens (tokens-until-eol (cdr tokens*))) + ;; drop whitespace after newline check to not "eat" the next newline token + (line-tokens (drop-whitespace line-tokens))) + (catch 'propagate + (lambda () (handle-preprocessing-directive environment line-tokens remaining-tokens loop)) + (lambda (_ . args) (apply err args))))) ;; Line is not a pre-processing directive - (else (let ((preceding-tokens remaining-tokens (tokens-until-cpp-directive (cdr tokens)))) - (let* ((env* resolved-tokens (if (in-conditional/inactive? environment) - (values environment '()) - (resolve-token-stream environment preceding-tokens)))) - (on-snd (append resolved-tokens - ;; The initial newline is presreved here, for better output, - ;; and to keep at least one whitespace token when there was one previously. - ;; possibly also keep a newline for line-directives. - (unless (null? remaining-tokens) (lex "\n")) - (abort* (loop env* remaining-tokens)))))))))) + (else (let* ((preceding-tokens remaining-tokens (tokens-until-cpp-directive (cdr tokens))) + (env* resolved-tokens (if (in-conditional/inactive? environment) + (values environment '()) + (resolve-token-stream environment preceding-tokens)))) + (on-snd (append resolved-tokens + ;; The initial newline is presreved here, for better output, + ;; and to keep at least one whitespace token when there was one previously. + ;; possibly also keep a newline for line-directives. + (unless (null? remaining-tokens) (lex "\n")) + (abort* (loop env* remaining-tokens))))))))) (else (err "Unexpected middle of line, (near ~s)" (unlex tokens)))))) @@ -794,40 +796,29 @@ (define* (make-default-environment key: (now (localtime (current-time)))) - (call-with-values - (lambda () - (preprocess-string - (format - #f - " -#define __STDC__ 1 -#define __STDC_HOSTED__ 1 -#define __STDC_VERSION__ 201112L -#define __DATE__ \"~a\" -#define __TIME__ \"~a\" -" - ;; TODO format should always be in - ;; english, and not tranlated - (strftime "\"%b %_d %Y\"" now) - (strftime "\"%H:%M:%S\"" now)) - (make-environment))) - (lambda (env _) env))) + (-> (string-append + "#define __STDC__ 1\n" + "#define __STDC_HOSTED__ 1\n" + "#define __STDC_VERSION__ 201112L\n" + ;; TODO format should always be in + ;; english, and not tranlated + (format #f "#define __DATE__ \"~a\"~%" (strftime "%b %_d %Y" now)) + (format #f "#define __TIME__ \"~a\"~%" (strftime "%H:%M:%S" now))) + (preprocess-string (make-environment)) + (value-ref 0))) (define* (preprocess-string str optional: (environment (make-default-environment))) - (on-snd - (->> - (abort* - (->> str + (->> str ;;; Phase 1-3 - tokenize + tokenize ;;; 4. Execution of preprocessing directives, all preprocessing directives are then deleted - (handle-preprocessing-tokens environment))) - + (handle-preprocessing-tokens environment) + abort* ;;; 5. (something with character sets) ;;; 7. Whitespace tokens are discarded, each preprocessing token is converted into a token - (remove whitespace-token?) + (remove whitespace-token?) ;;; 6. concatenation of string literals ;;; Should be done before removal of whitespace, but I don't understand why - merge-string-literals - ))) + merge-string-literals + on-snd)) diff --git a/module/hnh/util/type.scm b/module/hnh/util/type.scm index 50008a3a..b998d59c 100644 --- a/module/hnh/util/type.scm +++ b/module/hnh/util/type.scm @@ -1,11 +1,26 @@ (define-module (hnh util type) :use-module ((srfi srfi-1) :select (every)) :export (build-validator-body - list-of pair-of + list-of pair-of alist-of alist-of + list-of-length of-type? typecheck current-procedure-name)) +;; DSL for specifying type predicates +;; Basically a procedure body, but the variable to test is implicit. +(define-syntax build-validator-body + (syntax-rules (and or not) + ((_ variable (and clauses ...)) (and (build-validator-body variable clauses) ...)) + ((_ variable (or clauses ...)) (or (build-validator-body variable clauses) ...)) + ((_ variable (not clause)) (not (build-validator-body variable clause))) + ((_ variable (proc args ...)) (proc variable args ...)) + ((_ variable proc) (proc variable)))) + +(define-syntax-rule (current-procedure-name) + ;; 1 since make-stack is at top of stack + (frame-procedure-name (stack-ref (make-stack #t) 1))) + (define-syntax list-of (syntax-rules () ((_ variable (rule ...)) @@ -20,19 +35,12 @@ (build-validator-body (car variable) a) (build-validator-body (cdr variable) b))) -;; DSL for specifying type predicates -;; Basically a procedure body, but the variable to test is implicit. -(define-syntax build-validator-body - (syntax-rules (and or not) - ((_ variable (and clauses ...)) (and (build-validator-body variable clauses) ...)) - ((_ variable (or clauses ...)) (or (build-validator-body variable clauses) ...)) - ((_ variable (not clause)) (not (build-validator-body variable clause))) - ((_ variable (proc args ...)) (proc variable args ...)) - ((_ variable proc) (proc variable)))) +(define-syntax-rule (alist-of variable key-type value-type) + (build-validator-body variable (list-of (pair-of key-type value-type)))) -(define-syntax-rule (current-procedure-name) - ;; 1 since make-stack is at top of stack - (frame-procedure-name (stack-ref (make-stack #t) 1))) +(define (list-of-length lst n) + (and (list? lst) + (= n (length lst)))) (define-syntax of-type? (syntax-rules () -- cgit v1.2.3