From b3f27f132f8ac405f8cdf7e201f03d157f366125 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20H=C3=B6rnquist?= Date: Sun, 10 Jul 2022 20:24:01 +0200 Subject: work --- module/c/cpp-environment.scm | 26 +++- module/c/preprocessor2.scm | 336 ++++++++++++++++++++++++++++++++++--------- 2 files changed, 288 insertions(+), 74 deletions(-) (limited to 'module/c') diff --git a/module/c/cpp-environment.scm b/module/c/cpp-environment.scm index 51f16168..fa69e1fc 100644 --- a/module/c/cpp-environment.scm +++ b/module/c/cpp-environment.scm @@ -15,6 +15,7 @@ macro-identifier-list macro-variadic? macro? + ;; pprint-macro enter-active-if enter-inactive-if @@ -31,13 +32,14 @@ internal-macro? cpp-environment + cpp-environment? cpp-if-status cpp-variables make-environment in-environment? remove-identifier! add-identifier! get-identifier extend-environment - + disjoin-macro )) (define (macro-identifier x) @@ -73,6 +75,9 @@ (fun:function-like-macro? x) (int:internal-macro? x))) + + + (define-type (cpp-environment) (cpp-if-status type: (list-of (memv '(outside active-if inactive-if))) default: '(outside)) @@ -81,6 +86,7 @@ (list-of (pair-of string? exact-integer?))) default: '(("*outside*" . 1)))) + (define (enter-active-if environment) @@ -112,7 +118,13 @@ (define (make-environment) (cpp-environment)) -(define (in-envirnoment? environment key) +(define (clone-hash-table ht) + (alist->hash-table (hash-map->list cons ht))) + +(define (clone-environment environment) + (modify environment cpp-variables clone-hash-table)) + +(define (in-environment? environment key) (hash-get-handle (cpp-variables environment) key)) (define (remove-identifier! environment key) @@ -134,11 +146,15 @@ (define (get-identifier environment key) (hash-ref (cpp-variables environment) key)) -(define (clone-hash-table ht) - (alist->hash-table (hash-map->list cons ht))) (define (extend-environment environment macros) - (let ((env (modify environment cpp-variables clone-hash-table))) + (typecheck macros (list-of macro?)) + (let ((env (clone-environment environment))) (fold (lambda (m env) (add-identifier! env (macro-identifier m) m)) env macros))) +(define (disjoin-macro environment name) + (typecheck name string?) + (let ((env (clone-environment environment))) + (remove-identifier! env name) + env)) diff --git a/module/c/preprocessor2.scm b/module/c/preprocessor2.scm index e99b1049..0bb101f8 100644 --- a/module/c/preprocessor2.scm +++ b/module/c/preprocessor2.scm @@ -5,21 +5,58 @@ :use-module (ice-9 match) :use-module (c cpp-environment) :use-module (c eval2) - :use-module ((c cpp-environment function-like-macro) :select (function-like-macro)) + :use-module ((c cpp-environment function-like-macro) + :select (function-like-macro variadic? identifier-list)) :use-module ((c cpp-environment object-like-macro) :select (object-like-macro)) :use-module ((c cpp-environment internal-macro) :select (internal-macro)) - :use-module ((hnh util) :select (->)) + :use-module ((hnh util) :select (-> intersperse)) :use-module ((hnh util lens) :select (set)) :use-module (hnh util path) + :use-module (hnh util type) :use-module ((c lex2) :select (lex)) :use-module ((c trigraph) :select (replace-trigraphs)) :use-module ((c line-fold) :select (fold-lines)) :export ()) +;;; Call graph +;; squeeze-whitespace +;; stringify-tokens +;; expand-join +;; build-parameter-map + +;; apply-macro +;; - build-parameter-map +;; - stringify-tokens +;; - expand-join + +;; expand-macro +;; - parse-parameter-list +;; - apply-macro + +;; parse-parameter-list + +;; resolve-token-stream +;; - maybe-extend-identifier + +;; maybe-extend-identifier +;; - expand-macro + +;; resolve-define +;; - parse-identifier-list + +;; expand-stringifiers +;; - stringify-tokens + +;;; + +(define-syntax-rule (parameter-map? x) + (typecheck x (list-of (pair-of string? (list-of token?))))) + ;; Returns two values: ;; - tokens until a newline token is met ;; - (potentially the newline token) and the remaining tokens (define (tokens-until-eol tokens) + (typecheck tokens (list-of token?)) (break (lambda (token) (equal? token '(whitespace "\n"))) tokens)) @@ -29,6 +66,11 @@ (`(whitespace ,_) #t) (_ #f))) +(define (identifier-token? token) + (match token + (`(preprocessing-token (identifier ,id)) id) + (_ #f))) + (define (unwrap-preprocessing-token token) (match token (`(preprocessing-token ,x) x) @@ -41,6 +83,10 @@ (lambda () (unwrap-preprocessing-token token)) (const #f))) +(define (token? x) + (or (preprocessing-token? x) + (whitespace-token? x))) + ;; Replace all whitespace with single spaces. (define (squeeze-whitespace tokens) @@ -68,43 +114,70 @@ (format #f "'~a'" c)) (`(punctuator ,p) p))) +;; takes a token list, and return a single string literal token (define (stringify-tokens tokens) - `(preprocessing-token - (string-literal - ,(string-concatenate - (map (match-lambda (`(preprocessing-token ,body) (stringify-token body)) - (`(whitespace ,_) " ")) - (squeeze-whitespace tokens)))))) + `(preprocessing-token (string-literal ,(unlex tokens)))) + +;; takes a list of preprocessing-token's, and return a "source" string +(define (unlex tokens) + (typecheck tokens (list-of token?)) + (string-concatenate + (map (match-lambda (`(preprocessing-token ,body) (stringify-token body)) + (`(whitespace ,_) " ")) + (squeeze-whitespace tokens)))) + ;; Expand ## tokens ;; TODO (define (expand-join macro tokens) + (typecheck macro macro?) + (typecheck tokens (list-of token?)) tokens) ;; parameters is a lexeme list, as returned by parse-parameter-list (define (build-parameter-map macro parameters) + (typecheck macro macro?) + (typecheck parameters (list-of (list-of token?))) (if (macro-variadic? macro) (let ((head rest (split-at parameters (length (macro-identifier-list macro))))) - ;; TODO commas (,) should be interleaved with rest - (cons (cons "__VA_ARGS__" rest) + (cons (cons "__VA_ARGS__" (concatenate (intersperse + '((preprocessing-token (punctuator ","))) + rest))) (map cons (macro-identifier-list macro) head))) (map cons (macro-identifier-list macro) parameters))) + +;; TODO Deprecate? +(define (parameter-map->macro-list param-map) + (typecheck param-map parameter-map?) + (map (lambda (pair) + (let ((identifier (car pair)) + (body (cdr pair))) + (object-like-macro + identifier: identifier + body: body))) + param-map)) + ;; Drop leading whitespace tokens (define (drop-whitespace tokens) + (typecheck tokens (list-of token?)) (drop-while whitespace-token? tokens)) (define (drop-whitespace-right tokens) + (typecheck tokens (list-of token?)) (-> tokens reverse drop-whitespace reverse)) (define (drop-whitespace-both tokens) + (typecheck tokens (list-of token?)) (-> tokens drop-whitespace drop-whitespace-right)) (define (expand-stringifiers macro parameter-map) + (typecheck macro macro?) + (typecheck parameter-map parameter-map?) (let loop ((tokens (macro-body macro))) (match tokens (('(preprocessing-token (punctuator "#")) @@ -123,13 +196,45 @@ ;; expand function like macro (define (apply-macro environment macro parameters) - (define parameter-map (build-parameter-map macro parameters)) - (define stringify-resolved (expand-stringifiers macro parameter-map)) - ;; TODO resolve ## - (define resulting-body stringify-resolved #; (expand-join macro stringify-resolved) - ) - (resolve-token-stream (extend-environment environment parameter-map) - resulting-body)) + (typecheck environment cpp-environment?) + ;; Each element should be the lexeme list for that argument + (typecheck parameters (list-of (list-of token?))) + (typecheck macro macro?) + (when (or (and (variadic? macro) + (> (length (identifier-list macro)) + (length parameters))) + (and (not (variadic? macro)) + (not (= (length (identifier-list macro)) + (length parameters))))) + (scm-error 'cpp-arity-error "apply-macro" + ;; TODO better error message for variadic macros + "Wrong number of arguments to macro ~s, expected ~s, got ~s" + (list (macro-identifier macro) + (length (identifier-list macro)) + (length parameters)) + (list macro))) + (let () + (define parameter-map (build-parameter-map macro parameters)) + (define stringify-resolved (expand-stringifiers macro parameter-map)) + ;; TODO resolve ## + (define resulting-body stringify-resolved #; (expand-join macro stringify-resolved)) + (define (bound-identifier? id) + (member id (if (variadic? macro) + (cons "__VA_ARGS__" (identifier-list macro)) + (identifier-list macro)))) + (let loop ((tokens resulting-body)) + (cond ((null? tokens) '()) + ;; TODO the parameters should be macro-expanded before being inserted + ((identifier-token? (car tokens)) + bound-identifier? + => (lambda (id) (append (assoc-ref parameter-map id) + (loop (cdr tokens))))) + (else (cons (car tokens) + (loop (cdr tokens)))))) + #; + (let ((env (extend-environment environment + (parameter-map->macro-list parameter-map)))) + (resolve-token-stream env resulting-body)))) @@ -142,28 +247,32 @@ ;; ⇒ "VALUE" ;; token should be the token stream just after the name of the macro -(define (expand-macro environment macro tokens) - (cond ((object-macro? macro) - ;; Shouldn't we expand the macro body here? - (values environment (append (macro-body macro) tokens))) - - ((function-macro? macro) - (let ((containing remaining newlines (parse-parameter-list tokens))) - (values (bump-line environment newlines) - ;; Macro output can be macro expanded - ;; TODO self-referential macros? - (append (apply-macro environment macro containing) remaining)))) - - ((internal-macro? macro) - (let ((containing remaining newlines (parse-parameter-list tokens))) - (values (bump-line environment newlines) - (append ((macro-body macro) environment containing) - remaining)))) - - (else - (scm-error 'wrong-type-arg "expand-macro" - "Macro isn't a macro: ~s" - (list macro) #f)))) +(define (expand-macro environment macro remaining-tokens) + (typecheck environment cpp-environment?) + (typecheck remaining-tokens (list-of token?)) + (let ((name (macro-identifier macro))) + (cond ((object-macro? macro) + (values environment (append (mark-noexpand (macro-body macro) name) + remaining-tokens))) + + ((function-macro? macro) + (let ((containing remaining newlines (parse-parameter-list remaining-tokens))) + (values (bump-line environment newlines) + (append (mark-noexpand (apply-macro environment macro containing) + name) + remaining)))) + + ((internal-macro? macro) + (let ((containing remaining newlines (parse-parameter-list remaining-tokens))) + (values (bump-line environment newlines) + (append (mark-noexpand ((macro-body macro) environment containing) + name) + remaining)))) + + (else + (scm-error 'wrong-type-arg "expand-macro" + "Macro isn't a macro: ~s" + (list macro) #f))))) ;; Takes a list of preprocessing tokens, and returns two values ;; if the last token was '...' @@ -171,6 +280,7 @@ ;; Note that this is ONLY #define f(x) forms ;; not usage forms (define (parse-identifier-list tokens) + (typecheck tokens (list-of token?)) (let loop ((tokens (remove whitespace-token? tokens)) (done '())) (match tokens ('() (values #f (reverse done))) @@ -205,6 +315,7 @@ ;; "( 2, 4 )" ;; 6.10.3.2 p 2 (define (cleanup-whitespace tokens) + (typecheck tokens (list-of token?)) (-> tokens drop-whitespace-both squeeze-whitespace)) ;; returns three values: @@ -212,9 +323,10 @@ ;; - the remaining tokenstream ;; - how many newlines were encountered ;; The standard might call these "replacement lists" -(define (parse-parameter-list tokens) +(define (parse-parameter-list tokens*) + (typecheck tokens* (list-of token?)) (let %loop ((depth 0) (newlines 0) (current '()) - (parameters '()) (tokens tokens) (%first-iteration? #t)) + (parameters '()) (tokens tokens*) (%first-iteration? #t)) (define* (loop tokens key: (depth depth) (newlines newlines) (current current) (parameters parameters)) @@ -223,6 +335,9 @@ current (cons (car tokens) current)))) (match tokens + (() (scm-error 'misc-error "parse-parameter-list" + "Ran out of tokens while parsing: ~s" + (list tokens*) #f)) (('(whitespace "\n") rest ...) (loop rest newlines: (1+ newlines) current: current*)) ((`(whitespace ,_) rest ...) @@ -264,6 +379,7 @@ (loop rest current: current*)))))) +;; Add __FILE__ and __LINE__ object macros to the environment (define (join-file-line environment) (define file (current-file environment)) (define line (current-line environment)) @@ -282,6 +398,7 @@ ;; #include (define (resolve-h-file string) + (typecheck string string?) (cond ((path-absolute? string) string) (else (let ((filename @@ -296,6 +413,7 @@ ;; #include "myheader.h" (define (resolve-q-file string) + (typecheck string string?) ;; This should always be a fallback (6.10.2, p. 3) (cond (else (resolve-h-file string)))) @@ -304,8 +422,9 @@ identifier: "defined" body: (lambda (environment tokens) (match tokens - (`((preprocessing-token (identifier ,id))) - `(preprocessing-token (pp-number ,(boolean->c-boolean (in-environment? environment id))))) + (`(((preprocessing-token (identifier ,id)))) + (let ((in-env (boolean->c-boolean (in-environment? environment id)))) + (lex (number->string in-env)))) (_ (scm-error 'cpp-error "defined" "Invalid parameter list to `defined': ~s" (list tokens) #f)))))) @@ -334,26 +453,94 @@ ;; TODO (define (resolve-constant-expression tokens) + (typecheck tokens (list-of token?)) 'TODO ) +(define* (pprint-macro x optional: (p (current-output-port))) + (cond ((internal-macro? x) + (format p "/* ~a INTERNAL MACRO */" + (macro-identifier x))) + ((object-macro? x) + (format p "#define ~a ~a" + (macro-identifier x) + (unlex (macro-body x)))) + ((function-macro? x) + (format p "#define ~a(~a) ~a" + (macro-identifier x) + (string-join (macro-identifier-list x) "," 'infix) + (unlex (macro-body x)))))) + +(define* (pprint-environment environment optional: (port (current-error-port))) + (display "== Environment ==\n") + (hash-for-each (lambda (key macro) + (pprint-macro macro port) + (newline port)) + (cpp-variables environment))) + +(define noexpand (make-object-property)) + +(define (mark-noexpand tokens name) + (typecheck tokens (list-of token?)) + (typecheck name string?) + (let ((tokens tokens)) + (for-each (lambda (token) (set! (noexpand token) (cons name (noexpand token)))) tokens) + tokens)) + +(define (list-like->list x) + (if (not (pair? x)) + (list x) + (cons (car x) (list-like->list (cdr x))))) + +(define (marked-noexpand? token) + (cond ((identifier-token? token) + => (lambda (id) (member id (list-like->list (noexpand token))))) + (else #f))) + ;; Expands a token-stream which contains no pre-processing directives (#if:s, ...) (define (resolve-token-stream environment tokens) - (let loop ((tokens tokens)) + (typecheck environment cpp-environment?) + (typecheck tokens (list-of token?)) + ;; (pprint-environment environment) + ;; (format (current-error-port) "~a~%~%" (unlex tokens)) + (let loop ((environment environment) (tokens tokens)) + (unless (null? tokens) + (format (current-error-port) "~s [~a] [~a]~%" + (car tokens) + (noexpand (car tokens)) + (marked-noexpand? (car tokens)))) + (format (current-error-port) "~a~%" (unlex tokens)) + (cond ((null? tokens) '()) + ((car tokens) + (lambda (x) (and (identifier-token? x) + (not (marked-noexpand? x)))) + => (lambda (token) + (call-with-values + (lambda () (maybe-extend-identifier environment + (identifier-token? token) + (cdr tokens))) + loop))) + (else (cons (car tokens) + (loop environment (cdr tokens)))) + ) + #; (match tokens ('() '()) ((`(preprocessing-token (identifier ,id)) rest ...) (call-with-values (lambda () (maybe-extend-identifier environment id rest)) - (lambda (_ tokens) (loop tokens)))) - ((`(whitespace ,_) rest ...) - (loop rest)) + loop)) + ;; ((`(whitespace ,_) rest ...) + ;; (loop environment rest)) ((token rest ...) - (cons token (loop rest)))))) + (cons token (loop environment rest)))))) ;; returns a new environment ;; handle body of #if ;; environment, (list token) → environment (define (resolve-for-if environment tokens) + (typecheck environment cpp-environment?) + (typecheck tokens (list-of token?)) + (-> (extend-environment environment defined-macro) (resolve-token-stream tokens) resolve-constant-expression @@ -363,15 +550,24 @@ ;; environment, string, (list token) → environment, (list token) (define (maybe-extend-identifier environment identifier remaining-tokens) + (typecheck environment cpp-environment?) + (typecheck identifier string?) + (typecheck remaining-tokens (list-of token?)) + ;; (typecheck continuation procedure?) ; TODO arity? (cond ((get-identifier environment identifier) => (lambda (value) (expand-macro (join-file-line environment) value remaining-tokens))) (else ; It wasn't an identifier, leave it as is - ;; TODO shouldn't we include the identifier in the remaining tokens stream? - (values environment remaining-tokens)))) + (values environment + (append (mark-noexpand `((preprocessing-token (identifier ,identifier))) + identifier) + remaining-tokens))))) (define (resolve-and-include-header environment tokens) + (typecheck environment cpp-environment?) + (typecheck tokens (list-of token?)) + (let loop ((%first-time #t) (tokens tokens)) (match (drop-whitespace tokens) ((`(header-name (h-string ,str)) rest ...) @@ -406,6 +602,9 @@ ;; environment, tokens → environment (define (handle-line-directive environment tokens*) + (typecheck environment cpp-environment?) + (typecheck tokens* (list-of token?)) + (let loop ((%first-time #t) (tokens tokens*)) (match tokens (`((preprocessing-token (pp-number ,line)) (whitespace ,_) ... rest ...) @@ -429,6 +628,9 @@ ;; environment, tokens → environment (define (resolve-define environment tokens) + (typecheck environment cpp-environment?) + (typecheck tokens (list-of token?)) + (match tokens ((`(preprocessing-token (identifier ,identifier)) tail ...) (-> environment @@ -438,20 +640,22 @@ (match tail (('(preprocessing-token (punctuator "(")) rest ...) ;; function like macro - (call-with-values (lambda () (break (lambda (token) (equal? token '(preprocessing-token (punctuator ")")))) - rest)) - (lambda (identifier-list replacement-list) - (let ((variadic? identifiers (parse-identifier-list identifier-list))) - - (function-like-macro - identifier: identifier - variadic?: variadic? - identifier-list: identifiers - ;; NOTE 6.10.3 states that there needs to be at least on whitespace here - body: (cdr replacement-list)))))) + (let ((identifier-list + replacement-list + (break (lambda (token) (equal? token '(preprocessing-token (punctuator ")")))) + rest))) + (let ((variadic? identifiers (parse-identifier-list identifier-list))) + (function-like-macro + identifier: identifier + variadic?: variadic? + identifier-list: identifiers + ;; NOTE 6.10.3 states that there needs to be at least on whitespace here + ;; cdr drops the end parenthesis of the definition + ;; surrounding whitespace is not part of the replacement list (6.10.3 p.7) + body: (drop-whitespace-both (cdr replacement-list)))))) (_ (object-like-macro identifier: identifier - body: tail)))))))) + body: (drop-whitespace-both tail))))))))) @@ -548,8 +752,7 @@ remaining-tokens))))))) ((`(preprocessing-token (identifier ,id)) rest ...) - (call-with-values (lambda () (maybe-extend-identifier environment id rest)) - loop)) + (maybe-extend-identifier environment id rest loop)) (('(whitespace "\n") rest ...) (cons '(whitespace "\n") (loop (bump-line environment) rest))) @@ -558,11 +761,6 @@ -(define (comment->whitespace expr) - (match expr - (('comment _) '(whitespace " ")) - (other other))) - (define (read-file path) (call-with-input-file path (@ (ice-9 rdelim) read-string))) -- cgit v1.2.3