diff options
Diffstat (limited to 'module/c/preprocessor.scm')
-rw-r--r-- | module/c/preprocessor.scm | 394 |
1 files changed, 394 insertions, 0 deletions
diff --git a/module/c/preprocessor.scm b/module/c/preprocessor.scm new file mode 100644 index 00000000..49ecfa27 --- /dev/null +++ b/module/c/preprocessor.scm @@ -0,0 +1,394 @@ +(define-module (c preprocessor) + :use-module (srfi srfi-1) + :use-module (srfi srfi-9 gnu) + :use-module (ice-9 rdelim) + :use-module (ice-9 regex) + :use-module (hnh util object) + + :use-module (hnh util) + :use-module (hnh util object) + ) + +(define (read-lines port) + (let loop ((done '())) + (let ((line (read-line port))) + (if (eof-object? line) + (reverse done) + (loop (cons line done)))))) + +;; The source line of a give readen line +(define line-number (make-object-property)) +;; The source file of a given readen line +(define line-file (make-object-property)) + + +(define (mark-with-property! items property property-value) + (for-each (lambda (item) (set! (property item) property-value)) + items)) + +(define trigraph-rx (make-regexp "??([=()/'<>!-])")) +(define (expand-trigraphs line) + (regexp-substitute/global + #f trigraph-rx + line + 'pre (lambda (m) (case (string-ref (match:substring m 1) 1) + ((#\=) "#") + ((#\() "[") + ((#\)) "]") + ((#\/) "\\") + ((#\') "^") + ((#\<) "{") + ((#\>) "}") + ((#\!) "|") + ((#\-) "~"))) + 'post)) + +(define (number-lines lines) + (for-each (lambda (line number) + (set! (line-number line) number)) + lines + (iota (length lines) 1)) + lines) + +;; Should this line be merged with the next +(define (line-continued? line) + (case (string-length line) + ((0) #f) + ((1) (string=? "\\" line)) + (else + (let ((len (string-length line))) + ;; TODO can extra backslashes change this? + (and (char=? #\\ (string-ref line (- len 1))) + (not (char=? #\\ (string-ref line (- len 2))))))))) + +(define (transfer-line-number to from) + (set! (line-number to) (line-number from)) + to) + +;; Merge two lines, assuming that upper ends with a backslash +(define (merge-lines upper lower) + (let ((new-string (string-append (string-drop-right upper 1) lower))) + (transfer-line-number new-string upper) + new-string)) + +(define (fold-lines lines) + (fold-right (lambda (line done) + (if (line-continued? line) + (cons (merge-lines line (car done)) (cdr done)) + (cons line done))) + '() + lines)) + + +(define comment-rx (make-regexp "(//|/[*]|[*]/)")) + +(define (strip-comments lines) + (let loop ((in-comment #f) (lines lines) (done '())) + (if (null? lines) + (reverse done) + (let ((line (car lines))) + (cond ((regexp-exec comment-rx line) + => (lambda (m) + (format (current-output-port) "~s ~s substr = ~s~%" in-comment (line-number line) (match:substring m)) + (cond ((and in-comment (string=? "*/" (match:substring m))) + (loop #f (cons (transfer-line-number (match:suffix m) line) + (cdr lines)) + done)) + (in-comment (loop #t (cdr lines) done)) + ((string=? "//" (match:substring m)) + (loop #f (cdr lines) (cons (transfer-line-number (match:prefix m) line) + done))) + ((string=? "/*" (match:substring m)) + (loop #t (cons (transfer-line-number (match:suffix m) line) (cdr lines)) done)) + (else + (scm-error 'cpp-error "strip-comments" + "Unexpected */ in file ~a on line ~a" + (list (line-file line) (line-number line)) + #f))))) + (else (loop in-comment (cdr lines) (cons line done)))))))) + + +(define-immutable-record-type <preprocessor-directive> + (make-preprocessor-directive type body) + proprocessor-directive? + (type directive-type) + (body directive-body)) + +(define cpp-directive-rx (make-regexp "\\s*#\\s*((\\w+)(.*))?")) +(define (preprocessor-directive? line) + (cond ((regexp-exec cpp-directive-rx line) + => (lambda (m) + (if (match:substring m 2) + (make-preprocessor-directive + (string->symbol (match:substring m 2)) + (string-trim-both (match:substring m 3) char-set:whitespace)) + 'sort-of))) + (else #f))) + +;; defined + +;; TODO _Pragma + + +(define (expand-function-line-macro environment macro . params) + (expand-macro environment (apply-macro macro (map (lambda (param) (expand-macro environment param)) params)))) + +;; (define (environment-ref )) + +(define (list-of? lst predicate) + (every predicate lst)) + + +(define-type (cpp-environment) + (cpp-if-status type: (list-of? (lambda (x) (memv x '(outside active-if inactive-if)))) + ;; type: (list-of? (memv '(outside active-if inactive-if))) + default: '(outside)) + (cpp-variabes type: hash-table? default: (make-hash-table))) + +(define (make-environment) (cpp-environment)) + +(define (in-envirnoment? environment key) + (hash-get-handle (cpp-variables environment) key)) + +(define (remove-identifier! environment key) + (hash-remove! (cpp-variables environment) key) + environment) + +(define (add-identifier! environment key value) + (assert (string? key)) + (assert (macro? value)) + (hash-set! (cpp-variables environment) key value) + environment) + +;; Parantheses when defining macro +(define (parse-parameter-string string) + (map string-trim-both + (string-split (string-trim-both string (char-set #\( #\))) + #\,))) + + +(define-type (object-macro) + (body type: string?)) + +(define-type (function-macro) + (formals type: (list-of? string?)) + (body type: string?)) + +(define (macro? x) + (or (object-macro? x) + (function-macro? x))) + +;; The interesting part +;; environment, (list string) -> (values (list string) (list strings)) +;; multiple lines since since a function-like macro can extend over multiple lines +(define (expand-macros environment strings) + ) + + +(define (crash-if-not-if body guilty) + (scm-error 'cpp-error guilty + "else, elif, and endif invalid outside if scope: ~s~%file: ~s line: ~s" + (list body (line-file body) (line-number body)))) + +;; (environment, lines) -> environment x lines +(define (parse-directives environment lines) + (let loop ((environment environment) (lines lines) (done '())) + (let* ((line (car line)) + (directive? (preprocessor-directive? line))) + (case directive? + ((#f) ; regular line + (loop environment (cdr lines) + ;; TODO this doesn't work, since parse-macros works on multiple lines + (cons (parse-macros environment (car lines)) done))) + ((sort-of) ; a no-op directive + (loop environment (cdr lines) done)) + (else ; an actual directive + (case (car (cpp-if-status environment)) + ((outside) + (case (directive-type m) + ((ifndef endif else) + (scm-error 'cpp-error "parse-directives" + "Unexpected directive: ~s" + (list line) #f)) + (else ; inside if, ifelse or else + ;; outside active-if inactive-if + (case (directive-type m) + ;; stack ending directives + ((endif) + (case (car (cpp-if-status environment)) + ((outside) (crash-if-not-if (directive-body m) "endif")) + (else (loop (modify environment cpp-if-status cdr) + (cdr lines) + done)))) + + ;; stack nudging directives + ((else) + (case (car (cpp-if-status environment)) + ((outside) (crash-if-not-if (directive-body m) "else")) + (else (loop (modify environment (lens-compose cpp-if-status car*) + (lambda (old) + (case old + ((active-if) 'inactive-if) + ((inactive-if) 'active-if)))) + (cdr lines) + done)))) + ((elif) + (case (car (cpp-if-status environment)) + ((outside) (crash-if-not-if (directive-body m) "elif")) + (else ;; TODO + ) + )) + + ;; stack creating directives + ;; on inactive-if each creates a new frame, which also is inactive + ((ifndef) + (case (car (cpp-if-status environment)) + ((inactive-if) (loop (modify environment cpp-if-status + xcons 'inactive-if) + (cdr lines) + done)) + (else (loop (modify environment cpp-if-status + xcons (if (in-environment? environment (directive-body directive?)) + 'inactive-if 'active-if)) + (cdr lines) + done)))) + + ((ifdef) + (case (car (cpp-if-status environment)) + ((inactive-if) (loop (modify environment cpp-if-status + xcons 'inactive-if) + (cdr lines) + done)) + (else + (loop (modify environment cpp-if-status + xcons (if (in-environment? environment (directive-body directive?)) + 'active-if 'inactive-if)) + (cdr lines) + done)))) + + ((if) + (case (car (cpp-if-status environment)) + ((inactive-if) (loop (modify environment cpp-if-status + xcons 'inactive-if) + (cdr lines) + done)) + (else ;; TODO + ))) + + + ;; other directives + ((include) (cond ((string-match "[<\"](.*)" + => (lambda (m) + (let ((fileneme (string-drop-right (directive-body m) 1))) + (case (string-ref (match:substring m 1) 0) + ;; TODO include-path + ((#\<) (handle-file environment filename)) + ((#\") (handle-file environment filename)))))) + (else (scm-error 'cpp-error "parse-directives" + "Invalid include" + '() #f)))) + ((define) + ;; TODO what are valid names? + (cond ((string-match "^(\\w+)([(][^)]*[)])?\\s+(.*)" (directive-body directive?)) + => (lambda (m) + (loop (let ((macro-body (string-trim-both (match:substring m 3)))) + (add-identifier! + environment + (match:substring m 1) + (cond ((match:substring m 2) + => (lambda (parameter-string) + (function-macro + formals: (parse-parameter-string parameter-string) + body: macro-body))) + (else (object-macro body: macro-body))))) + (cdr lines) + done))) + (else (scm-error 'cpp-error "parse-directives" + "Invalid #define line, ~s" + (list (directive-body directive?)) + #f)))) + + ((undef) + (case (car (cpp-if-status environment)) + ((inactive-if) (loop environment (cdr lines) done)) + (else (loop (remove-identifier environment (directive-body directive?)) + (cdr lines) + done)))) + + ((line) + (case (car (cpp-if-status environment)) + ((inactive-if) (loop environment (cdr lines) done)) + ;; TODO add first-run parameter to loop, in case expand-macros still return something invalid + (else (let parse-line-directive ((tokens (string-tokenize (directive-body directive?)))) + (cond ((= 1 (length tokens)) + ;; TODO parse token + (if (integer? (car tokens)) + ;; TODO update current line + (loop environment (cdr lines) done) + (parse-line-directive (expand-macros environment (directive-body directive?))))) + ((= 2 (length tokens)) + ;; TODO parse tokens + (if (and (integer? (car tokens)) + (string-literal? (cadr tokens))) + ;; TODO update current line and file + (loop environment (cdr lines) done) + (parse-line-directive (expand-macros environment (directive-body directive?))))) + (else (parse-line-directive (expand-macros environment (directive-body directive?))))))))) + + ((error) + (throw 'cpp-error-directive + (directive-body directive?))) + + ((warning) + (format (current-error-port) "#warning ~a~%" + (directive-body directive?)) + (loop environment (cdr lines) done)) + + ((pragma) + (format (current-error-port) + "PRAGMA: ~s~%" (directive-body directive?)) + (loop environment (cdr lines) done)) + + ((ident sccs) + (format (current-error-port) + "IDENT: ~s~%" (directive-body directive?)) + (loop environment (cdr lines) done)) + + (else + (scm-error 'cpp-error "parse-directives" + "Unknown pre-processor directive: ~s" + (list line) #f) + ))))))))) + )) + + +(define* (writeln expr optional: (port (current-output-port))) + (write expr port) + (newline port)) + +(define (handle-lines environment lines) + (parse-directive environment + (compose strip-comments fold-lines number-lines))) + + ;; parse-directives environment + +;; Return a number of lines +(define (read-file file-path) + (define raw-lines (call-with-input-file file-path read-lines)) + (mark-with-property! line line-file file-path) + (handle-lines raw-lines)) + + +;; pre defined macros +;; see info manual for cpp 3.7.1 Standard Predefined Macros +;; __FILE__ +;; __LINE__ +;; __DATE__ "Feb 12 1996" +;; __TIME__ "23:59:01" +;; __STDC__ 1 +;; __STDC_VERSION__ 201112L +;; __STDC_HOSTED__ 1 + +;; __cplusplus +;; __OBJC__ +;; __ASSEMBLER__ |