From cba504b509cd59f376063f6e590362b197147a2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20H=C3=B6rnquist?= Date: Sun, 3 Jul 2022 12:36:35 +0200 Subject: Major work. --- module/c/compiler.scm | 65 +++ module/c/cpp-environment.scm | 137 +++++++ module/c/cpp-environment/function-like-macro.scm | 17 + module/c/cpp-environment/internal-macro.scm | 11 + module/c/cpp-environment/object-like-macro.scm | 13 + module/c/eval2.scm | 20 + module/c/lex2.scm | 323 +++++++++++++++ module/c/line-fold.scm | 29 ++ module/c/preprocessor.scm | 394 ++++++++++++++++++ module/c/preprocessor2.scm | 496 +++++++++++++++++++++++ module/c/trigraph.scm | 24 ++ tests/test/cpp/lex2.scm | 64 +++ tests/test/cpp/preprocessor2.scm | 29 ++ 13 files changed, 1622 insertions(+) create mode 100644 module/c/compiler.scm create mode 100644 module/c/cpp-environment.scm create mode 100644 module/c/cpp-environment/function-like-macro.scm create mode 100644 module/c/cpp-environment/internal-macro.scm create mode 100644 module/c/cpp-environment/object-like-macro.scm create mode 100644 module/c/eval2.scm create mode 100644 module/c/lex2.scm create mode 100644 module/c/line-fold.scm create mode 100644 module/c/preprocessor.scm create mode 100644 module/c/preprocessor2.scm create mode 100644 module/c/trigraph.scm create mode 100644 tests/test/cpp/lex2.scm create mode 100644 tests/test/cpp/preprocessor2.scm diff --git a/module/c/compiler.scm b/module/c/compiler.scm new file mode 100644 index 00000000..121e6c07 --- /dev/null +++ b/module/c/compiler.scm @@ -0,0 +1,65 @@ +(define-module (c compiler) + :use-module ((c lex2) :select (lex)) + :use-module ((c trigraph) :select (replace-trigraphs)) + :use-module ((c line-fold) :select (fold-lines)) + :use-module (hnh util) + :export (run-compiler)) + +(define (comment->whitespace expr) + (match expr + (('comment _) '(whitespace " ")) + (other other))) + +" +#define __STDC__ 1 +#define __STDC_HOSTED__ 1 +#define __STDC_VERSION__ 201112L +" + +(define now (localtime (current-time))) +(define default-macros + (list + ;; 6.10.8 + (object-like-macro + identifier: "__STDC__" + body: '(preprocessing-token (pp-number "1"))) + (object-like-macro + identifier: "__STDC_HOSTED__" + body: '(preprocessing-token (pp-number "1"))) + (object-like-macro + identifier: "__STDC_VERSION__" + body: '(preprocessing-token (pp-number "201112L"))) + (object-like-macro + identifier: "__DATE__" + ;; TODO format should always be in + ;; english, and not tranlated + body: `(preprocessing-token (string-literal ,(strftime "%b %_d %Y" now)))) + (object-like-macro + identifier: "__TIME__" + body: (preprocessing-token + (string-literal + ,(strftime "%H:%M:%S" now)))))) + +(define environment + (-> (make-environment) + (extend-environment default-macros))) + + +(define (read-file path) + (call-with-input-file path read-string)) + +;;; 5.1.11.2 Translation phases + + + +(define (run-compiler path) + (define environment (enter-file (make-environment) path)) + (-> (load-and-tokenize-file path) + (handle-preprocessing-tokens environment)) +;;; 5. (something with character sets) +;;; 6. concatenation of string literals +;;; 7. Whitespace tokens are discarded, each preprocessing token is converted into a token + ;; 6.4 paragraph 2 + ;; Each preprocessing toket thas is converted to a token shall have the lexcal form of a keyword, an identifier, a constant, a string literal, or a puncturtor +;;; 8. All external objects and functions are resolved + ) diff --git a/module/c/cpp-environment.scm b/module/c/cpp-environment.scm new file mode 100644 index 00000000..20589b8e --- /dev/null +++ b/module/c/cpp-environment.scm @@ -0,0 +1,137 @@ +(define-module (c cpp-environment) + :use-module (srfi srfi-1) + :use-module (srfi srfi-88) + :use-module (ice-9 hash-table) + :use-module (hnh util object) + :use-module (hnh util lens) + :use-module ((c cpp-environment function-like-macro) :prefix #{fun:}#) + :use-module ((c cpp-environment object-like-macro) :prefix #{obj:}#) + :use-module ((c cpp-environment internal-macro) :prefix #{int:}#) + :export ( + + macro-identifier + macro-body + macro-identifier-list + macro-variadic? + macro? + + enter-active-if + enter-inactive-if + leave-if + + enter-file + leave-file + bump-line + current-line + current-file + + function-macro? + object-macro? + internal-macro? + + cpp-environment + cpp-if-status cpp-variables + + make-environment in-environment? + remove-identifier! add-identifier! + get-identifier + extend-environment + + )) + +(define (macro-identifier x) + (define identifier + (cond ((obj:object-like-macro? x) obj:identifier) + ((fun:function-like-macro? x) fun:identifier) + ((int:internal-macro? x) int:identifier))) + (identifier x)) + + +(define (macro-body macro) + (define body-proc + (cond ((obj:object-like-macro? macro) obj:body) + ((fun:function-like-macro? macro) fun:body) + ((int:internal-macro? macro) int:body))) + (body-proc macro)) + +(define macro-identifier-list fun:identifier-list) +(define macro-variadic? fun:variadic?) + +(define function-macro? fun:function-like-macro?) +(define object-macro? obj:object-like-macro?) +(define internal-macro? int:internal-macro?) + +(define (macro? x) + (or (obj:object-like-macro? x) + (fun:function-like-macro? x) + (int:internal-macro? x))) + +(define-type (cpp-environment) + (cpp-if-status type: (list-of (memv '(outside active-if inactive-if))) + default: '(outside)) + (cpp-variabes type: hash-table? default: (make-hash-table)) + (cpp-file-stack type: list? + default: '())) + + + +(define (enter-active-if environment) + (modify environment cpp-if-status xcons 'active-if)) + +(define (enter-inactive-if environment) + (modify environment cpp-if-status xcons 'inactive-if)) + +(define (leave-if environment) + (modify environment cpp-if-status cdr)) + + + +(define (enter-file environment filename) + (modify environment cpp-file-stack xcons (cons filename 1))) + +(define (leave-file environment) + (modify environment cpp-file-stack cdr)) + +(define current-line (compose-lenses cpp-file-stack car* cdr*)) + +(define current-file (compose-lenses cpp-file-stack car* car*)) + +(define* (bump-line environment optional: (count 1)) + (modify environment current-line + count)) + + + + +(define (make-environment) (cpp-environment)) + +(define (in-envirnoment? environment key) + (hash-get-handle (cpp-variables environment) key)) + +(define (remove-identifier! environment key) + (hash-remove! (cpp-variables environment) key) + environment) + +(define (add-identifier! environment key value) + (unless (string? key) + (scm-error 'wrong-type-arg "add-identifier!" + "Key must be a string, got: ~s" + (list key) #f)) + (unless (macro? key) + (scm-error 'wrong-type-arg "add-identifier!" + "Value must be a macro, got: ~s" + (list value) #f)) + (hash-set! (cpp-variables environment) key value) + environment) + +(define (get-identifier environment key) + (hash-ref (cpp-variables environment) key)) + +(define (clone-hash-table ht) + (alist->hash-table (hash-map->list cons ht))) + +(define (extend-environment environment macros) + (let ((env (modify environment cpp-variables clone-hash-table))) + (fold (lambda (pair m) + (add-identifier! env (macro-identifier m) m )) + env macros))) + diff --git a/module/c/cpp-environment/function-like-macro.scm b/module/c/cpp-environment/function-like-macro.scm new file mode 100644 index 00000000..0a0611e3 --- /dev/null +++ b/module/c/cpp-environment/function-like-macro.scm @@ -0,0 +1,17 @@ +(define-module (c cpp-environment function-like-macro) + :use-module (hnh util object) + :export (function-like-macro + function-like-macro? + identifier + identifier-list + body + variadic?)) + +(define-type (function-like-macro) + (identifier type: string?) + (identifier-list type: (list-of string?)) + ;; TODO import these + (body type: list? ; (list-of (or whitespace-token? preprocessing-token?)) + ) + (variadic? type: boolean? + default: #f)) diff --git a/module/c/cpp-environment/internal-macro.scm b/module/c/cpp-environment/internal-macro.scm new file mode 100644 index 00000000..3c946738 --- /dev/null +++ b/module/c/cpp-environment/internal-macro.scm @@ -0,0 +1,11 @@ +(define-module (c cpp-environment internal-macro) + :use-module (hnh util object) + :export (internal-macro + internal-macro? + identifier body)) + +(define-type (internal-macro) + (identifier type: string?) + (body type: procedure? + ;; Arity 2 + )) diff --git a/module/c/cpp-environment/object-like-macro.scm b/module/c/cpp-environment/object-like-macro.scm new file mode 100644 index 00000000..5d4c8810 --- /dev/null +++ b/module/c/cpp-environment/object-like-macro.scm @@ -0,0 +1,13 @@ +(define-module (c cpp-environment object-like-macro) + :use-module (hnh util object) + :export (object-like-macro + object-like-macro? + identifier + body)) + + +(define-type (object-like-macro) + (identifier type: string?) + ;; TODO import these + (body type: list? ; (list-of (or whitespace-token? preprocessing-token?)) + )) diff --git a/module/c/eval2.scm b/module/c/eval2.scm new file mode 100644 index 00000000..d58f20bf --- /dev/null +++ b/module/c/eval2.scm @@ -0,0 +1,20 @@ +(define-module (c eval2) + :use-module ((hnh util) :select (->)) + :export (C-TRUE + C-FALSE + boolean->c-boolean + c-boolean->boolean + c-not)) + + +(define C-TRUE 1) +(define C-FALSE 0) + +(define (boolean->c-boolean bool) + (if bool C-TRUE C-FALSE)) + +(define (c-boolean->boolean bool) + (not (zero? bool))) + +(define (c-not b) + (-> b c-boolean->boolean not boolean->c-boolean)) diff --git a/module/c/lex2.scm b/module/c/lex2.scm new file mode 100644 index 00000000..23fa9da4 --- /dev/null +++ b/module/c/lex2.scm @@ -0,0 +1,323 @@ +(define-module (c lex2) + :use-module (ice-9 peg) + :export (lex)) + +;;; A.1 Lexical grammar +;;; A.1.1 Lexical elements + +;; (6.4) +(define-peg-pattern token all + (or keyword + identifier + constant + string-literal + punctuator + )) + +;; (6.4) +(define-peg-pattern preprocessing-token all + ;; string literal moved before header-name since string literals + ;; otherwise became q-strings + (or string-literal + header-name + identifier + pp-number + character-constant + punctuator + ;; Each non-white-space character that cannot be one of the above + )) + +;;; A.1.2 Keywords + +;; (6.4.1) +(define-peg-pattern keyword all + (or "auto" "break" "case" "char" "const" "continue" "default" + "do" "double" "else" "enum" "extern" "float" "for" "goto" + "if" "inline" "int" "long" "register" "restrict" "return" + "short" "signed" "sizeof" "static" "struct" "switch" + "typedef" "union" "unsigned" "void" "volatile" "while" + "_Alignas" "_Alignof" "_Atomic" "_Bool" "_Complex" + "_Generic" "_Imaginary" "_Noreturn" "_Static_assert" + "_Thread_local")) + +;;; A.1.3 Identifiers + +;; (6.4.2.1) +(define-peg-pattern identifier all + (and identifier-nondigit (* (or identifier-nondigit digit)))) + +;; (6.4.2.1) +(define-peg-pattern identifier-nondigit body + (or nondigit + universal-character-name + ;; TODO other implementation-defined characters + )) + +;; (6.4.2.1) +(define-peg-pattern nondigit body + (or "_" + (range #\A #\Z) + (range #\a #\z))) + +;; (6.4.2.1) +(define-peg-pattern digit body + (range #\0 #\9)) + +;;; A.1.4 Universal character names + +;; (6.4.3) +(define-peg-pattern universal-character-name all + (or (and "\\u" hex-quad) + (and "\\U" hex-quad hex-quad))) + +;; (6.4.3) +(define-peg-pattern hex-quad body + (and hexadecimal-digit hexadecimal-digit + hexadecimal-digit hexadecimal-digit)) + +;;; A.1.5 Constants + +;; (6.4.4) +(define-peg-pattern constant all + ;; Int and float swapped from standard since we need to try parsing + ;; the floats beforehand + (or floating-constant + integer-constant + enumeration-constant + character-constant)) + +;; (6.4.4.1) +(define-peg-pattern integer-constant all + (and (or decimal-constant + octal-constant + hexadecimal-constant) + integer-suffix)) + +;; (6.4.4.1) +(define-peg-pattern decimal-constant all + (and nonzero-digit + (+ digit))) + +;; (6.4.4.1) +(define-peg-pattern octal-constant all + (+ octal-digit)) + +;; (6.4.4.1) +(define-peg-pattern hexadecimal-constant all + (and hexadecimal-prefix (+ hexadecimal-digit))) + +;; (6.4.4.1) +(define-peg-pattern hexadecimal-prefix none + (or "0x" "0X")) + +;; (6.4.4.1) +(define-peg-pattern nonzero-digit body + (range #\1 #\9)) + +;; (6.4.4.1) +(define-peg-pattern octal-digit body + (range #\0 #\7)) + +;; (6.4.4.1) +(define-peg-pattern hexadecimal-digit body + (or (range #\0 #\9) + (range #\a #\f) + (range #\A #\F))) + +;; (6.4.4.1) +(define-peg-pattern integer-suffix all + (or (and unsigned-suffix (? long-suffix)) + (and long-suffix (? unsigned-suffix)))) + +;; (6.4.4.1) +;; This is a merger of long-suffix and long-long-suffix +(define-peg-pattern long-suffix body + (or "l" "L" "ll" "LL")) + +;; (6.4.4.1) +(define-peg-pattern unsigned-suffix body + (or "u" "U")) + +;; (6.4.4.2) +(define-peg-pattern floating-constant all + (or decimal-floating-constant + hexadecimal-floating-constant)) + +;; (6.4.4.2) +(define-peg-pattern floating-constant all + (or (and fractional-constant (? exponent-part) (? floating-suffix)) + (and digit-sequence exponent-part (? floating-suffix)))) + +;; (6.4.4.2) +(define-peg-pattern hexadecimal-floating-constant all + (and hexadecimal-prefix + (or hexadecimal-fractional-constant + hexadecimal-digit-sequence) + binary-exponent-part + (? floating-suffix))) + +;; (6.4.4.2) +(define-peg-pattern fractional-constant all + (or (and (? digit-sequence) "." digit-sequence) + (and digit-sequence "."))) + +;; (6.4.4.2) +(define-peg-pattern exponent-part all + (and (or "e" "E") (? sign) digit-sequence)) + +;; (6.4.4.2) +(define-peg-pattern sign all + (or "+" "-")) + +;; (6.4.4.2) +(define-peg-pattern digit-sequence body + (+ digit)) + +;; (6.4.4.2) +(define-peg-pattern hexadecimal-fractional-constant all + (or (and (? hexadecimal-digit-sequence) "." hexadecimal-digit-sequence) + (and hexadecimal-digit-sequence "."))) + +;; (6.4.4.2) +(define-peg-pattern binary-exponent-part all + (and (or "p" "P") + (? sign) + digit-sequence)) + +;; (6.4.4.2) +(define-peg-pattern hexadecimal-digit-sequence body + (+ hexadecimal-digit)) + +;; (6.4.4.2) +(define-peg-pattern floating-suffix all + (or "f" "l" "F" "L")) + +;; (6.4.4.3) +(define-peg-pattern enumeration-constant all + identifier) + +(define-peg-pattern character-prefix all + (or "L" "u" "U")) + +;; (6.4.4.4) +(define-peg-pattern character-constant all + (and (? character-prefix) + (ignore "'") + (+ c-char) + (ignore "'"))) + +;; (6.4.4.4) +(define-peg-pattern c-char body + (or (and (not-followed-by (or "'" "\\" "\n")) peg-any) + escape-sequence)) + +;; (6.4.4.4) +(define-peg-pattern escape-sequence all + (or simple-escape-sequence + octal-escape-sequence + hexadecimal-escape-sequence + universal-character-name)) + +;; (6.4.4.4) +(define-peg-pattern simple-escape-sequence all + (and (ignore "\\") (or "'" "\"" "?" "\\" + "a" "b" "f" "n" "r" "t" "v"))) + +;; (6.4.4.4) +(define-peg-pattern octal-escape-sequence all + (and (ignore "\\") octal-digit (? octal-digit) (? octal-digit))) + +;; (6.4.4.4) +(define-peg-pattern hexadecimal-escape-sequence all + (and (ignore "\\x") (+ hexadecimal-digit))) + +;; A.1.6 String literals + +;; (6.4.5) +(define-peg-pattern string-literal all + (and (? encoding-prefix) + (ignore "\"") + (* s-char) + (ignore "\""))) + +;; (6.4.5) +(define-peg-pattern encoding-prefix all + (or "u8" "u" "U" "L")) + +;; (6.4.5) +(define-peg-pattern s-char body + (or (and (not-followed-by (or "\"" "\\" "\n")) peg-any) + escape-sequence)) + +;;; A.1.7 + +;; (6.4.6) +(define-peg-pattern punctuator all + (or "[" "]" "(" ")" "{" "}" "." "->" + "++" "--" "&" "*" "+" "-" "~" "!" + "/" "%" "<<" ">>" "<" ">" "<=" ">=" "==" "!=" "^" "|" "&&" "||" + "?" ":" ";" "..." + "=" "*=" "/=" "%=" "+=" "-=" "<<=" ">>=" "&=" "^=" "|=" + "," "#" "##" + "<:" ":>" "<%" "%>" "%:" "%:%:")) + +;;; A.1.8 Header names + +(define-peg-pattern h-string all (+ h-char)) +(define-peg-pattern q-string all (+ q-char)) + +;; (6.4.7) +(define-peg-pattern header-name all + (or (and (ignore "<") h-string (ignore ">")) + (and (ignore "\"") q-string (ignore "\"")))) + +;; (6.4.7) +(define-peg-pattern h-char body + (or (and (not-followed-by (or ">" "\n")) peg-any) + escape-sequence)) + +;; (6.4.7) +(define-peg-pattern q-char body + (or (and (not-followed-by (or "\"" "\n")) peg-any) + escape-sequence)) + +;;; A.1.9 Preprocessing numbers + +;; (6.4.8) +(define-peg-pattern pp-number all + (and (? ".") digit + (* (or digit + identifier-nondigit + (and (or "e" "E" "p" "P") + sign) + ".")))) + + + +(define-peg-pattern whitespace all + (or "\t" "\n" "\v" "\f" " " + ;; "\r" + )) + +(define-peg-pattern block-comment body + (and (ignore "/*") + (* (and (not-followed-by "*/") + peg-any)) + (ignore "*/"))) + +(define-peg-pattern line-comment body + (and (ignore "//") + (* (and (not-followed-by "\n") + peg-any)))) + +(define-peg-pattern comment all + (or line-comment block-comment)) + +(define-peg-pattern preprocessing-tokens body + (* (or whitespace + comment + preprocessing-token))) + + +(define (lex string) + (peg:tree (match-pattern preprocessing-tokens string))) diff --git a/module/c/line-fold.scm b/module/c/line-fold.scm new file mode 100644 index 00000000..c61c2c70 --- /dev/null +++ b/module/c/line-fold.scm @@ -0,0 +1,29 @@ +(define-module (c line-fold) + :use-module (srfi srfi-1) + :use-module (srfi srfi-71) + :export (fold-lines)) + +(define (line-continued? line) + (and (not (string-null? line)) + (char=? #\\ (string-ref line (1- (string-length line)))))) + +(define (strip-backslash line) + (string-drop-right line 1)) + +(define (fold-lines string) + (with-output-to-string + (lambda () + (let loop ((lines (string-split string #\newline))) + (cond ((null? lines) 'NOOP) + ((null? (cdr lines)) + ;; TODO error message if last character is a backslash + (display (car lines)) + (newline)) + (else + (let ((to-merge remaining (span line-continued? lines))) + (for-each display (map strip-backslash to-merge)) + (display (car remaining)) + (newline) + (for-each (lambda _ (newline)) + (iota (length to-merge))) + (loop (cdr remaining))))))))) diff --git a/module/c/preprocessor.scm b/module/c/preprocessor.scm new file mode 100644 index 00000000..49ecfa27 --- /dev/null +++ b/module/c/preprocessor.scm @@ -0,0 +1,394 @@ +(define-module (c preprocessor) + :use-module (srfi srfi-1) + :use-module (srfi srfi-9 gnu) + :use-module (ice-9 rdelim) + :use-module (ice-9 regex) + :use-module (hnh util object) + + :use-module (hnh util) + :use-module (hnh util object) + ) + +(define (read-lines port) + (let loop ((done '())) + (let ((line (read-line port))) + (if (eof-object? line) + (reverse done) + (loop (cons line done)))))) + +;; The source line of a give readen line +(define line-number (make-object-property)) +;; The source file of a given readen line +(define line-file (make-object-property)) + + +(define (mark-with-property! items property property-value) + (for-each (lambda (item) (set! (property item) property-value)) + items)) + +(define trigraph-rx (make-regexp "??([=()/'<>!-])")) +(define (expand-trigraphs line) + (regexp-substitute/global + #f trigraph-rx + line + 'pre (lambda (m) (case (string-ref (match:substring m 1) 1) + ((#\=) "#") + ((#\() "[") + ((#\)) "]") + ((#\/) "\\") + ((#\') "^") + ((#\<) "{") + ((#\>) "}") + ((#\!) "|") + ((#\-) "~"))) + 'post)) + +(define (number-lines lines) + (for-each (lambda (line number) + (set! (line-number line) number)) + lines + (iota (length lines) 1)) + lines) + +;; Should this line be merged with the next +(define (line-continued? line) + (case (string-length line) + ((0) #f) + ((1) (string=? "\\" line)) + (else + (let ((len (string-length line))) + ;; TODO can extra backslashes change this? + (and (char=? #\\ (string-ref line (- len 1))) + (not (char=? #\\ (string-ref line (- len 2))))))))) + +(define (transfer-line-number to from) + (set! (line-number to) (line-number from)) + to) + +;; Merge two lines, assuming that upper ends with a backslash +(define (merge-lines upper lower) + (let ((new-string (string-append (string-drop-right upper 1) lower))) + (transfer-line-number new-string upper) + new-string)) + +(define (fold-lines lines) + (fold-right (lambda (line done) + (if (line-continued? line) + (cons (merge-lines line (car done)) (cdr done)) + (cons line done))) + '() + lines)) + + +(define comment-rx (make-regexp "(//|/[*]|[*]/)")) + +(define (strip-comments lines) + (let loop ((in-comment #f) (lines lines) (done '())) + (if (null? lines) + (reverse done) + (let ((line (car lines))) + (cond ((regexp-exec comment-rx line) + => (lambda (m) + (format (current-output-port) "~s ~s substr = ~s~%" in-comment (line-number line) (match:substring m)) + (cond ((and in-comment (string=? "*/" (match:substring m))) + (loop #f (cons (transfer-line-number (match:suffix m) line) + (cdr lines)) + done)) + (in-comment (loop #t (cdr lines) done)) + ((string=? "//" (match:substring m)) + (loop #f (cdr lines) (cons (transfer-line-number (match:prefix m) line) + done))) + ((string=? "/*" (match:substring m)) + (loop #t (cons (transfer-line-number (match:suffix m) line) (cdr lines)) done)) + (else + (scm-error 'cpp-error "strip-comments" + "Unexpected */ in file ~a on line ~a" + (list (line-file line) (line-number line)) + #f))))) + (else (loop in-comment (cdr lines) (cons line done)))))))) + + +(define-immutable-record-type + (make-preprocessor-directive type body) + proprocessor-directive? + (type directive-type) + (body directive-body)) + +(define cpp-directive-rx (make-regexp "\\s*#\\s*((\\w+)(.*))?")) +(define (preprocessor-directive? line) + (cond ((regexp-exec cpp-directive-rx line) + => (lambda (m) + (if (match:substring m 2) + (make-preprocessor-directive + (string->symbol (match:substring m 2)) + (string-trim-both (match:substring m 3) char-set:whitespace)) + 'sort-of))) + (else #f))) + +;; defined + +;; TODO _Pragma + + +(define (expand-function-line-macro environment macro . params) + (expand-macro environment (apply-macro macro (map (lambda (param) (expand-macro environment param)) params)))) + +;; (define (environment-ref )) + +(define (list-of? lst predicate) + (every predicate lst)) + + +(define-type (cpp-environment) + (cpp-if-status type: (list-of? (lambda (x) (memv x '(outside active-if inactive-if)))) + ;; type: (list-of? (memv '(outside active-if inactive-if))) + default: '(outside)) + (cpp-variabes type: hash-table? default: (make-hash-table))) + +(define (make-environment) (cpp-environment)) + +(define (in-envirnoment? environment key) + (hash-get-handle (cpp-variables environment) key)) + +(define (remove-identifier! environment key) + (hash-remove! (cpp-variables environment) key) + environment) + +(define (add-identifier! environment key value) + (assert (string? key)) + (assert (macro? value)) + (hash-set! (cpp-variables environment) key value) + environment) + +;; Parantheses when defining macro +(define (parse-parameter-string string) + (map string-trim-both + (string-split (string-trim-both string (char-set #\( #\))) + #\,))) + + +(define-type (object-macro) + (body type: string?)) + +(define-type (function-macro) + (formals type: (list-of? string?)) + (body type: string?)) + +(define (macro? x) + (or (object-macro? x) + (function-macro? x))) + +;; The interesting part +;; environment, (list string) -> (values (list string) (list strings)) +;; multiple lines since since a function-like macro can extend over multiple lines +(define (expand-macros environment strings) + ) + + +(define (crash-if-not-if body guilty) + (scm-error 'cpp-error guilty + "else, elif, and endif invalid outside if scope: ~s~%file: ~s line: ~s" + (list body (line-file body) (line-number body)))) + +;; (environment, lines) -> environment x lines +(define (parse-directives environment lines) + (let loop ((environment environment) (lines lines) (done '())) + (let* ((line (car line)) + (directive? (preprocessor-directive? line))) + (case directive? + ((#f) ; regular line + (loop environment (cdr lines) + ;; TODO this doesn't work, since parse-macros works on multiple lines + (cons (parse-macros environment (car lines)) done))) + ((sort-of) ; a no-op directive + (loop environment (cdr lines) done)) + (else ; an actual directive + (case (car (cpp-if-status environment)) + ((outside) + (case (directive-type m) + ((ifndef endif else) + (scm-error 'cpp-error "parse-directives" + "Unexpected directive: ~s" + (list line) #f)) + (else ; inside if, ifelse or else + ;; outside active-if inactive-if + (case (directive-type m) + ;; stack ending directives + ((endif) + (case (car (cpp-if-status environment)) + ((outside) (crash-if-not-if (directive-body m) "endif")) + (else (loop (modify environment cpp-if-status cdr) + (cdr lines) + done)))) + + ;; stack nudging directives + ((else) + (case (car (cpp-if-status environment)) + ((outside) (crash-if-not-if (directive-body m) "else")) + (else (loop (modify environment (lens-compose cpp-if-status car*) + (lambda (old) + (case old + ((active-if) 'inactive-if) + ((inactive-if) 'active-if)))) + (cdr lines) + done)))) + ((elif) + (case (car (cpp-if-status environment)) + ((outside) (crash-if-not-if (directive-body m) "elif")) + (else ;; TODO + ) + )) + + ;; stack creating directives + ;; on inactive-if each creates a new frame, which also is inactive + ((ifndef) + (case (car (cpp-if-status environment)) + ((inactive-if) (loop (modify environment cpp-if-status + xcons 'inactive-if) + (cdr lines) + done)) + (else (loop (modify environment cpp-if-status + xcons (if (in-environment? environment (directive-body directive?)) + 'inactive-if 'active-if)) + (cdr lines) + done)))) + + ((ifdef) + (case (car (cpp-if-status environment)) + ((inactive-if) (loop (modify environment cpp-if-status + xcons 'inactive-if) + (cdr lines) + done)) + (else + (loop (modify environment cpp-if-status + xcons (if (in-environment? environment (directive-body directive?)) + 'active-if 'inactive-if)) + (cdr lines) + done)))) + + ((if) + (case (car (cpp-if-status environment)) + ((inactive-if) (loop (modify environment cpp-if-status + xcons 'inactive-if) + (cdr lines) + done)) + (else ;; TODO + ))) + + + ;; other directives + ((include) (cond ((string-match "[<\"](.*)" + => (lambda (m) + (let ((fileneme (string-drop-right (directive-body m) 1))) + (case (string-ref (match:substring m 1) 0) + ;; TODO include-path + ((#\<) (handle-file environment filename)) + ((#\") (handle-file environment filename)))))) + (else (scm-error 'cpp-error "parse-directives" + "Invalid include" + '() #f)))) + ((define) + ;; TODO what are valid names? + (cond ((string-match "^(\\w+)([(][^)]*[)])?\\s+(.*)" (directive-body directive?)) + => (lambda (m) + (loop (let ((macro-body (string-trim-both (match:substring m 3)))) + (add-identifier! + environment + (match:substring m 1) + (cond ((match:substring m 2) + => (lambda (parameter-string) + (function-macro + formals: (parse-parameter-string parameter-string) + body: macro-body))) + (else (object-macro body: macro-body))))) + (cdr lines) + done))) + (else (scm-error 'cpp-error "parse-directives" + "Invalid #define line, ~s" + (list (directive-body directive?)) + #f)))) + + ((undef) + (case (car (cpp-if-status environment)) + ((inactive-if) (loop environment (cdr lines) done)) + (else (loop (remove-identifier environment (directive-body directive?)) + (cdr lines) + done)))) + + ((line) + (case (car (cpp-if-status environment)) + ((inactive-if) (loop environment (cdr lines) done)) + ;; TODO add first-run parameter to loop, in case expand-macros still return something invalid + (else (let parse-line-directive ((tokens (string-tokenize (directive-body directive?)))) + (cond ((= 1 (length tokens)) + ;; TODO parse token + (if (integer? (car tokens)) + ;; TODO update current line + (loop environment (cdr lines) done) + (parse-line-directive (expand-macros environment (directive-body directive?))))) + ((= 2 (length tokens)) + ;; TODO parse tokens + (if (and (integer? (car tokens)) + (string-literal? (cadr tokens))) + ;; TODO update current line and file + (loop environment (cdr lines) done) + (parse-line-directive (expand-macros environment (directive-body directive?))))) + (else (parse-line-directive (expand-macros environment (directive-body directive?))))))))) + + ((error) + (throw 'cpp-error-directive + (directive-body directive?))) + + ((warning) + (format (current-error-port) "#warning ~a~%" + (directive-body directive?)) + (loop environment (cdr lines) done)) + + ((pragma) + (format (current-error-port) + "PRAGMA: ~s~%" (directive-body directive?)) + (loop environment (cdr lines) done)) + + ((ident sccs) + (format (current-error-port) + "IDENT: ~s~%" (directive-body directive?)) + (loop environment (cdr lines) done)) + + (else + (scm-error 'cpp-error "parse-directives" + "Unknown pre-processor directive: ~s" + (list line) #f) + ))))))))) + )) + + +(define* (writeln expr optional: (port (current-output-port))) + (write expr port) + (newline port)) + +(define (handle-lines environment lines) + (parse-directive environment + (compose strip-comments fold-lines number-lines))) + + ;; parse-directives environment + +;; Return a number of lines +(define (read-file file-path) + (define raw-lines (call-with-input-file file-path read-lines)) + (mark-with-property! line line-file file-path) + (handle-lines raw-lines)) + + +;; pre defined macros +;; see info manual for cpp 3.7.1 Standard Predefined Macros +;; __FILE__ +;; __LINE__ +;; __DATE__ "Feb 12 1996" +;; __TIME__ "23:59:01" +;; __STDC__ 1 +;; __STDC_VERSION__ 201112L +;; __STDC_HOSTED__ 1 + +;; __cplusplus +;; __OBJC__ +;; __ASSEMBLER__ diff --git a/module/c/preprocessor2.scm b/module/c/preprocessor2.scm new file mode 100644 index 00000000..19daabfb --- /dev/null +++ b/module/c/preprocessor2.scm @@ -0,0 +1,496 @@ +(define-module (c preprocessor2) + :use-module (srfi srfi-1) + :use-module (srfi srfi-71) + :use-module (srfi srfi-88) + :use-module (ice-9 match) + :use-module (c cpp-environment) + :use-module (c eval2) + :use-module ((c cpp-environment function-like-macro) :select (function-like-macro)) + :use-module ((c cpp-environment object-like-macro) :select (object-like-macro)) + :use-module ((c cpp-environment internal-macro) :select (internal-macro)) + :use-module ((hnh util) :select (->)) + :use-module ((hnh util lens) :select (set)) + :use-module (hnh util path) + :use-module ((c lex2) :select (lex)) + :use-module ((c trigraph) :select (replace-trigraphs)) + :use-module ((c line-fold) :select (fold-lines)) + :export ()) + +(define (tokens-until-eol tokens) + (break (lambda (token) (equal? token '(whitespace "\n"))) + tokens)) + +(define (whitespace-token? token) + (eq? 'whitespace (car token))) + +(define (preprocessing-token? token) + (eq? 'preprocessing-token token)) + +(define (squeeze-whitespace tokens) + (match tokens + ('() '()) + (`((whitespace ,_) (whitespace ,_) ,rest ...) + (squeeze-whitespace (cons '(whitespace " ") rest))) + (`((whitespace ,_) ,rest ...) + (cons '(whitespace " ") (squeeze-whitespace rest))) + ((token rest ...) + (cons token (squeeze-whitespace rest))))) + +(define (stringify-token token) + ;; TODO propperly implement this + `(preprocessing-token + (string-literal ,(with-output-to-string (lambda () (display token)))))) + +(define (stringify-tokens tokens) + (with-output-to-string + (lambda () + (for-each (compose display stringify-token) + (squeeze-whitespace tokens))))) + +;; Expand ## tokens +;; TODO +(define (expand-join macro tokens) + tokens) + +;; expand function like macro +(define (apply-macro environment macro parameters) + (define parameter-map + (if (macro-variadic? macro) + (let ((head rest (split-at parameters (length (macro-identifier-list macro))))) + ;; TODO commas (,) should be interleaved with rest + (cons (cons "__VA_ARGS__" rest) + (map cons (macro-identifier-list macro) head))) + (map cons + (macro-identifier-list macro) + parameters))) + + ;; resolve strigify operators + (define stringify-resolved + (let loop ((tokens (macro-body macro))) + (match tokens + (`((preprocessing-token (punctuator "#")) + (whitespace ,_) ... + (preprocessing-token (identifier ,x)) + ,rest ...) + (unless (member x (macro-identifier-list macro)) + (scm-error 'macro-expand-error "apply-macro" + "'#' is not followed by a macro parameter: ~s" + (list x) #f) + (cons (stringify-tokens (assoc-ref parameter-map x)) + (loop rest)))) + ('() '()) + ((token rest ...) + (cons token (loop rest)))))) + + ;; TODO + ;; - resolve ## + (define resulting-body + (expand-join macro stringify-resolved)) + + ;; - subtitute parameters + ;; TODO what am I doing here? + (expand-macro (-> environment + (extend-environment parameter-map)) + resulting-body)) + + + +;; Expand object-like macro + +;; #define VALUE 10 +;; #define str(x) #x +;; #define OTHER str(VALUE) +;; OTHER +;; ⇒ "VALUE" + +(define (expand-macro environment macro tokens) + (cond ((object-macro? macro) + (values environment (append (macro-body macro) tokens))) + + ((function-macro? macro) + (let ((containing remaining newlines (parse-parameter-list tokens))) + (values (bump-line environment newlines) + ;; Macro output can be macro expanded + ;; TODO self-referential macros? + (append (apply-macro environment macro containing) remaining)))) + + ((internal-macro? macro) + (let ((containing remaining newlines (parse-parameter-list tokens))) + (values (bump-line environment newlines) + (append ((macro-body macro) environment containing) + remaining)))))) + +;; Takes a list of preprocessing tokens, and returns two values +;; if the last token was '...' +;; and a list of strings of all token names +;; Note that this is ONLY #define f(x) forms +;; not usage forms +(define (parse-identifier-list tokens) + (let loop ((tokens (remove whitespace-token? tokens)) (done '())) + (match tokens + ('() (values #f (reverse done))) + + ((`(preprocessing-token (punctuation "..."))) + (values #t (reverse done))) + + ((`(preprocessing-token (identifier ,id)) rest ...) + (loop rest (cons id done))) + + ((`(preprocessing-token (punctuation "...")) rest ...) + (scm-error 'cpp-error "parse-identifier-list" + "'...' only allowed as last argument in identifier list" + '() #f)) + + ((`(preprocessing-token ,other) rest ...) + (scm-error 'cpp-error "parse-identifier-list" + "Unexpected preprocessing-token in identifier list: ~s" + (list other) #f))))) + + +;; returns three values: +;; - a list of tokens where each is a parameter to the function like macro +;; - the remaining tokenstream +;; - how many newlines were encountered +(define (parse-parameter-list tokens) + (let %loop ((depth 0) (newlines 0) (current '()) + (parameters '()) (tokens tokens) (%first-iteration? #t)) + (define* (loop tokens key: + (depth depth) (newlines newlines) + (current current) (parameters parameters)) + (%loop depth newlines current parameters tokens #f)) + (let ((current* (if (zero? depth) + current + (cons (car tokens) current)))) + (match tokens + (`((whitespace "\n") ,rest ...) + (loop rest newlines: (1+ newlines) current: current*)) + (`((whitespace ,_) ,rest ...) + (loop rest current: current*)) + (`((preprocessing-token (punctuator "(")) ,rest ...) + (loop rest depth: (1+ depth) current: current*)) + (`((preprocessing-token (punctuator ")")) ,rest ...) + (if (= 1 depth) + (values (reverse (cons (reverse current) parameters)) + rest + newlines) + (loop rest + depth: (1- depth) + current: current*))) + (`((preprocessing-token (punctuator ",")) ,rest ...) + (if (= 1 depth) + (loop rest + current: '() + parameters: (cons (reverse current) parameters)) + (loop rest current: current*))))))) + + +(define (join-file-line environment) + (define file (current-file environment)) + (define line (current-line environment)) + (extend-environment + environment + ;; 6.10.8 + `(("__FILE__" . (preprocessing-token (string-literal ,file))) + ("__LINE__" . (preprocessing-token (pp-number ,(number->string line))))))) + +(define (c-search-path) (make-parameter (list "." "/usr/include"))) + +;; #include +(define (resolve-h-file string) + (cond ((path-absolute? string) string) + (else + (let ((filename + (find file-exists? + (map (lambda (path-prefix) + (path-append path-prefix string)) + (c-search-path))))) + (if filename filename + (scm-error 'cpp-error "resolve-h-file" + "Can't resolve file: ~s" + (list string) #f)))))) + +;; #include "myheader.h" +(define (resolve-q-file string) + ;; This should always be a fallback (6.10.2, p. 3) + (cond (else (resolve-h-file string)))) + +(define defined-macro + (internal-macro + identifier: "defined" + body: (lambda (environment tokens) + (match tokens + (`((preprocessor-token (identifier ,id))) + `(preprocessor-token (pp-number ,(boolean->c-boolean (in-environment? environment id))))) + (_ (scm-error 'cpp-error "defined" + "Invalid parameter list to `defined': ~s" + (list tokens) #f)))))) + +;; environment, tokens → environment +(define (handle-pragma environment tokens) + (match tokens + (`((preprocessing-token (identifier "STDC")) (whitespace ,_) ... + (preprocessing-token (identifier ,identifier)) (whitespace ,_) ... + (preprocessing-token (identifier ,on-off-switch)) (whitespace ,_) ...) + ;; TODO actually do something with the pragmas (probably just store them in the environment) + (format (current-error-port) + "#Pragma STDC ~a ~a" identifier on-off-switch) + environment) + (_ (format (current-error-port) + "Non-standard #Pragma: ~s~%" tokens) + environment))) + + +;; TODO +;; (define _Pragma-macro +;; (internal-macro +;; identifier: "_Pragma" +;; body: (lambda (environment tokens) +;; ))) + +;; TODO +(define (resolve-constant-expression tokens) + 'TODO + ) + +(define (resolve-token-stream environment tokens) + (let loop ((tokens tokens)) + (match tokens + ('() '()) + (`((preprocessing-token (identifier ,id)) ,rest ...) + (call-with-values (lambda () (maybe-extend-identifier environment id rest)) + (lambda (_ tokens) (loop tokens)))) + (`((whitespace ,_) ,rest ...) + (loop rest)) + ((token rest ...) + (cons token (loop rest)))))) + +;; returns a new environment +;; handle body of #if +;; environment, (list token) → environment +(define (resolve-for-if environment tokens) + (-> (extend-environment environment defined-macro) + (resolve-token-stream tokens) + resolve-constant-expression + c-boolean->boolean + (if (enter-active-if environment) + (enter-inactive-if environment)))) + +;; environment, string, (list token) → environment, (list token) +(define (maybe-extend-identifier environment identifier remaining-tokens) + (cond ((get-identifier environment identifier) + => (lambda (value) (expand-macro (join-file-line environment) value remaining-tokens))) + (else ; It wasn't an identifier, leave it as is + (values environment remaining-tokens)))) + +(define (resolve-and-include-header environment tokens) + (let loop ((%first-time #t) (tokens tokens)) + (match (drop-while whitespace-token? tokens) + ((`(header-name (h-string ,str)) rest ...) + (cond ((remove whitespace-token? rest) + (negate null?) + => (lambda (tokens) + (scm-error 'cpp-error "resolve-and-include-header" + "Unexpected tokens after #include <>: ~s" + (list tokens) #f)))) + (handle-preprocessing-tokens + environment + (-> str resolve-h-file read-file tokenize))) + + ((`(header-name (q-string ,str)) rest ...) + (cond ((remove whitespace-token? rest) + (negate null?) + => (lambda (tokens) + (scm-error 'cpp-error "resolve-and-include-header" + "Unexpected tokens after #include <>: ~s" + (list tokens) + #f)))) + (handle-preprocessing-tokens + environment + (-> str resolve-q-file read-file tokenize))) + + (tokens + (unless %first-time + (scm-error 'cpp-error "resolve-and-include-header" + "Failed parsing tokens: ~s" + (list tokens) #f)) + (loop #f (resolve-token-stream environment tokens)))))) + +;; environment, tokens → environment +(define (handle-line-directive environment tokens*) + (let loop ((%first-time #t) (tokens tokens*)) + (match tokens + (`((preprocessing-token (pp-number ,line)) (whitespace ,_) ... rest ...) + (match rest + (`((preprocessing-token (string-literal ,file)) (whitespace ,_) ...) + (-> environment + (set current-line line) + (set current-file file))) + (`((whitespace ,_) ...) + (set environment current-line line)) + (_ (unless %first-time + (scm-error 'cpp-error "handle-line-directive" + "Invalid line directive: ~s" + (list tokens*) #f)) + (loop #f (resolve-token-stream environment tokens))))) + (_ (unless %first-time + (scm-error 'cpp-error "handle-line-directive" + "Invalid line directive: ~s" + (list tokens*) #f)) + (loop #f (resolve-token-stream environment tokens)))))) + +;; environment, tokens → environment +(define (resolve-define environment tokens) + (match tokens + (`((preprocessing-token (identifier ,identifier)) tail ...) + (-> environment + bump-line + (add-identifier! + identifier + (if (equal? '(preprocessing-token (punctuator "(")) (car tail)) + ;; function like macro + (call-with-values (lambda () (break (lambda (token) (equal? token '(preprocessing-token (punctuator ")")))) + (cdr tail))) + (lambda (identifier-list replacement-list) + (let ((variadic? identifiers (parse-identifier-list identifier-list))) + (function-like-macro + identifier: identifier + variadic?: variadic? + identifier-list: identifiers + ;; NOTE 6.10.3 states that there needs to be at least on whitespace here + body: (cdr replacement-list))))) + + (object-like-macro + identifier: identifier + body: tail))))))) + + + +;; environment, tokens -> environment, tokens +(define (handle-preprocessing-tokens environment tokens) + (let loop ((environment environment) (tokens tokens)) + (define (err fmt . args) + (scm-error 'cpp-error "handle-preprocessing-tokens" + (string-append "~a:~a " fmt) + (cons* (current-file environment) + (current-line environment) + args) + #f)) + + (match tokens + ('() '()) + (`((whitespace "\n") (whitespace ,_) ... (preprocessing-token (puntuator "#")) ,rest ...) + ;; Line tokens are those in this line, + ;; while remaining tokens are the newline, follewed by the rest of the files tokens + (let ((line-tokens remaining-tokens (tokens-until-eol rest))) + ;; Actual tokens just removes all whitespace between "#" and "define" + (let ((actual-tokens (drop-while whitespace-token? line-tokens))) + (if (null? actual-tokens) + (loop (bump-line environment) remaining-tokens) + (match (car actual-tokens) + (`(preprocessing-token (identifier "if")) + (let ((environment (resolve-for-if environment actual-tokens))) + (loop environment remaining-tokens))) + + (`(preprocessing-token (identifier "ifdef")) + (match actual-tokens + (`((preprocessing-token (identifier ,id)) ,_ ...) + (loop + ((if (in-environment? environment id) + enter-active-if enter-inactive-if) + environment) + remaining-tokens)) + (_ (err "Non identifier in ifdef: ~s" actual-tokens)))) + + (`(preprocessing-token (identifier "ifndef")) + (match actual-tokens + (`((preprocessing-token (identifier ,id)) ,_ ...) + (loop + ((if (in-environment? environment id) + enter-inactive-if enter-active-if) + environment) + remaining-tokens)) + (_ (err "Non identifier in ifndef: ~s" actual-tokens)))) + + (`(preprocessing-token (identifier "else")) + ;; TODO + 'TODO + ) + + (`(preprocessing-token (identifier "elif")) + (-> environment leave-if + (resolve-for-if actual-tokens) + (loop remaining-tokens))) + + (`(preprocessing-token (identifier "endif")) + (loop (leave-if environment) remaining-tokens)) + + (`(preprocessing-token (identifier "include")) + (call-with-values + (lambda () (resolve-and-include-header environment (cdr actual-tokens))) + (lambda (environment tokens) + (loop environment (append tokens remaining-tokens))))) + + (`(preprocessing-token (identifier "define")) + (let ((env (resolve-define environment (cdr actual-tokens)))) + (loop env remaining-tokens)) + ) + + (`(preprocessing-token (identifier "undef")) + (loop (match actual-tokens + (`((preprocessing-token (identifier ,id))) + (-> environment bump-line (remove-identifier! id)))) + remaining-tokens)) + + (`(preprocessing-token (identifier "line")) + (loop (handle-line-directive environment actual-tokens) + remaining-tokens)) + + (`(preprocessing-token (identifier "error")) + ;; NOTE this is an "expected" error + (throw 'cpp-error actual-tokens)) + + (`(preprocessing-token (identifier "pragma")) + (loop (handle-pragma environment actual-tokens) + remaining-tokens))))))) + + (`((preprocessing-token (identifier ,id)) ,rest ...) + (call-with-values (lambda () (maybe-extend-identifier environment id rest)) + loop)) + + (('(whitespace "\n") rest ...) + (cons '(whitespace "\n") (loop (bump-line environment) rest))) + + ((token rest ...) (cons token (loop environment rest)))))) + + + +(define (comment->whitespace expr) + (match expr + (('comment _) '(whitespace " ")) + (other other))) + +(define (read-file path) + (call-with-input-file path (@ (ice-9 rdelim) read-string))) + +(define (comment->whitespace token) + (match token + (`(comment ,_) '(whitespace " ")) + (other other))) + +(define (comments->whitespace tokens) + (map comment->whitespace tokens)) + +;;; 5.1.11.2 Translation phases + +(define (tokenize string) + (-> string +;;; 1. trigraph replacement + replace-trigraphs +;;; 2. Line folding + fold-lines +;;; 3. Decomposition into preprocenning tokens, whitespaces, and comments + lex +;;; 4. Execution of preprocessing directives, all preprocessing directives are then deleted + comments->whitespace + ;; squeeze-whitespace-blocks + )) diff --git a/module/c/trigraph.scm b/module/c/trigraph.scm new file mode 100644 index 00000000..197e01a4 --- /dev/null +++ b/module/c/trigraph.scm @@ -0,0 +1,24 @@ +(define-module (c trigraph) + :use-module (ice-9 regex) + :export (replace-trigraphs)) + +(define rx (make-regexp "\\?\\?([=\\(\\)'!<>/-])")) + +(define (proc m) + (case (string-ref (match:substring m 2) 0) + ((#\=) "#") + ((#\() "[") + ((#\)) "]") + ((#\') "^") + ((#\<) "{") + ((#\>) "}") + ((#\!) "|") + ((#\-) "~") + ((#\/) "\\"))) + +(define (replace-trigraphs string) + (call-with-output-string + (lambda (port) + (regexp-substitute/global + port rx string + 'pre proc 'post)))) diff --git a/tests/test/cpp/lex2.scm b/tests/test/cpp/lex2.scm new file mode 100644 index 00000000..0342e25c --- /dev/null +++ b/tests/test/cpp/lex2.scm @@ -0,0 +1,64 @@ +(define-module (test cpp lex2) + :use-module (srfi srfi-64) + :use-module (srfi srfi-88) + :use-module (ice-9 peg) + :use-module (c lex2)) + + +(test-equal "Integer literal" + '(preprocessing-token (pp-number "10")) + (lex "10")) + +(test-equal "String literal" + '(preprocessing-token (string-literal "Hello")) + (lex "\"Hello\"")) + + +(test-equal "Mulitple tokens, including whitespace" + '((whitespace " ") + (preprocessing-token (pp-number "10")) + (whitespace " ")) + (lex " 10 ")) + +(test-equal "Char literal" + '(preprocessing-token (character-constant "a")) + (lex "'a'")) + + + +(test-equal "Comment inside string" + '(preprocessing-token (string-literal "Hel/*lo")) + (lex "\"Hel/*lo\"")) + +(test-equal "#define line" + '((preprocessing-token (punctuator "#")) + (preprocessing-token (identifier "define")) + (whitespace " ") + (preprocessing-token (identifier "f")) + (preprocessing-token (punctuator "(")) + (preprocessing-token (identifier "x")) + (preprocessing-token (punctuator ")")) + (whitespace " ") + (preprocessing-token (pp-number "10"))) + (lex "#define f(x) 10")) + + + +(test-equal "Nested parenthesis" + '((preprocessing-token (identifier "f")) + (preprocessing-token (punctuator "(")) + (preprocessing-token (pp-number "1")) + (preprocessing-token (punctuator ",")) + (whitespace " ") + (preprocessing-token (punctuator "(")) + (preprocessing-token (pp-number "2")) + (preprocessing-token (punctuator ",")) + (whitespace " ") + (preprocessing-token (pp-number "3")) + (preprocessing-token (punctuator ")")) + (preprocessing-token (punctuator ",")) + (whitespace " ") + (preprocessing-token (pp-number "4")) + (preprocessing-token (punctuator ")"))) + (lex "f(1, (2, 3), 4)")) + diff --git a/tests/test/cpp/preprocessor2.scm b/tests/test/cpp/preprocessor2.scm new file mode 100644 index 00000000..117b7e49 --- /dev/null +++ b/tests/test/cpp/preprocessor2.scm @@ -0,0 +1,29 @@ +(define-module (test cpp preprocessor2) + :use-module (srfi srfi-64) + :use-module (srfi srfi-88)) + + + +(test-group "Tokens until End Of Line" + (call-with-values + (lambda () + (tokens-until-eol + '(before (whitespace "\n") after))) + (lambda (bef aft) + (test-equal '(before) bef) + (test-equal '((whitespace "\n") after) aft)))) + + + +(test-equal "Squeeze whitespace" + '(bef (whitespace " ") aft) + (squeeze-whitespace + '(bef + (whitespace a) + (whitespace b) + aft))) + + + +(test-equal "(" + (stringify-token '(preprocessor-token (operator "(")))) -- cgit v1.2.3