aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHugo Hörnquist <hugo@lysator.liu.se>2022-07-03 12:36:35 +0200
committerHugo Hörnquist <hugo@lysator.liu.se>2022-07-07 21:31:32 +0200
commitcba504b509cd59f376063f6e590362b197147a2c (patch)
tree954e90b0053ab4c0247ef242607654c862d02e48
parentMerge branch 'new-object-system' into c-parser (diff)
downloadcalp-cba504b509cd59f376063f6e590362b197147a2c.tar.gz
calp-cba504b509cd59f376063f6e590362b197147a2c.tar.xz
Major work.
-rw-r--r--module/c/compiler.scm65
-rw-r--r--module/c/cpp-environment.scm137
-rw-r--r--module/c/cpp-environment/function-like-macro.scm17
-rw-r--r--module/c/cpp-environment/internal-macro.scm11
-rw-r--r--module/c/cpp-environment/object-like-macro.scm13
-rw-r--r--module/c/eval2.scm20
-rw-r--r--module/c/lex2.scm323
-rw-r--r--module/c/line-fold.scm29
-rw-r--r--module/c/preprocessor.scm394
-rw-r--r--module/c/preprocessor2.scm496
-rw-r--r--module/c/trigraph.scm24
-rw-r--r--tests/test/cpp/lex2.scm64
-rw-r--r--tests/test/cpp/preprocessor2.scm29
13 files changed, 1622 insertions, 0 deletions
diff --git a/module/c/compiler.scm b/module/c/compiler.scm
new file mode 100644
index 00000000..121e6c07
--- /dev/null
+++ b/module/c/compiler.scm
@@ -0,0 +1,65 @@
+(define-module (c compiler)
+ :use-module ((c lex2) :select (lex))
+ :use-module ((c trigraph) :select (replace-trigraphs))
+ :use-module ((c line-fold) :select (fold-lines))
+ :use-module (hnh util)
+ :export (run-compiler))
+
+(define (comment->whitespace expr)
+ (match expr
+ (('comment _) '(whitespace " "))
+ (other other)))
+
+"
+#define __STDC__ 1
+#define __STDC_HOSTED__ 1
+#define __STDC_VERSION__ 201112L
+"
+
+(define now (localtime (current-time)))
+(define default-macros
+ (list
+ ;; 6.10.8
+ (object-like-macro
+ identifier: "__STDC__"
+ body: '(preprocessing-token (pp-number "1")))
+ (object-like-macro
+ identifier: "__STDC_HOSTED__"
+ body: '(preprocessing-token (pp-number "1")))
+ (object-like-macro
+ identifier: "__STDC_VERSION__"
+ body: '(preprocessing-token (pp-number "201112L")))
+ (object-like-macro
+ identifier: "__DATE__"
+ ;; TODO format should always be in
+ ;; english, and not tranlated
+ body: `(preprocessing-token (string-literal ,(strftime "%b %_d %Y" now))))
+ (object-like-macro
+ identifier: "__TIME__"
+ body: (preprocessing-token
+ (string-literal
+ ,(strftime "%H:%M:%S" now))))))
+
+(define environment
+ (-> (make-environment)
+ (extend-environment default-macros)))
+
+
+(define (read-file path)
+ (call-with-input-file path read-string))
+
+;;; 5.1.11.2 Translation phases
+
+
+
+(define (run-compiler path)
+ (define environment (enter-file (make-environment) path))
+ (-> (load-and-tokenize-file path)
+ (handle-preprocessing-tokens environment))
+;;; 5. (something with character sets)
+;;; 6. concatenation of string literals
+;;; 7. Whitespace tokens are discarded, each preprocessing token is converted into a token
+ ;; 6.4 paragraph 2
+ ;; Each preprocessing toket thas is converted to a token shall have the lexcal form of a keyword, an identifier, a constant, a string literal, or a puncturtor
+;;; 8. All external objects and functions are resolved
+ )
diff --git a/module/c/cpp-environment.scm b/module/c/cpp-environment.scm
new file mode 100644
index 00000000..20589b8e
--- /dev/null
+++ b/module/c/cpp-environment.scm
@@ -0,0 +1,137 @@
+(define-module (c cpp-environment)
+ :use-module (srfi srfi-1)
+ :use-module (srfi srfi-88)
+ :use-module (ice-9 hash-table)
+ :use-module (hnh util object)
+ :use-module (hnh util lens)
+ :use-module ((c cpp-environment function-like-macro) :prefix #{fun:}#)
+ :use-module ((c cpp-environment object-like-macro) :prefix #{obj:}#)
+ :use-module ((c cpp-environment internal-macro) :prefix #{int:}#)
+ :export (
+
+ macro-identifier
+ macro-body
+ macro-identifier-list
+ macro-variadic?
+ macro?
+
+ enter-active-if
+ enter-inactive-if
+ leave-if
+
+ enter-file
+ leave-file
+ bump-line
+ current-line
+ current-file
+
+ function-macro?
+ object-macro?
+ internal-macro?
+
+ cpp-environment
+ cpp-if-status cpp-variables
+
+ make-environment in-environment?
+ remove-identifier! add-identifier!
+ get-identifier
+ extend-environment
+
+ ))
+
+(define (macro-identifier x)
+ (define identifier
+ (cond ((obj:object-like-macro? x) obj:identifier)
+ ((fun:function-like-macro? x) fun:identifier)
+ ((int:internal-macro? x) int:identifier)))
+ (identifier x))
+
+
+(define (macro-body macro)
+ (define body-proc
+ (cond ((obj:object-like-macro? macro) obj:body)
+ ((fun:function-like-macro? macro) fun:body)
+ ((int:internal-macro? macro) int:body)))
+ (body-proc macro))
+
+(define macro-identifier-list fun:identifier-list)
+(define macro-variadic? fun:variadic?)
+
+(define function-macro? fun:function-like-macro?)
+(define object-macro? obj:object-like-macro?)
+(define internal-macro? int:internal-macro?)
+
+(define (macro? x)
+ (or (obj:object-like-macro? x)
+ (fun:function-like-macro? x)
+ (int:internal-macro? x)))
+
+(define-type (cpp-environment)
+ (cpp-if-status type: (list-of (memv '(outside active-if inactive-if)))
+ default: '(outside))
+ (cpp-variabes type: hash-table? default: (make-hash-table))
+ (cpp-file-stack type: list?
+ default: '()))
+
+
+
+(define (enter-active-if environment)
+ (modify environment cpp-if-status xcons 'active-if))
+
+(define (enter-inactive-if environment)
+ (modify environment cpp-if-status xcons 'inactive-if))
+
+(define (leave-if environment)
+ (modify environment cpp-if-status cdr))
+
+
+
+(define (enter-file environment filename)
+ (modify environment cpp-file-stack xcons (cons filename 1)))
+
+(define (leave-file environment)
+ (modify environment cpp-file-stack cdr))
+
+(define current-line (compose-lenses cpp-file-stack car* cdr*))
+
+(define current-file (compose-lenses cpp-file-stack car* car*))
+
+(define* (bump-line environment optional: (count 1))
+ (modify environment current-line + count))
+
+
+
+
+(define (make-environment) (cpp-environment))
+
+(define (in-envirnoment? environment key)
+ (hash-get-handle (cpp-variables environment) key))
+
+(define (remove-identifier! environment key)
+ (hash-remove! (cpp-variables environment) key)
+ environment)
+
+(define (add-identifier! environment key value)
+ (unless (string? key)
+ (scm-error 'wrong-type-arg "add-identifier!"
+ "Key must be a string, got: ~s"
+ (list key) #f))
+ (unless (macro? key)
+ (scm-error 'wrong-type-arg "add-identifier!"
+ "Value must be a macro, got: ~s"
+ (list value) #f))
+ (hash-set! (cpp-variables environment) key value)
+ environment)
+
+(define (get-identifier environment key)
+ (hash-ref (cpp-variables environment) key))
+
+(define (clone-hash-table ht)
+ (alist->hash-table (hash-map->list cons ht)))
+
+(define (extend-environment environment macros)
+ (let ((env (modify environment cpp-variables clone-hash-table)))
+ (fold (lambda (pair m)
+ (add-identifier! env (macro-identifier m) m ))
+ env macros)))
+
diff --git a/module/c/cpp-environment/function-like-macro.scm b/module/c/cpp-environment/function-like-macro.scm
new file mode 100644
index 00000000..0a0611e3
--- /dev/null
+++ b/module/c/cpp-environment/function-like-macro.scm
@@ -0,0 +1,17 @@
+(define-module (c cpp-environment function-like-macro)
+ :use-module (hnh util object)
+ :export (function-like-macro
+ function-like-macro?
+ identifier
+ identifier-list
+ body
+ variadic?))
+
+(define-type (function-like-macro)
+ (identifier type: string?)
+ (identifier-list type: (list-of string?))
+ ;; TODO import these
+ (body type: list? ; (list-of (or whitespace-token? preprocessing-token?))
+ )
+ (variadic? type: boolean?
+ default: #f))
diff --git a/module/c/cpp-environment/internal-macro.scm b/module/c/cpp-environment/internal-macro.scm
new file mode 100644
index 00000000..3c946738
--- /dev/null
+++ b/module/c/cpp-environment/internal-macro.scm
@@ -0,0 +1,11 @@
+(define-module (c cpp-environment internal-macro)
+ :use-module (hnh util object)
+ :export (internal-macro
+ internal-macro?
+ identifier body))
+
+(define-type (internal-macro)
+ (identifier type: string?)
+ (body type: procedure?
+ ;; Arity 2
+ ))
diff --git a/module/c/cpp-environment/object-like-macro.scm b/module/c/cpp-environment/object-like-macro.scm
new file mode 100644
index 00000000..5d4c8810
--- /dev/null
+++ b/module/c/cpp-environment/object-like-macro.scm
@@ -0,0 +1,13 @@
+(define-module (c cpp-environment object-like-macro)
+ :use-module (hnh util object)
+ :export (object-like-macro
+ object-like-macro?
+ identifier
+ body))
+
+
+(define-type (object-like-macro)
+ (identifier type: string?)
+ ;; TODO import these
+ (body type: list? ; (list-of (or whitespace-token? preprocessing-token?))
+ ))
diff --git a/module/c/eval2.scm b/module/c/eval2.scm
new file mode 100644
index 00000000..d58f20bf
--- /dev/null
+++ b/module/c/eval2.scm
@@ -0,0 +1,20 @@
+(define-module (c eval2)
+ :use-module ((hnh util) :select (->))
+ :export (C-TRUE
+ C-FALSE
+ boolean->c-boolean
+ c-boolean->boolean
+ c-not))
+
+
+(define C-TRUE 1)
+(define C-FALSE 0)
+
+(define (boolean->c-boolean bool)
+ (if bool C-TRUE C-FALSE))
+
+(define (c-boolean->boolean bool)
+ (not (zero? bool)))
+
+(define (c-not b)
+ (-> b c-boolean->boolean not boolean->c-boolean))
diff --git a/module/c/lex2.scm b/module/c/lex2.scm
new file mode 100644
index 00000000..23fa9da4
--- /dev/null
+++ b/module/c/lex2.scm
@@ -0,0 +1,323 @@
+(define-module (c lex2)
+ :use-module (ice-9 peg)
+ :export (lex))
+
+;;; A.1 Lexical grammar
+;;; A.1.1 Lexical elements
+
+;; (6.4)
+(define-peg-pattern token all
+ (or keyword
+ identifier
+ constant
+ string-literal
+ punctuator
+ ))
+
+;; (6.4)
+(define-peg-pattern preprocessing-token all
+ ;; string literal moved before header-name since string literals
+ ;; otherwise became q-strings
+ (or string-literal
+ header-name
+ identifier
+ pp-number
+ character-constant
+ punctuator
+ ;; Each non-white-space character that cannot be one of the above
+ ))
+
+;;; A.1.2 Keywords
+
+;; (6.4.1)
+(define-peg-pattern keyword all
+ (or "auto" "break" "case" "char" "const" "continue" "default"
+ "do" "double" "else" "enum" "extern" "float" "for" "goto"
+ "if" "inline" "int" "long" "register" "restrict" "return"
+ "short" "signed" "sizeof" "static" "struct" "switch"
+ "typedef" "union" "unsigned" "void" "volatile" "while"
+ "_Alignas" "_Alignof" "_Atomic" "_Bool" "_Complex"
+ "_Generic" "_Imaginary" "_Noreturn" "_Static_assert"
+ "_Thread_local"))
+
+;;; A.1.3 Identifiers
+
+;; (6.4.2.1)
+(define-peg-pattern identifier all
+ (and identifier-nondigit (* (or identifier-nondigit digit))))
+
+;; (6.4.2.1)
+(define-peg-pattern identifier-nondigit body
+ (or nondigit
+ universal-character-name
+ ;; TODO other implementation-defined characters
+ ))
+
+;; (6.4.2.1)
+(define-peg-pattern nondigit body
+ (or "_"
+ (range #\A #\Z)
+ (range #\a #\z)))
+
+;; (6.4.2.1)
+(define-peg-pattern digit body
+ (range #\0 #\9))
+
+;;; A.1.4 Universal character names
+
+;; (6.4.3)
+(define-peg-pattern universal-character-name all
+ (or (and "\\u" hex-quad)
+ (and "\\U" hex-quad hex-quad)))
+
+;; (6.4.3)
+(define-peg-pattern hex-quad body
+ (and hexadecimal-digit hexadecimal-digit
+ hexadecimal-digit hexadecimal-digit))
+
+;;; A.1.5 Constants
+
+;; (6.4.4)
+(define-peg-pattern constant all
+ ;; Int and float swapped from standard since we need to try parsing
+ ;; the floats beforehand
+ (or floating-constant
+ integer-constant
+ enumeration-constant
+ character-constant))
+
+;; (6.4.4.1)
+(define-peg-pattern integer-constant all
+ (and (or decimal-constant
+ octal-constant
+ hexadecimal-constant)
+ integer-suffix))
+
+;; (6.4.4.1)
+(define-peg-pattern decimal-constant all
+ (and nonzero-digit
+ (+ digit)))
+
+;; (6.4.4.1)
+(define-peg-pattern octal-constant all
+ (+ octal-digit))
+
+;; (6.4.4.1)
+(define-peg-pattern hexadecimal-constant all
+ (and hexadecimal-prefix (+ hexadecimal-digit)))
+
+;; (6.4.4.1)
+(define-peg-pattern hexadecimal-prefix none
+ (or "0x" "0X"))
+
+;; (6.4.4.1)
+(define-peg-pattern nonzero-digit body
+ (range #\1 #\9))
+
+;; (6.4.4.1)
+(define-peg-pattern octal-digit body
+ (range #\0 #\7))
+
+;; (6.4.4.1)
+(define-peg-pattern hexadecimal-digit body
+ (or (range #\0 #\9)
+ (range #\a #\f)
+ (range #\A #\F)))
+
+;; (6.4.4.1)
+(define-peg-pattern integer-suffix all
+ (or (and unsigned-suffix (? long-suffix))
+ (and long-suffix (? unsigned-suffix))))
+
+;; (6.4.4.1)
+;; This is a merger of long-suffix and long-long-suffix
+(define-peg-pattern long-suffix body
+ (or "l" "L" "ll" "LL"))
+
+;; (6.4.4.1)
+(define-peg-pattern unsigned-suffix body
+ (or "u" "U"))
+
+;; (6.4.4.2)
+(define-peg-pattern floating-constant all
+ (or decimal-floating-constant
+ hexadecimal-floating-constant))
+
+;; (6.4.4.2)
+(define-peg-pattern floating-constant all
+ (or (and fractional-constant (? exponent-part) (? floating-suffix))
+ (and digit-sequence exponent-part (? floating-suffix))))
+
+;; (6.4.4.2)
+(define-peg-pattern hexadecimal-floating-constant all
+ (and hexadecimal-prefix
+ (or hexadecimal-fractional-constant
+ hexadecimal-digit-sequence)
+ binary-exponent-part
+ (? floating-suffix)))
+
+;; (6.4.4.2)
+(define-peg-pattern fractional-constant all
+ (or (and (? digit-sequence) "." digit-sequence)
+ (and digit-sequence ".")))
+
+;; (6.4.4.2)
+(define-peg-pattern exponent-part all
+ (and (or "e" "E") (? sign) digit-sequence))
+
+;; (6.4.4.2)
+(define-peg-pattern sign all
+ (or "+" "-"))
+
+;; (6.4.4.2)
+(define-peg-pattern digit-sequence body
+ (+ digit))
+
+;; (6.4.4.2)
+(define-peg-pattern hexadecimal-fractional-constant all
+ (or (and (? hexadecimal-digit-sequence) "." hexadecimal-digit-sequence)
+ (and hexadecimal-digit-sequence ".")))
+
+;; (6.4.4.2)
+(define-peg-pattern binary-exponent-part all
+ (and (or "p" "P")
+ (? sign)
+ digit-sequence))
+
+;; (6.4.4.2)
+(define-peg-pattern hexadecimal-digit-sequence body
+ (+ hexadecimal-digit))
+
+;; (6.4.4.2)
+(define-peg-pattern floating-suffix all
+ (or "f" "l" "F" "L"))
+
+;; (6.4.4.3)
+(define-peg-pattern enumeration-constant all
+ identifier)
+
+(define-peg-pattern character-prefix all
+ (or "L" "u" "U"))
+
+;; (6.4.4.4)
+(define-peg-pattern character-constant all
+ (and (? character-prefix)
+ (ignore "'")
+ (+ c-char)
+ (ignore "'")))
+
+;; (6.4.4.4)
+(define-peg-pattern c-char body
+ (or (and (not-followed-by (or "'" "\\" "\n")) peg-any)
+ escape-sequence))
+
+;; (6.4.4.4)
+(define-peg-pattern escape-sequence all
+ (or simple-escape-sequence
+ octal-escape-sequence
+ hexadecimal-escape-sequence
+ universal-character-name))
+
+;; (6.4.4.4)
+(define-peg-pattern simple-escape-sequence all
+ (and (ignore "\\") (or "'" "\"" "?" "\\"
+ "a" "b" "f" "n" "r" "t" "v")))
+
+;; (6.4.4.4)
+(define-peg-pattern octal-escape-sequence all
+ (and (ignore "\\") octal-digit (? octal-digit) (? octal-digit)))
+
+;; (6.4.4.4)
+(define-peg-pattern hexadecimal-escape-sequence all
+ (and (ignore "\\x") (+ hexadecimal-digit)))
+
+;; A.1.6 String literals
+
+;; (6.4.5)
+(define-peg-pattern string-literal all
+ (and (? encoding-prefix)
+ (ignore "\"")
+ (* s-char)
+ (ignore "\"")))
+
+;; (6.4.5)
+(define-peg-pattern encoding-prefix all
+ (or "u8" "u" "U" "L"))
+
+;; (6.4.5)
+(define-peg-pattern s-char body
+ (or (and (not-followed-by (or "\"" "\\" "\n")) peg-any)
+ escape-sequence))
+
+;;; A.1.7
+
+;; (6.4.6)
+(define-peg-pattern punctuator all
+ (or "[" "]" "(" ")" "{" "}" "." "->"
+ "++" "--" "&" "*" "+" "-" "~" "!"
+ "/" "%" "<<" ">>" "<" ">" "<=" ">=" "==" "!=" "^" "|" "&&" "||"
+ "?" ":" ";" "..."
+ "=" "*=" "/=" "%=" "+=" "-=" "<<=" ">>=" "&=" "^=" "|="
+ "," "#" "##"
+ "<:" ":>" "<%" "%>" "%:" "%:%:"))
+
+;;; A.1.8 Header names
+
+(define-peg-pattern h-string all (+ h-char))
+(define-peg-pattern q-string all (+ q-char))
+
+;; (6.4.7)
+(define-peg-pattern header-name all
+ (or (and (ignore "<") h-string (ignore ">"))
+ (and (ignore "\"") q-string (ignore "\""))))
+
+;; (6.4.7)
+(define-peg-pattern h-char body
+ (or (and (not-followed-by (or ">" "\n")) peg-any)
+ escape-sequence))
+
+;; (6.4.7)
+(define-peg-pattern q-char body
+ (or (and (not-followed-by (or "\"" "\n")) peg-any)
+ escape-sequence))
+
+;;; A.1.9 Preprocessing numbers
+
+;; (6.4.8)
+(define-peg-pattern pp-number all
+ (and (? ".") digit
+ (* (or digit
+ identifier-nondigit
+ (and (or "e" "E" "p" "P")
+ sign)
+ "."))))
+
+
+
+(define-peg-pattern whitespace all
+ (or "\t" "\n" "\v" "\f" " "
+ ;; "\r"
+ ))
+
+(define-peg-pattern block-comment body
+ (and (ignore "/*")
+ (* (and (not-followed-by "*/")
+ peg-any))
+ (ignore "*/")))
+
+(define-peg-pattern line-comment body
+ (and (ignore "//")
+ (* (and (not-followed-by "\n")
+ peg-any))))
+
+(define-peg-pattern comment all
+ (or line-comment block-comment))
+
+(define-peg-pattern preprocessing-tokens body
+ (* (or whitespace
+ comment
+ preprocessing-token)))
+
+
+(define (lex string)
+ (peg:tree (match-pattern preprocessing-tokens string)))
diff --git a/module/c/line-fold.scm b/module/c/line-fold.scm
new file mode 100644
index 00000000..c61c2c70
--- /dev/null
+++ b/module/c/line-fold.scm
@@ -0,0 +1,29 @@
+(define-module (c line-fold)
+ :use-module (srfi srfi-1)
+ :use-module (srfi srfi-71)
+ :export (fold-lines))
+
+(define (line-continued? line)
+ (and (not (string-null? line))
+ (char=? #\\ (string-ref line (1- (string-length line))))))
+
+(define (strip-backslash line)
+ (string-drop-right line 1))
+
+(define (fold-lines string)
+ (with-output-to-string
+ (lambda ()
+ (let loop ((lines (string-split string #\newline)))
+ (cond ((null? lines) 'NOOP)
+ ((null? (cdr lines))
+ ;; TODO error message if last character is a backslash
+ (display (car lines))
+ (newline))
+ (else
+ (let ((to-merge remaining (span line-continued? lines)))
+ (for-each display (map strip-backslash to-merge))
+ (display (car remaining))
+ (newline)
+ (for-each (lambda _ (newline))
+ (iota (length to-merge)))
+ (loop (cdr remaining)))))))))
diff --git a/module/c/preprocessor.scm b/module/c/preprocessor.scm
new file mode 100644
index 00000000..49ecfa27
--- /dev/null
+++ b/module/c/preprocessor.scm
@@ -0,0 +1,394 @@
+(define-module (c preprocessor)
+ :use-module (srfi srfi-1)
+ :use-module (srfi srfi-9 gnu)
+ :use-module (ice-9 rdelim)
+ :use-module (ice-9 regex)
+ :use-module (hnh util object)
+
+ :use-module (hnh util)
+ :use-module (hnh util object)
+ )
+
+(define (read-lines port)
+ (let loop ((done '()))
+ (let ((line (read-line port)))
+ (if (eof-object? line)
+ (reverse done)
+ (loop (cons line done))))))
+
+;; The source line of a give readen line
+(define line-number (make-object-property))
+;; The source file of a given readen line
+(define line-file (make-object-property))
+
+
+(define (mark-with-property! items property property-value)
+ (for-each (lambda (item) (set! (property item) property-value))
+ items))
+
+(define trigraph-rx (make-regexp "??([=()/'<>!-])"))
+(define (expand-trigraphs line)
+ (regexp-substitute/global
+ #f trigraph-rx
+ line
+ 'pre (lambda (m) (case (string-ref (match:substring m 1) 1)
+ ((#\=) "#")
+ ((#\() "[")
+ ((#\)) "]")
+ ((#\/) "\\")
+ ((#\') "^")
+ ((#\<) "{")
+ ((#\>) "}")
+ ((#\!) "|")
+ ((#\-) "~")))
+ 'post))
+
+(define (number-lines lines)
+ (for-each (lambda (line number)
+ (set! (line-number line) number))
+ lines
+ (iota (length lines) 1))
+ lines)
+
+;; Should this line be merged with the next
+(define (line-continued? line)
+ (case (string-length line)
+ ((0) #f)
+ ((1) (string=? "\\" line))
+ (else
+ (let ((len (string-length line)))
+ ;; TODO can extra backslashes change this?
+ (and (char=? #\\ (string-ref line (- len 1)))
+ (not (char=? #\\ (string-ref line (- len 2)))))))))
+
+(define (transfer-line-number to from)
+ (set! (line-number to) (line-number from))
+ to)
+
+;; Merge two lines, assuming that upper ends with a backslash
+(define (merge-lines upper lower)
+ (let ((new-string (string-append (string-drop-right upper 1) lower)))
+ (transfer-line-number new-string upper)
+ new-string))
+
+(define (fold-lines lines)
+ (fold-right (lambda (line done)
+ (if (line-continued? line)
+ (cons (merge-lines line (car done)) (cdr done))
+ (cons line done)))
+ '()
+ lines))
+
+
+(define comment-rx (make-regexp "(//|/[*]|[*]/)"))
+
+(define (strip-comments lines)
+ (let loop ((in-comment #f) (lines lines) (done '()))
+ (if (null? lines)
+ (reverse done)
+ (let ((line (car lines)))
+ (cond ((regexp-exec comment-rx line)
+ => (lambda (m)
+ (format (current-output-port) "~s ~s substr = ~s~%" in-comment (line-number line) (match:substring m))
+ (cond ((and in-comment (string=? "*/" (match:substring m)))
+ (loop #f (cons (transfer-line-number (match:suffix m) line)
+ (cdr lines))
+ done))
+ (in-comment (loop #t (cdr lines) done))
+ ((string=? "//" (match:substring m))
+ (loop #f (cdr lines) (cons (transfer-line-number (match:prefix m) line)
+ done)))
+ ((string=? "/*" (match:substring m))
+ (loop #t (cons (transfer-line-number (match:suffix m) line) (cdr lines)) done))
+ (else
+ (scm-error 'cpp-error "strip-comments"
+ "Unexpected */ in file ~a on line ~a"
+ (list (line-file line) (line-number line))
+ #f)))))
+ (else (loop in-comment (cdr lines) (cons line done))))))))
+
+
+(define-immutable-record-type <preprocessor-directive>
+ (make-preprocessor-directive type body)
+ proprocessor-directive?
+ (type directive-type)
+ (body directive-body))
+
+(define cpp-directive-rx (make-regexp "\\s*#\\s*((\\w+)(.*))?"))
+(define (preprocessor-directive? line)
+ (cond ((regexp-exec cpp-directive-rx line)
+ => (lambda (m)
+ (if (match:substring m 2)
+ (make-preprocessor-directive
+ (string->symbol (match:substring m 2))
+ (string-trim-both (match:substring m 3) char-set:whitespace))
+ 'sort-of)))
+ (else #f)))
+
+;; defined
+
+;; TODO _Pragma
+
+
+(define (expand-function-line-macro environment macro . params)
+ (expand-macro environment (apply-macro macro (map (lambda (param) (expand-macro environment param)) params))))
+
+;; (define (environment-ref ))
+
+(define (list-of? lst predicate)
+ (every predicate lst))
+
+
+(define-type (cpp-environment)
+ (cpp-if-status type: (list-of? (lambda (x) (memv x '(outside active-if inactive-if))))
+ ;; type: (list-of? (memv '(outside active-if inactive-if)))
+ default: '(outside))
+ (cpp-variabes type: hash-table? default: (make-hash-table)))
+
+(define (make-environment) (cpp-environment))
+
+(define (in-envirnoment? environment key)
+ (hash-get-handle (cpp-variables environment) key))
+
+(define (remove-identifier! environment key)
+ (hash-remove! (cpp-variables environment) key)
+ environment)
+
+(define (add-identifier! environment key value)
+ (assert (string? key))
+ (assert (macro? value))
+ (hash-set! (cpp-variables environment) key value)
+ environment)
+
+;; Parantheses when defining macro
+(define (parse-parameter-string string)
+ (map string-trim-both
+ (string-split (string-trim-both string (char-set #\( #\)))
+ #\,)))
+
+
+(define-type (object-macro)
+ (body type: string?))
+
+(define-type (function-macro)
+ (formals type: (list-of? string?))
+ (body type: string?))
+
+(define (macro? x)
+ (or (object-macro? x)
+ (function-macro? x)))
+
+;; The interesting part
+;; environment, (list string) -> (values (list string) (list strings))
+;; multiple lines since since a function-like macro can extend over multiple lines
+(define (expand-macros environment strings)
+ )
+
+
+(define (crash-if-not-if body guilty)
+ (scm-error 'cpp-error guilty
+ "else, elif, and endif invalid outside if scope: ~s~%file: ~s line: ~s"
+ (list body (line-file body) (line-number body))))
+
+;; (environment, lines) -> environment x lines
+(define (parse-directives environment lines)
+ (let loop ((environment environment) (lines lines) (done '()))
+ (let* ((line (car line))
+ (directive? (preprocessor-directive? line)))
+ (case directive?
+ ((#f) ; regular line
+ (loop environment (cdr lines)
+ ;; TODO this doesn't work, since parse-macros works on multiple lines
+ (cons (parse-macros environment (car lines)) done)))
+ ((sort-of) ; a no-op directive
+ (loop environment (cdr lines) done))
+ (else ; an actual directive
+ (case (car (cpp-if-status environment))
+ ((outside)
+ (case (directive-type m)
+ ((ifndef endif else)
+ (scm-error 'cpp-error "parse-directives"
+ "Unexpected directive: ~s"
+ (list line) #f))
+ (else ; inside if, ifelse or else
+ ;; outside active-if inactive-if
+ (case (directive-type m)
+ ;; stack ending directives
+ ((endif)
+ (case (car (cpp-if-status environment))
+ ((outside) (crash-if-not-if (directive-body m) "endif"))
+ (else (loop (modify environment cpp-if-status cdr)
+ (cdr lines)
+ done))))
+
+ ;; stack nudging directives
+ ((else)
+ (case (car (cpp-if-status environment))
+ ((outside) (crash-if-not-if (directive-body m) "else"))
+ (else (loop (modify environment (lens-compose cpp-if-status car*)
+ (lambda (old)
+ (case old
+ ((active-if) 'inactive-if)
+ ((inactive-if) 'active-if))))
+ (cdr lines)
+ done))))
+ ((elif)
+ (case (car (cpp-if-status environment))
+ ((outside) (crash-if-not-if (directive-body m) "elif"))
+ (else ;; TODO
+ )
+ ))
+
+ ;; stack creating directives
+ ;; on inactive-if each creates a new frame, which also is inactive
+ ((ifndef)
+ (case (car (cpp-if-status environment))
+ ((inactive-if) (loop (modify environment cpp-if-status
+ xcons 'inactive-if)
+ (cdr lines)
+ done))
+ (else (loop (modify environment cpp-if-status
+ xcons (if (in-environment? environment (directive-body directive?))
+ 'inactive-if 'active-if))
+ (cdr lines)
+ done))))
+
+ ((ifdef)
+ (case (car (cpp-if-status environment))
+ ((inactive-if) (loop (modify environment cpp-if-status
+ xcons 'inactive-if)
+ (cdr lines)
+ done))
+ (else
+ (loop (modify environment cpp-if-status
+ xcons (if (in-environment? environment (directive-body directive?))
+ 'active-if 'inactive-if))
+ (cdr lines)
+ done))))
+
+ ((if)
+ (case (car (cpp-if-status environment))
+ ((inactive-if) (loop (modify environment cpp-if-status
+ xcons 'inactive-if)
+ (cdr lines)
+ done))
+ (else ;; TODO
+ )))
+
+
+ ;; other directives
+ ((include) (cond ((string-match "[<\"](.*)"
+ => (lambda (m)
+ (let ((fileneme (string-drop-right (directive-body m) 1)))
+ (case (string-ref (match:substring m 1) 0)
+ ;; TODO include-path
+ ((#\<) (handle-file environment filename))
+ ((#\") (handle-file environment filename))))))
+ (else (scm-error 'cpp-error "parse-directives"
+ "Invalid include"
+ '() #f))))
+ ((define)
+ ;; TODO what are valid names?
+ (cond ((string-match "^(\\w+)([(][^)]*[)])?\\s+(.*)" (directive-body directive?))
+ => (lambda (m)
+ (loop (let ((macro-body (string-trim-both (match:substring m 3))))
+ (add-identifier!
+ environment
+ (match:substring m 1)
+ (cond ((match:substring m 2)
+ => (lambda (parameter-string)
+ (function-macro
+ formals: (parse-parameter-string parameter-string)
+ body: macro-body)))
+ (else (object-macro body: macro-body)))))
+ (cdr lines)
+ done)))
+ (else (scm-error 'cpp-error "parse-directives"
+ "Invalid #define line, ~s"
+ (list (directive-body directive?))
+ #f))))
+
+ ((undef)
+ (case (car (cpp-if-status environment))
+ ((inactive-if) (loop environment (cdr lines) done))
+ (else (loop (remove-identifier environment (directive-body directive?))
+ (cdr lines)
+ done))))
+
+ ((line)
+ (case (car (cpp-if-status environment))
+ ((inactive-if) (loop environment (cdr lines) done))
+ ;; TODO add first-run parameter to loop, in case expand-macros still return something invalid
+ (else (let parse-line-directive ((tokens (string-tokenize (directive-body directive?))))
+ (cond ((= 1 (length tokens))
+ ;; TODO parse token
+ (if (integer? (car tokens))
+ ;; TODO update current line
+ (loop environment (cdr lines) done)
+ (parse-line-directive (expand-macros environment (directive-body directive?)))))
+ ((= 2 (length tokens))
+ ;; TODO parse tokens
+ (if (and (integer? (car tokens))
+ (string-literal? (cadr tokens)))
+ ;; TODO update current line and file
+ (loop environment (cdr lines) done)
+ (parse-line-directive (expand-macros environment (directive-body directive?)))))
+ (else (parse-line-directive (expand-macros environment (directive-body directive?)))))))))
+
+ ((error)
+ (throw 'cpp-error-directive
+ (directive-body directive?)))
+
+ ((warning)
+ (format (current-error-port) "#warning ~a~%"
+ (directive-body directive?))
+ (loop environment (cdr lines) done))
+
+ ((pragma)
+ (format (current-error-port)
+ "PRAGMA: ~s~%" (directive-body directive?))
+ (loop environment (cdr lines) done))
+
+ ((ident sccs)
+ (format (current-error-port)
+ "IDENT: ~s~%" (directive-body directive?))
+ (loop environment (cdr lines) done))
+
+ (else
+ (scm-error 'cpp-error "parse-directives"
+ "Unknown pre-processor directive: ~s"
+ (list line) #f)
+ )))))))))
+ ))
+
+
+(define* (writeln expr optional: (port (current-output-port)))
+ (write expr port)
+ (newline port))
+
+(define (handle-lines environment lines)
+ (parse-directive environment
+ (compose strip-comments fold-lines number-lines)))
+
+ ;; parse-directives environment
+
+;; Return a number of lines
+(define (read-file file-path)
+ (define raw-lines (call-with-input-file file-path read-lines))
+ (mark-with-property! line line-file file-path)
+ (handle-lines raw-lines))
+
+
+;; pre defined macros
+;; see info manual for cpp 3.7.1 Standard Predefined Macros
+;; __FILE__
+;; __LINE__
+;; __DATE__ "Feb 12 1996"
+;; __TIME__ "23:59:01"
+;; __STDC__ 1
+;; __STDC_VERSION__ 201112L
+;; __STDC_HOSTED__ 1
+
+;; __cplusplus
+;; __OBJC__
+;; __ASSEMBLER__
diff --git a/module/c/preprocessor2.scm b/module/c/preprocessor2.scm
new file mode 100644
index 00000000..19daabfb
--- /dev/null
+++ b/module/c/preprocessor2.scm
@@ -0,0 +1,496 @@
+(define-module (c preprocessor2)
+ :use-module (srfi srfi-1)
+ :use-module (srfi srfi-71)
+ :use-module (srfi srfi-88)
+ :use-module (ice-9 match)
+ :use-module (c cpp-environment)
+ :use-module (c eval2)
+ :use-module ((c cpp-environment function-like-macro) :select (function-like-macro))
+ :use-module ((c cpp-environment object-like-macro) :select (object-like-macro))
+ :use-module ((c cpp-environment internal-macro) :select (internal-macro))
+ :use-module ((hnh util) :select (->))
+ :use-module ((hnh util lens) :select (set))
+ :use-module (hnh util path)
+ :use-module ((c lex2) :select (lex))
+ :use-module ((c trigraph) :select (replace-trigraphs))
+ :use-module ((c line-fold) :select (fold-lines))
+ :export ())
+
+(define (tokens-until-eol tokens)
+ (break (lambda (token) (equal? token '(whitespace "\n")))
+ tokens))
+
+(define (whitespace-token? token)
+ (eq? 'whitespace (car token)))
+
+(define (preprocessing-token? token)
+ (eq? 'preprocessing-token token))
+
+(define (squeeze-whitespace tokens)
+ (match tokens
+ ('() '())
+ (`((whitespace ,_) (whitespace ,_) ,rest ...)
+ (squeeze-whitespace (cons '(whitespace " ") rest)))
+ (`((whitespace ,_) ,rest ...)
+ (cons '(whitespace " ") (squeeze-whitespace rest)))
+ ((token rest ...)
+ (cons token (squeeze-whitespace rest)))))
+
+(define (stringify-token token)
+ ;; TODO propperly implement this
+ `(preprocessing-token
+ (string-literal ,(with-output-to-string (lambda () (display token))))))
+
+(define (stringify-tokens tokens)
+ (with-output-to-string
+ (lambda ()
+ (for-each (compose display stringify-token)
+ (squeeze-whitespace tokens)))))
+
+;; Expand ## tokens
+;; TODO
+(define (expand-join macro tokens)
+ tokens)
+
+;; expand function like macro
+(define (apply-macro environment macro parameters)
+ (define parameter-map
+ (if (macro-variadic? macro)
+ (let ((head rest (split-at parameters (length (macro-identifier-list macro)))))
+ ;; TODO commas (,) should be interleaved with rest
+ (cons (cons "__VA_ARGS__" rest)
+ (map cons (macro-identifier-list macro) head)))
+ (map cons
+ (macro-identifier-list macro)
+ parameters)))
+
+ ;; resolve strigify operators
+ (define stringify-resolved
+ (let loop ((tokens (macro-body macro)))
+ (match tokens
+ (`((preprocessing-token (punctuator "#"))
+ (whitespace ,_) ...
+ (preprocessing-token (identifier ,x))
+ ,rest ...)
+ (unless (member x (macro-identifier-list macro))
+ (scm-error 'macro-expand-error "apply-macro"
+ "'#' is not followed by a macro parameter: ~s"
+ (list x) #f)
+ (cons (stringify-tokens (assoc-ref parameter-map x))
+ (loop rest))))
+ ('() '())
+ ((token rest ...)
+ (cons token (loop rest))))))
+
+ ;; TODO
+ ;; - resolve ##
+ (define resulting-body
+ (expand-join macro stringify-resolved))
+
+ ;; - subtitute parameters
+ ;; TODO what am I doing here?
+ (expand-macro (-> environment
+ (extend-environment parameter-map))
+ resulting-body))
+
+
+
+;; Expand object-like macro
+
+;; #define VALUE 10
+;; #define str(x) #x
+;; #define OTHER str(VALUE)
+;; OTHER
+;; ⇒ "VALUE"
+
+(define (expand-macro environment macro tokens)
+ (cond ((object-macro? macro)
+ (values environment (append (macro-body macro) tokens)))
+
+ ((function-macro? macro)
+ (let ((containing remaining newlines (parse-parameter-list tokens)))
+ (values (bump-line environment newlines)
+ ;; Macro output can be macro expanded
+ ;; TODO self-referential macros?
+ (append (apply-macro environment macro containing) remaining))))
+
+ ((internal-macro? macro)
+ (let ((containing remaining newlines (parse-parameter-list tokens)))
+ (values (bump-line environment newlines)
+ (append ((macro-body macro) environment containing)
+ remaining))))))
+
+;; Takes a list of preprocessing tokens, and returns two values
+;; if the last token was '...'
+;; and a list of strings of all token names
+;; Note that this is ONLY #define f(x) forms
+;; not usage forms
+(define (parse-identifier-list tokens)
+ (let loop ((tokens (remove whitespace-token? tokens)) (done '()))
+ (match tokens
+ ('() (values #f (reverse done)))
+
+ ((`(preprocessing-token (punctuation "...")))
+ (values #t (reverse done)))
+
+ ((`(preprocessing-token (identifier ,id)) rest ...)
+ (loop rest (cons id done)))
+
+ ((`(preprocessing-token (punctuation "...")) rest ...)
+ (scm-error 'cpp-error "parse-identifier-list"
+ "'...' only allowed as last argument in identifier list"
+ '() #f))
+
+ ((`(preprocessing-token ,other) rest ...)
+ (scm-error 'cpp-error "parse-identifier-list"
+ "Unexpected preprocessing-token in identifier list: ~s"
+ (list other) #f)))))
+
+
+;; returns three values:
+;; - a list of tokens where each is a parameter to the function like macro
+;; - the remaining tokenstream
+;; - how many newlines were encountered
+(define (parse-parameter-list tokens)
+ (let %loop ((depth 0) (newlines 0) (current '())
+ (parameters '()) (tokens tokens) (%first-iteration? #t))
+ (define* (loop tokens key:
+ (depth depth) (newlines newlines)
+ (current current) (parameters parameters))
+ (%loop depth newlines current parameters tokens #f))
+ (let ((current* (if (zero? depth)
+ current
+ (cons (car tokens) current))))
+ (match tokens
+ (`((whitespace "\n") ,rest ...)
+ (loop rest newlines: (1+ newlines) current: current*))
+ (`((whitespace ,_) ,rest ...)
+ (loop rest current: current*))
+ (`((preprocessing-token (punctuator "(")) ,rest ...)
+ (loop rest depth: (1+ depth) current: current*))
+ (`((preprocessing-token (punctuator ")")) ,rest ...)
+ (if (= 1 depth)
+ (values (reverse (cons (reverse current) parameters))
+ rest
+ newlines)
+ (loop rest
+ depth: (1- depth)
+ current: current*)))
+ (`((preprocessing-token (punctuator ",")) ,rest ...)
+ (if (= 1 depth)
+ (loop rest
+ current: '()
+ parameters: (cons (reverse current) parameters))
+ (loop rest current: current*)))))))
+
+
+(define (join-file-line environment)
+ (define file (current-file environment))
+ (define line (current-line environment))
+ (extend-environment
+ environment
+ ;; 6.10.8
+ `(("__FILE__" . (preprocessing-token (string-literal ,file)))
+ ("__LINE__" . (preprocessing-token (pp-number ,(number->string line)))))))
+
+(define (c-search-path) (make-parameter (list "." "/usr/include")))
+
+;; #include <stdio.h>
+(define (resolve-h-file string)
+ (cond ((path-absolute? string) string)
+ (else
+ (let ((filename
+ (find file-exists?
+ (map (lambda (path-prefix)
+ (path-append path-prefix string))
+ (c-search-path)))))
+ (if filename filename
+ (scm-error 'cpp-error "resolve-h-file"
+ "Can't resolve file: ~s"
+ (list string) #f))))))
+
+;; #include "myheader.h"
+(define (resolve-q-file string)
+ ;; This should always be a fallback (6.10.2, p. 3)
+ (cond (else (resolve-h-file string))))
+
+(define defined-macro
+ (internal-macro
+ identifier: "defined"
+ body: (lambda (environment tokens)
+ (match tokens
+ (`((preprocessor-token (identifier ,id)))
+ `(preprocessor-token (pp-number ,(boolean->c-boolean (in-environment? environment id)))))
+ (_ (scm-error 'cpp-error "defined"
+ "Invalid parameter list to `defined': ~s"
+ (list tokens) #f))))))
+
+;; environment, tokens → environment
+(define (handle-pragma environment tokens)
+ (match tokens
+ (`((preprocessing-token (identifier "STDC")) (whitespace ,_) ...
+ (preprocessing-token (identifier ,identifier)) (whitespace ,_) ...
+ (preprocessing-token (identifier ,on-off-switch)) (whitespace ,_) ...)
+ ;; TODO actually do something with the pragmas (probably just store them in the environment)
+ (format (current-error-port)
+ "#Pragma STDC ~a ~a" identifier on-off-switch)
+ environment)
+ (_ (format (current-error-port)
+ "Non-standard #Pragma: ~s~%" tokens)
+ environment)))
+
+
+;; TODO
+;; (define _Pragma-macro
+;; (internal-macro
+;; identifier: "_Pragma"
+;; body: (lambda (environment tokens)
+;; )))
+
+;; TODO
+(define (resolve-constant-expression tokens)
+ 'TODO
+ )
+
+(define (resolve-token-stream environment tokens)
+ (let loop ((tokens tokens))
+ (match tokens
+ ('() '())
+ (`((preprocessing-token (identifier ,id)) ,rest ...)
+ (call-with-values (lambda () (maybe-extend-identifier environment id rest))
+ (lambda (_ tokens) (loop tokens))))
+ (`((whitespace ,_) ,rest ...)
+ (loop rest))
+ ((token rest ...)
+ (cons token (loop rest))))))
+
+;; returns a new environment
+;; handle body of #if
+;; environment, (list token) → environment
+(define (resolve-for-if environment tokens)
+ (-> (extend-environment environment defined-macro)
+ (resolve-token-stream tokens)
+ resolve-constant-expression
+ c-boolean->boolean
+ (if (enter-active-if environment)
+ (enter-inactive-if environment))))
+
+;; environment, string, (list token) → environment, (list token)
+(define (maybe-extend-identifier environment identifier remaining-tokens)
+ (cond ((get-identifier environment identifier)
+ => (lambda (value) (expand-macro (join-file-line environment) value remaining-tokens)))
+ (else ; It wasn't an identifier, leave it as is
+ (values environment remaining-tokens))))
+
+(define (resolve-and-include-header environment tokens)
+ (let loop ((%first-time #t) (tokens tokens))
+ (match (drop-while whitespace-token? tokens)
+ ((`(header-name (h-string ,str)) rest ...)
+ (cond ((remove whitespace-token? rest)
+ (negate null?)
+ => (lambda (tokens)
+ (scm-error 'cpp-error "resolve-and-include-header"
+ "Unexpected tokens after #include <>: ~s"
+ (list tokens) #f))))
+ (handle-preprocessing-tokens
+ environment
+ (-> str resolve-h-file read-file tokenize)))
+
+ ((`(header-name (q-string ,str)) rest ...)
+ (cond ((remove whitespace-token? rest)
+ (negate null?)
+ => (lambda (tokens)
+ (scm-error 'cpp-error "resolve-and-include-header"
+ "Unexpected tokens after #include <>: ~s"
+ (list tokens)
+ #f))))
+ (handle-preprocessing-tokens
+ environment
+ (-> str resolve-q-file read-file tokenize)))
+
+ (tokens
+ (unless %first-time
+ (scm-error 'cpp-error "resolve-and-include-header"
+ "Failed parsing tokens: ~s"
+ (list tokens) #f))
+ (loop #f (resolve-token-stream environment tokens))))))
+
+;; environment, tokens → environment
+(define (handle-line-directive environment tokens*)
+ (let loop ((%first-time #t) (tokens tokens*))
+ (match tokens
+ (`((preprocessing-token (pp-number ,line)) (whitespace ,_) ... rest ...)
+ (match rest
+ (`((preprocessing-token (string-literal ,file)) (whitespace ,_) ...)
+ (-> environment
+ (set current-line line)
+ (set current-file file)))
+ (`((whitespace ,_) ...)
+ (set environment current-line line))
+ (_ (unless %first-time
+ (scm-error 'cpp-error "handle-line-directive"
+ "Invalid line directive: ~s"
+ (list tokens*) #f))
+ (loop #f (resolve-token-stream environment tokens)))))
+ (_ (unless %first-time
+ (scm-error 'cpp-error "handle-line-directive"
+ "Invalid line directive: ~s"
+ (list tokens*) #f))
+ (loop #f (resolve-token-stream environment tokens))))))
+
+;; environment, tokens → environment
+(define (resolve-define environment tokens)
+ (match tokens
+ (`((preprocessing-token (identifier ,identifier)) tail ...)
+ (-> environment
+ bump-line
+ (add-identifier!
+ identifier
+ (if (equal? '(preprocessing-token (punctuator "(")) (car tail))
+ ;; function like macro
+ (call-with-values (lambda () (break (lambda (token) (equal? token '(preprocessing-token (punctuator ")"))))
+ (cdr tail)))
+ (lambda (identifier-list replacement-list)
+ (let ((variadic? identifiers (parse-identifier-list identifier-list)))
+ (function-like-macro
+ identifier: identifier
+ variadic?: variadic?
+ identifier-list: identifiers
+ ;; NOTE 6.10.3 states that there needs to be at least on whitespace here
+ body: (cdr replacement-list)))))
+
+ (object-like-macro
+ identifier: identifier
+ body: tail)))))))
+
+
+
+;; environment, tokens -> environment, tokens
+(define (handle-preprocessing-tokens environment tokens)
+ (let loop ((environment environment) (tokens tokens))
+ (define (err fmt . args)
+ (scm-error 'cpp-error "handle-preprocessing-tokens"
+ (string-append "~a:~a " fmt)
+ (cons* (current-file environment)
+ (current-line environment)
+ args)
+ #f))
+
+ (match tokens
+ ('() '())
+ (`((whitespace "\n") (whitespace ,_) ... (preprocessing-token (puntuator "#")) ,rest ...)
+ ;; Line tokens are those in this line,
+ ;; while remaining tokens are the newline, follewed by the rest of the files tokens
+ (let ((line-tokens remaining-tokens (tokens-until-eol rest)))
+ ;; Actual tokens just removes all whitespace between "#" and "define"
+ (let ((actual-tokens (drop-while whitespace-token? line-tokens)))
+ (if (null? actual-tokens)
+ (loop (bump-line environment) remaining-tokens)
+ (match (car actual-tokens)
+ (`(preprocessing-token (identifier "if"))
+ (let ((environment (resolve-for-if environment actual-tokens)))
+ (loop environment remaining-tokens)))
+
+ (`(preprocessing-token (identifier "ifdef"))
+ (match actual-tokens
+ (`((preprocessing-token (identifier ,id)) ,_ ...)
+ (loop
+ ((if (in-environment? environment id)
+ enter-active-if enter-inactive-if)
+ environment)
+ remaining-tokens))
+ (_ (err "Non identifier in ifdef: ~s" actual-tokens))))
+
+ (`(preprocessing-token (identifier "ifndef"))
+ (match actual-tokens
+ (`((preprocessing-token (identifier ,id)) ,_ ...)
+ (loop
+ ((if (in-environment? environment id)
+ enter-inactive-if enter-active-if)
+ environment)
+ remaining-tokens))
+ (_ (err "Non identifier in ifndef: ~s" actual-tokens))))
+
+ (`(preprocessing-token (identifier "else"))
+ ;; TODO
+ 'TODO
+ )
+
+ (`(preprocessing-token (identifier "elif"))
+ (-> environment leave-if
+ (resolve-for-if actual-tokens)
+ (loop remaining-tokens)))
+
+ (`(preprocessing-token (identifier "endif"))
+ (loop (leave-if environment) remaining-tokens))
+
+ (`(preprocessing-token (identifier "include"))
+ (call-with-values
+ (lambda () (resolve-and-include-header environment (cdr actual-tokens)))
+ (lambda (environment tokens)
+ (loop environment (append tokens remaining-tokens)))))
+
+ (`(preprocessing-token (identifier "define"))
+ (let ((env (resolve-define environment (cdr actual-tokens))))
+ (loop env remaining-tokens))
+ )
+
+ (`(preprocessing-token (identifier "undef"))
+ (loop (match actual-tokens
+ (`((preprocessing-token (identifier ,id)))
+ (-> environment bump-line (remove-identifier! id))))
+ remaining-tokens))
+
+ (`(preprocessing-token (identifier "line"))
+ (loop (handle-line-directive environment actual-tokens)
+ remaining-tokens))
+
+ (`(preprocessing-token (identifier "error"))
+ ;; NOTE this is an "expected" error
+ (throw 'cpp-error actual-tokens))
+
+ (`(preprocessing-token (identifier "pragma"))
+ (loop (handle-pragma environment actual-tokens)
+ remaining-tokens)))))))
+
+ (`((preprocessing-token (identifier ,id)) ,rest ...)
+ (call-with-values (lambda () (maybe-extend-identifier environment id rest))
+ loop))
+
+ (('(whitespace "\n") rest ...)
+ (cons '(whitespace "\n") (loop (bump-line environment) rest)))
+
+ ((token rest ...) (cons token (loop environment rest))))))
+
+
+
+(define (comment->whitespace expr)
+ (match expr
+ (('comment _) '(whitespace " "))
+ (other other)))
+
+(define (read-file path)
+ (call-with-input-file path (@ (ice-9 rdelim) read-string)))
+
+(define (comment->whitespace token)
+ (match token
+ (`(comment ,_) '(whitespace " "))
+ (other other)))
+
+(define (comments->whitespace tokens)
+ (map comment->whitespace tokens))
+
+;;; 5.1.11.2 Translation phases
+
+(define (tokenize string)
+ (-> string
+;;; 1. trigraph replacement
+ replace-trigraphs
+;;; 2. Line folding
+ fold-lines
+;;; 3. Decomposition into preprocenning tokens, whitespaces, and comments
+ lex
+;;; 4. Execution of preprocessing directives, all preprocessing directives are then deleted
+ comments->whitespace
+ ;; squeeze-whitespace-blocks
+ ))
diff --git a/module/c/trigraph.scm b/module/c/trigraph.scm
new file mode 100644
index 00000000..197e01a4
--- /dev/null
+++ b/module/c/trigraph.scm
@@ -0,0 +1,24 @@
+(define-module (c trigraph)
+ :use-module (ice-9 regex)
+ :export (replace-trigraphs))
+
+(define rx (make-regexp "\\?\\?([=\\(\\)'!<>/-])"))
+
+(define (proc m)
+ (case (string-ref (match:substring m 2) 0)
+ ((#\=) "#")
+ ((#\() "[")
+ ((#\)) "]")
+ ((#\') "^")
+ ((#\<) "{")
+ ((#\>) "}")
+ ((#\!) "|")
+ ((#\-) "~")
+ ((#\/) "\\")))
+
+(define (replace-trigraphs string)
+ (call-with-output-string
+ (lambda (port)
+ (regexp-substitute/global
+ port rx string
+ 'pre proc 'post))))
diff --git a/tests/test/cpp/lex2.scm b/tests/test/cpp/lex2.scm
new file mode 100644
index 00000000..0342e25c
--- /dev/null
+++ b/tests/test/cpp/lex2.scm
@@ -0,0 +1,64 @@
+(define-module (test cpp lex2)
+ :use-module (srfi srfi-64)
+ :use-module (srfi srfi-88)
+ :use-module (ice-9 peg)
+ :use-module (c lex2))
+
+
+(test-equal "Integer literal"
+ '(preprocessing-token (pp-number "10"))
+ (lex "10"))
+
+(test-equal "String literal"
+ '(preprocessing-token (string-literal "Hello"))
+ (lex "\"Hello\""))
+
+
+(test-equal "Mulitple tokens, including whitespace"
+ '((whitespace " ")
+ (preprocessing-token (pp-number "10"))
+ (whitespace " "))
+ (lex " 10 "))
+
+(test-equal "Char literal"
+ '(preprocessing-token (character-constant "a"))
+ (lex "'a'"))
+
+
+
+(test-equal "Comment inside string"
+ '(preprocessing-token (string-literal "Hel/*lo"))
+ (lex "\"Hel/*lo\""))
+
+(test-equal "#define line"
+ '((preprocessing-token (punctuator "#"))
+ (preprocessing-token (identifier "define"))
+ (whitespace " ")
+ (preprocessing-token (identifier "f"))
+ (preprocessing-token (punctuator "("))
+ (preprocessing-token (identifier "x"))
+ (preprocessing-token (punctuator ")"))
+ (whitespace " ")
+ (preprocessing-token (pp-number "10")))
+ (lex "#define f(x) 10"))
+
+
+
+(test-equal "Nested parenthesis"
+ '((preprocessing-token (identifier "f"))
+ (preprocessing-token (punctuator "("))
+ (preprocessing-token (pp-number "1"))
+ (preprocessing-token (punctuator ","))
+ (whitespace " ")
+ (preprocessing-token (punctuator "("))
+ (preprocessing-token (pp-number "2"))
+ (preprocessing-token (punctuator ","))
+ (whitespace " ")
+ (preprocessing-token (pp-number "3"))
+ (preprocessing-token (punctuator ")"))
+ (preprocessing-token (punctuator ","))
+ (whitespace " ")
+ (preprocessing-token (pp-number "4"))
+ (preprocessing-token (punctuator ")")))
+ (lex "f(1, (2, 3), 4)"))
+
diff --git a/tests/test/cpp/preprocessor2.scm b/tests/test/cpp/preprocessor2.scm
new file mode 100644
index 00000000..117b7e49
--- /dev/null
+++ b/tests/test/cpp/preprocessor2.scm
@@ -0,0 +1,29 @@
+(define-module (test cpp preprocessor2)
+ :use-module (srfi srfi-64)
+ :use-module (srfi srfi-88))
+
+
+
+(test-group "Tokens until End Of Line"
+ (call-with-values
+ (lambda ()
+ (tokens-until-eol
+ '(before (whitespace "\n") after)))
+ (lambda (bef aft)
+ (test-equal '(before) bef)
+ (test-equal '((whitespace "\n") after) aft))))
+
+
+
+(test-equal "Squeeze whitespace"
+ '(bef (whitespace " ") aft)
+ (squeeze-whitespace
+ '(bef
+ (whitespace a)
+ (whitespace b)
+ aft)))
+
+
+
+(test-equal "("
+ (stringify-token '(preprocessor-token (operator "("))))