23 files changed, 3882 insertions, 138 deletions
diff --git a/module/c/compiler.scm b/module/c/compiler.scm
new file mode 100644
index 00000000..09d49578
--- /dev/null
+++ b/module/c/compiler.scm
@@ -0,0 +1,64 @@
+(define-module (c compiler)
+  :use-module ((c lex2) :select (lex))
+  :use-module ((c trigraph) :select (replace-trigraphs))
+  :use-module ((c line-fold) :select (fold-lines))
+  :use-module (c cpp-environment object-like-macro)
+  :use-module ((c cpp-environment)
+               :select (make-environment
+                         extend-environment
+                         enter-file))
+  :use-module (hnh util)
+  ;; TODO importort
+  ;; handle-preprocessing-tokens
+  ;; load-and-tokenize-file
+  :export (run-compiler))
+
+"
+#define __STDC__ 1
+#define __STDC_HOSTED__ 1
+#define __STDC_VERSION__ 201112L
+"
+
+(define now (localtime (current-time)))
+(define default-macros
+  (list
+   ;; 6.10.8
+   (object-like-macro
+    identifier: "__STDC__"
+    body: (lex "1"))
+   (object-like-macro
+    identifier: "__STDC_HOSTED__"
+    body: (lex "1"))
+   (object-like-macro
+    identifier: "__STDC_VERSION__"
+    body: (lex "201112L"))
+   (object-like-macro
+    identifier: "__DATE__"
+    ;; TODO format should always be in
+    ;; english, and not tranlated
+    body: (lex (strftime "\"%b %_d %Y\"" now)))
+   (object-like-macro
+    identifier: "__TIME__"
+    body: (lex (strftime "\"%H:%M:%S\"" now)))))
+
+(define environment
+  (-> (make-environment)
+      (extend-environment default-macros)))
+
+
+
+;;; 5.1.11.2 Translation phases
+
+
+
+(define (run-compiler path)
+  (define environment (enter-file (make-environment) path))
+  (-> (load-and-tokenize-file path)
+      (handle-preprocessing-tokens environment))
+;;; 5. (something with character sets)
+;;; 6. concatenation of string literals
+;;; 7. Whitespace tokens are discarded, each preprocessing token is converted into a token
+  ;; 6.4 paragraph 2
+  ;; Each preprocessing toket thas is converted to a token shall have the lexcal form of a keyword, an identifier, a constant, a string literal, or a puncturtor
+;;; 8. All external objects and functions are resolved
+  )
diff --git a/module/c/cpp-environment.scm b/module/c/cpp-environment.scm
new file mode 100644
index 00000000..a6401e71
--- /dev/null
+++ b/module/c/cpp-environment.scm
@@ -0,0 +1,215 @@
+(define-module (c cpp-environment)
+  :use-module (srfi srfi-1)
+  :use-module (srfi srfi-88)
+  :use-module (ice-9 hash-table)
+  :use-module (hnh util object)
+  :use-module (hnh util type)
+  :use-module (hnh util lens)
+  :use-module ((c cpp-environment function-like-macro) :prefix #{fun:}#)
+  :use-module ((c cpp-environment object-like-macro)   :prefix #{obj:}#)
+  :use-module ((c cpp-environment internal-macro)      :prefix #{int:}#)
+  :use-module ((c unlex) :select (unlex))
+  :export (
+
+           macro-identifier
+           macro-body
+           macro-identifier-list
+           macro-variadic?
+           cpp-macro?
+           ;; pprint-macro
+
+           enter-active-if
+           enter-inactive-if
+           flip-flop-if
+           leave-if
+           in-conditional/active?
+           in-conditional/inactive?
+           in-conditional?
+
+           enter-file
+           leave-file
+           bump-line
+           current-line
+           current-file
+
+           function-macro?
+           object-macro?
+           internal-macro?
+
+           cpp-environment
+           cpp-environment?
+           cpp-if-status
+           ;; cpp-variables
+           cpp-file-stack
+
+           make-environment in-environment?
+           remove-identifier add-identifier
+           get-identifier
+           extend-environment
+           disjoin-macro
+
+           pprint-environment
+           pprint-macro
+           ))
+
+(define (macro-identifier x)
+  (define identifier
+   (cond ((obj:object-like-macro? x)   obj:identifier)
+         ((fun:function-like-macro? x) fun:identifier)
+         ((int:internal-macro? x)      int:identifier)
+         (else (scm-error 'wrong-type-arg "macro-identifier"
+                          "Not a macro: ~s"
+                          (list x) #f))))
+  (identifier x))
+
+
+(define (macro-body-proc macro)
+  (cond ((obj:object-like-macro? macro)   obj:body)
+        ((fun:function-like-macro? macro) fun:body)
+        ((int:internal-macro? macro)      int:body)
+        (else (scm-error 'wrong-type-arg "macro-body"
+                         "Not a macro: ~s"
+                         (list macro) #f))))
+
+(define macro-body
+  (case-lambda ((macro)       ((macro-body-proc macro) macro))
+               ((macro value) ((macro-body-proc macro) macro value))))
+
+(define macro-identifier-list fun:identifier-list)
+(define macro-variadic? fun:variadic?)
+
+(define function-macro? fun:function-like-macro?)
+(define object-macro?   obj:object-like-macro?)
+(define internal-macro? int:internal-macro?)
+
+(define (cpp-macro? x)
+  (or (obj:object-like-macro? x)
+      (fun:function-like-macro? x)
+      (int:internal-macro? x)))
+
+
+
+
+(define-type (cpp-environment)
+  (cpp-if-status type: (list-of (memv '(outside active-if inactive-if)))
+                 default: '(outside))
+  ;; not exported since type signatures don't hold inside the hash table
+  ;; TODO replace hash table with something that doesn't require copying the
+  ;; entire structure every time
+  (cpp-variables type: hash-table? default: (make-hash-table))
+  (cpp-file-stack type: (and (not null?)
+                             (list-of (pair-of string? exact-integer?)))
+                  default: '(("*outside*" . 1))))
+
+
+
+
+(define (enter-active-if environment)
+  (modify environment cpp-if-status xcons 'active-if))
+
+(define (enter-inactive-if environment)
+  (modify environment cpp-if-status xcons 'inactive-if))
+
+;; for #else
+(define (flip-flop-if environment)
+  ((if (in-conditional/inactive? environment)
+       enter-active-if
+       enter-inactive-if)
+   (leave-if environment)))
+
+(define (leave-if environment)
+  (modify environment cpp-if-status cdr))
+
+(define (in-conditional/inactive? environment)
+  (eq? 'inactive-if (get environment cpp-if-status car*)))
+
+(define (in-conditional/active? environment)
+  (eq? 'active-if (get environment cpp-if-status car*)))
+
+(define (in-conditional? environment)
+  (or (in-conditional/inactive? environment)
+      (in-conditional/active? environment)))
+
+
+
+(define (enter-file environment filename)
+  (modify environment cpp-file-stack xcons (cons filename 1)))
+
+(define (leave-file environment)
+  (modify environment cpp-file-stack cdr))
+
+(define current-line (compose-lenses cpp-file-stack car* cdr*))
+
+(define current-file (compose-lenses cpp-file-stack car* car*))
+
+(define* (bump-line environment optional: (count 1))
+  (modify environment current-line + count))
+
+
+
+
+(define (make-environment) (cpp-environment))
+
+(define (clone-hash-table ht)
+  (alist->hash-table (hash-map->list cons ht)))
+
+(define (clone-environment environment)
+  (modify environment cpp-variables clone-hash-table))
+
+(define (in-environment? environment key)
+  (hash-get-handle (cpp-variables environment) key))
+
+(define (remove-identifier environment key)
+  (typecheck key string?)
+
+  (let ((environment (clone-environment environment)))
+    (hash-remove! (cpp-variables environment) key)
+    environment))
+
+(define (add-identifier environment key value)
+  (typecheck key string?)
+  (typecheck value cpp-macro?)
+
+  (let ((environment (clone-environment environment)))
+    (hash-set! (cpp-variables environment) key value)
+    environment))
+
+(define (get-identifier environment key)
+  (hash-ref (cpp-variables environment) key))
+
+
+(define (extend-environment environment macros)
+  (typecheck macros (list-of cpp-macro?))
+  (fold (lambda (m env) (add-identifier env (macro-identifier m) m))
+        environment macros))
+
+(define (disjoin-macro environment name)
+  (typecheck name string?)
+  (remove-identifier environment name))
+
+
+
+
+(define* (pprint-environment environment optional: (port (current-error-port)))
+  (display "== Environment ==\n")
+  (hash-for-each (lambda (key macro)
+                   (pprint-macro macro port)
+                   (newline port))
+                 (cpp-variables environment)))
+
+(define* (pprint-macro x optional: (p (current-output-port)))
+  (cond ((internal-macro? x)
+         (format p "/* ~a INTERNAL MACRO */"
+                 (macro-identifier x)))
+        ((object-macro? x)
+         (format p "#define ~a ~a"
+                 (macro-identifier x)
+                 (unlex (macro-body x))))
+        ((function-macro? x)
+         (format p "#define ~a(~a) ~a"
+                 (macro-identifier x)
+                 (string-join (append (macro-identifier-list x)
+                                      (if (macro-variadic? x)
+                                          '("...") '()))
+                              "," 'infix)
+                 (unlex (macro-body x))))))
diff --git a/module/c/cpp-environment/function-like-macro.scm b/module/c/cpp-environment/function-like-macro.scm
new file mode 100644
index 00000000..59b47c9c
--- /dev/null
+++ b/module/c/cpp-environment/function-like-macro.scm
@@ -0,0 +1,25 @@
+(define-module (c cpp-environment function-like-macro)
+  :use-module (hnh util object)
+  :use-module (hnh util type)
+  :use-module ((c lex2) :select (lexeme?))
+  :use-module ((c unlex) :select (unlex))
+  :export (function-like-macro
+           function-like-macro?
+           identifier
+           identifier-list
+           body
+           variadic?))
+
+(define-type (function-like-macro
+              printer: (lambda (r p)
+                         (format p "#<#define ~a~a ~a>"
+                                 (identifier r)
+                                 (append (identifier-list r)
+                                         (if (variadic? r)
+                                             '("...") '()))
+                                 (unlex (body r)))))
+  (identifier type: string?)
+  (identifier-list type: (list-of string?))
+  (body type: (list-of lexeme?))
+  (variadic? type: boolean?
+             default: #f))
diff --git a/module/c/cpp-environment/internal-macro.scm b/module/c/cpp-environment/internal-macro.scm
new file mode 100644
index 00000000..3c946738
--- /dev/null
+++ b/module/c/cpp-environment/internal-macro.scm
@@ -0,0 +1,11 @@
+(define-module (c cpp-environment internal-macro)
+  :use-module (hnh util object)
+  :export (internal-macro
+           internal-macro?
+           identifier body))
+
+(define-type (internal-macro)
+  (identifier type: string?)
+  (body type: procedure?
+        ;; Arity 2
+        ))
diff --git a/module/c/cpp-environment/object-like-macro.scm b/module/c/cpp-environment/object-like-macro.scm
new file mode 100644
index 00000000..90a3ad23
--- /dev/null
+++ b/module/c/cpp-environment/object-like-macro.scm
@@ -0,0 +1,18 @@
+(define-module (c cpp-environment object-like-macro)
+  :use-module (hnh util object)
+  :use-module (hnh util type)
+  :use-module ((c lex2) :select (lexeme?))
+  :use-module ((c unlex) :select (unlex))
+  :export (object-like-macro
+           object-like-macro?
+           identifier
+           body))
+
+
+(define-type (object-like-macro
+              printer: (lambda (r p)
+                         (format p "#<#define ~a ~a>"
+                                 (identifier r)
+                                 (unlex (body r)))))
+  (identifier type: string?)
+  (body type: (list-of lexeme?)))
diff --git a/module/c/cpp-types.scm b/module/c/cpp-types.scm
new file mode 100644
index 00000000..6dad061e
--- /dev/null
+++ b/module/c/cpp-types.scm
@@ -0,0 +1,94 @@
+(define-module (c cpp-types)
+  :use-module (c lex2)
+  :use-module (ice-9 match)
+  :use-module (c cpp-util)
+  :use-module (hnh util type)
+  :export (whitespace-token?
+           comment-token?
+           preprocessing-token?
+           other-token?
+           placemaker-token?
+           newline-token?
+           identifier-token?
+           punctuator-token?
+           pp-number?
+           string-token?
+           h-string-token?
+           q-string-token?
+           character-constant?
+           comment->whitespace
+           comments->whitespace
+           make-string-literal
+           ))
+
+(define (whitespace-token? x)
+  (and (lexeme? x)
+       (eq? 'whitespace (lexeme-type x))))
+
+(define (comment-token? x)
+  (and (lexeme? x)
+       (eq? 'comment (lexeme-type x))))
+
+(define (preprocessing-token? x)
+  (and (lexeme? x)
+       (eq? 'preprocessing-token (lexeme-type x))))
+
+(define (other-token? x)
+  (and (lexeme? x)
+       (eq? 'other (lexeme-type x))))
+
+(define (placemaker-token? x)
+  (and (lexeme? x)
+       (eq? 'placemaker (lexeme-type x))))
+
+(define (newline-token? x)
+  (and (whitespace-token? x)
+       (string=? "\n" (lexeme-body x))))
+
+(define (identifier-token? token)
+  (and (preprocessing-token? token)
+       (match (lexeme-body token)
+         (`(identifier ,id) id)
+         (_ #f))))
+
+(define (punctuator-token? token)
+  (and (preprocessing-token? token)
+       (match (lexeme-body token)
+         (`(punctuator ,x) x)
+         (_ #f))))
+
+(define (pp-number? token)
+  (and (preprocessing-token? token)
+       (match (lexeme-body token)
+         (`(pp-number ,x) x)
+         (_ #f))))
+
+;; TODO rename to string-literal-token?
+(define (string-token? token)
+  (and (preprocessing-token? token)
+       (match (lexeme-body token)
+         (('string-literal x ...) (apply values x))
+         (_ #f))))
+
+(define (character-constant? token)
+  (and (preprocessing-token? token)
+       (match (lexeme-body token)
+         (('character-constant x ...) (apply values x))
+         (_ #f))))
+
+
+(define (h-string-token? token)
+  (and (preprocessing-token? token)
+       (match (lexeme-body token)
+         (`(header-name (h-string ,x)) x)
+         (_ #f))))
+
+;; NOTE q-string tokens are never produced by the lexer,
+;; since they instead are treated as regular strings
+(define (q-string-token? token)
+  (string-token? token))
+
+(define (make-string-literal parts)
+  (typecheck parts (list-of (or string? list?)))
+  (lexeme type: 'preprocessing-token
+          body: (cons 'string-literal parts)))
diff --git a/module/c/cpp-util.scm b/module/c/cpp-util.scm
new file mode 100644
index 00000000..9674317b
--- /dev/null
+++ b/module/c/cpp-util.scm
@@ -0,0 +1,130 @@
+(define-module (c cpp-util)
+  :use-module ((srfi srfi-1) :select (drop-while break))
+  :use-module (srfi srfi-71)
+  :use-module ((hnh util) :select (->))
+  :use-module (hnh util type)
+  :use-module ((hnh util lens) :select (modify ref))
+  :use-module ((c lex2) :select (lex lexeme?))
+  :use-module ((c unlex) :select (unlex))
+  :use-module (c cpp-types)
+  :export (tokens-until-eol
+           tokens-until-cpp-directive
+           next-token-matches?
+           squeeze-whitespace
+           drop-whitespace
+           drop-whitespace-right
+           drop-whitespace-both
+           cleanup-whitespace
+           concatenate-tokens
+           merge-string-literals))
+
+
+;; Does the next non-whitespace token in the stream satisfy the predicate?
+(define (next-token-matches? predicate tokens)
+  (let ((tokens (drop-whitespace tokens)))
+    (if (null? tokens)
+        #f
+        (predicate (car tokens)))))
+
+(define (next-token-matches/line? predicate tokens)
+  (let ((tokens (drop-whitespace/line tokens)))
+    (if (null? tokens)
+        #f
+        (predicate (car tokens)))))
+
+;; Returns two values:
+;; - tokens until a newline token is met
+;; - (potentially the newline token) and the remaining tokens
+(define (tokens-until-eol tokens)
+  ;; (typecheck tokens (list-of lexeme?))
+  (break newline-token? tokens))
+
+;; call predicate with the remaining token stream, until we run out of token, or
+;; predicate matches
+(define (break-lexemes predicate lex-list)
+  (let loop ((rem lex-list) (done '()))
+    (cond ((null? rem)     (values (reverse done) '()))
+          ((predicate rem) (values (reverse done) rem))
+          (else (loop (cdr rem) (cons (car rem) done))))))
+
+;; Finds the next instance of "\n#" (possibly with inbetween whitespace)
+;; and return the values before and after (inclusive)
+(define (tokens-until-cpp-directive tokens)
+  (break-lexemes
+   (lambda (tokens)
+     (and (newline-token? (car tokens))
+          (next-token-matches/line?
+           (lambda (token) (equal? "#" (punctuator-token? token)))
+           (cdr tokens))))
+   tokens))
+
+;; Replace all whitespace with single spaces.
+(define (squeeze-whitespace tokens)
+  (cond ((null? tokens) '())
+        ((null? (cdr tokens))
+         (list
+          (if (whitespace-token? (car tokens))
+              (car (lex " "))
+              (car tokens))))
+        ((and (whitespace-token? (car tokens))
+              (whitespace-token? (cadr tokens)))
+         (squeeze-whitespace (cons (car (lex " "))
+                                   (cddr tokens))))
+        (else (cons (car tokens)
+                    (squeeze-whitespace (cdr tokens))))))
+
+;; Drop leading whitespace tokens
+(define (drop-whitespace tokens)
+  ;; (typecheck tokens (list-of lexeme?))
+  (drop-while whitespace-token? tokens))
+
+(define (drop-whitespace/line tokens)
+  ;; (typecheck tokens (list-of lexeme?))
+  (drop-while (lambda (t)
+                (and (whitespace-token? t)
+                     (not (newline-token? t))))
+              tokens))
+
+(define (drop-whitespace-right tokens)
+  ;; (typecheck tokens (list-of lexeme?))
+  (-> tokens reverse drop-whitespace reverse))
+
+(define (drop-whitespace-both tokens)
+  ;; (typecheck tokens (list-of lexeme?))
+  (-> tokens
+      drop-whitespace
+      drop-whitespace-right))
+
+;; helper procedure to parse-parameter-list.
+;; If a parameter is complex then whitespace is kept, but squeezed to single spaces. Surounding whitespace is removed.
+;; Example:
+;; #define str(x, y) #y
+;; str(x,   (   2,     4    )     )
+;; expands to:
+;; "( 2, 4 )"
+;; 6.10.3.2 p 2
+(define (cleanup-whitespace tokens)
+  ;; (typecheck tokens (list-of lexeme?))
+  (-> tokens drop-whitespace-both squeeze-whitespace))
+
+(define (concatenate-tokens a b)
+  (car (lex (string-append (unlex (list a))
+                           (unlex (list b))))))
+
+
+(define (merge-string-literals tokens)
+  (cond ((null? tokens) '())
+        ((null? (cdr tokens)) tokens)
+        ((string-token? (car tokens))
+         (lambda (a . _) a)
+         => (lambda (prefix-a . parts-a)
+              (cond ((string-token? (cadr tokens))
+                     (lambda (a . _) a)
+                     => (lambda (prefix-b . parts-b)
+                          (merge-string-literals
+                           ;; TODO check validity of prefixes
+                           (cons (make-string-literal (cons prefix-a (append parts-a parts-b)))
+                                 (cddr tokens)))))
+                    (else (cons (car tokens)
+                                (merge-string-literals (cdr tokens)))))))
+        (else (cons (car tokens) (merge-string-literals (cdr tokens))))))
diff --git a/module/c/cpp.scm b/module/c/cpp.scm
index a2935352..aed496f2 100644
--- a/module/c/cpp.scm
+++ b/module/c/cpp.scm
@@ -5,47 +5,37 @@
   :use-module (ice-9 match)
   :use-module (ice-9 regex)
   :use-module ((rnrs io ports) :select (call-with-port))
+  :use-module ((rnrs bytevectors) :select (bytevector?))
   :use-module (ice-9 format)
   :use-module ((hnh util io) :select (read-lines))
   :use-module (hnh util graph)
   :use-module (c lex)
   :use-module (c parse)
   :use-module (c operators)
-  :export (do-funcall replace-symbols include#)
+  :export (replace-symbols include#)
   )
 
 
 ;; input "#define F(x, y) x + y"
-;; 1 full define | F(x, y)
+;; 1 full define | F(x,y)
 ;; 2 macro name  | F
-;; 3 macro args  | (x, y)
-;; 4 macro body  | x + y
-(define define-re (make-regexp "^#define ((\\w+)(\\([^)]*\\))?) (.*)"))
+;; 3 macro args  | (x,y)
+;; 5 macro body  | x + y or #f
+(define define-re (make-regexp "^#define ((\\w+)([(][^)]*[)])?)( (.*))?"))
 
 (define (tokenize-define-line header-line)
   (aif (regexp-exec define-re header-line)
     (cons (match:substring it 1)
-          (match:substring it 4))
+          (let ((body (match:substring it 5)))
+            (if (or (eqv? body #f)
+                    (string-null? body))
+                "1" body)))
     (scm-error 'c-parse-error
                "tokenize-define-line"
                "Line dosen't match: ~s"
                (list header-line) #f)))
 
 
-(define (do-funcall function arguments)
-  (if (list? arguments)
-      (apply function arguments)
-      (function arguments)))
-
-(define symb-map
-  `((,(symbol #\|) . logior)
-    (funcall . (@ (c cpp) do-funcall))
-    (&& . and)
-    (& . logand)
-    (== . =)
-    (!= . (negate =))
-    ))
-
 (define (replace-symbols tree dict)
   (if (not (list? tree))
       (or (assoc-ref dict tree) tree)
@@ -55,12 +45,27 @@
 ;; Direct values. Lisp also has quoted symbols in this group.
 (define (immediate? x)
   (or (number? x)
-      (char? x)
-      (string? x)))
+      (bytevector? x)))
 
+;; TODO replace this with something sensible
+;; like a correct list extracted from (c eval)
+;; and not thinging that types are variables
 ;; built in symbols. Should never be marked as dependencies
 (define (primitive? x)
-  (memv x (cons 'funcall binary-operators)))
+  (memv x `(
+            ;; language primitives
+            sizeof
+
+            ;; special forms introduced by parser
+            funcall ternary struct-type as-type
+
+            ;; unary operatons which aren't also binary operators
+            ++ -- ! ~
+            not compl dereference pointer
+            pre-increment pre-decrement
+            post-increment post-decrement
+            ,@binary-operators
+            )))
 
 
 
@@ -77,7 +82,6 @@
       [arg (list arg)]))
 
   (define right (f (cdr pair)))
-  (define alt-right (replace-symbols right symb-map))
   (define dependencies
     (lset-difference
      eq?
@@ -91,12 +95,12 @@
    dependencies
    (match left
      [('funcall name ('#{,}# args ...))
-      (cons name `(lambda ,args ,alt-right))]
+      (cons name `(lambda ,args ,right))]
 
      [('funcall name arg)
-      (cons name `(lambda (,arg) ,alt-right))]
+      (cons name `(lambda (,arg) ,right))]
 
-     [name (cons name alt-right)])))
+     [name (cons name right)])))
 
 
 (define (parse-cpp-file lines)
@@ -104,7 +108,9 @@
          (catch #t
            (lambda () (parse-cpp-define line))
            (lambda (err caller fmt args data)
-             (format #t "~a ~?~%" fmt args)
+             (format #t "~a in ~a: ~?~%"
+                     err caller fmt args)
+             (format #t "~s~%" line)
              #f)))
        lines))
 
@@ -114,29 +120,32 @@
 (define (tokenize-header-file header-file)
   (map tokenize-define-line
        (call-with-port
-        (open-input-pipe
-         (string-append "cpp -dM " header-file))
+        (open-pipe* OPEN_READ "cpp" "-dM" header-file)
         read-lines)))
 
-(define-macro (include# header-file . args)
-
-  (define define-form (if (null? args) 'define (car args)))
-
-  (define lines (remove (compose private-c-symbol? car)
-                        (tokenize-header-file header-file)))
+(define (load-cpp-file header-file)
 
+  (define lines (tokenize-header-file header-file))
   (define forms (parse-cpp-file lines))
 
-  (define graph*
-    (fold (lambda (node graph)
-            (add-node graph (cdr node) (car node)))
-          (make-graph car)
-          (filter identity forms)))
+  (fold (lambda (node graph)
+          (add-node graph (cdr node) (car node)))
+        (make-graph car)
+        (filter identity forms)))
 
+(define (include% header-file)
+  (define graph* (load-cpp-file header-file))
   ;; Hack for termios since this symbol isn't defined.
   ;; (including in the above removed private c symbols)
-  (define graph (add-node graph* (cons '_POSIX_VDISABLE #f) '()))
+  (define graph (add-node graph* (cons '_POSIX_VDISABLE 0) '()))
+  ;; TODO expand bodies
+  ;; (remove (compose private-c-symbol? car))
+  (resolve-dependency-graph graph))
+
+(define-macro (include# header-file . args)
+
+  (define define-form (if (null? args) 'define (car args)))
 
   `(begin
      ,@(map (lambda (pair) `(,define-form ,(car pair) ,(cdr pair)))
-            (resolve-dependency-graph graph))))
+            (include% header-file))))
diff --git a/module/c/eval.scm b/module/c/eval.scm
new file mode 100644
index 00000000..67d0075d
--- /dev/null
+++ b/module/c/eval.scm
@@ -0,0 +1,265 @@
+(define-module (c eval)
+  :use-module (hnh util)
+  :use-module (srfi srfi-1)
+  :use-module (srfi srfi-71)
+  :use-module (srfi srfi-88)
+  :use-module (ice-9 match)
+  :use-module (ice-9 curried-definitions)
+  :use-module ((rnrs bytevectors)
+               :select (bytevector?))
+  :use-module ((rnrs arithmetic bitwise)
+               :select (bitwise-not
+                        bitwise-and
+                        bitwise-ior
+                        bitwise-xor
+                        bitwise-arithmetic-shift-left
+                        bitwise-arithmetic-shift-right))
+  :use-module (c eval environment)
+  :export (c-procedure?
+           procedure-formals
+           procedure-body
+           procedure-arity
+
+           c-eval
+           ))
+
+(define C-TRUE 1)
+(define C-FALSE 0)
+
+(define (boolean->c-boolean bool)
+  (if bool C-TRUE C-FALSE))
+
+(define (c-boolean->boolean bool)
+  (not (zero? bool)))
+
+(define (c-not b)
+  (-> b c-boolean->boolean not boolean->c-boolean))
+
+(define (c-procedure? expr)
+  (and (list? expr)
+       (not (null? expr))
+       (eq? 'lambda (car expr))))
+
+(define* (ensure-c-procedure expr optional: calling-procedure)
+  (unless (c-procedure? expr)
+    (scm-error 'c-eval-error calling-procedure
+               "Value not a procedure: ~s"
+               (list procedure #f))))
+
+(define (procedure-formals procedure)
+  (ensure-c-procedure procedure "procedure-formals")
+  (list-ref procedure 1))
+
+(define (procedure-body procedure)
+  (ensure-c-procedure procedure "procedure-body")
+  (list-ref procedure 2))
+
+(define (procedure-arity procedure)
+  (length (procedure-formals procedure)))
+
+(define (literal? expression)
+  (or (number? expression)
+      (bytevector? expression)))
+
+
+
+;; Internal helper procedures
+
+(define (mod-set operation)
+  (lambda (env slot value)
+    ;; a += b
+    ;; a = a + b
+    (c-eval env `(= ,slot (,operation ,slot ,value)))))
+
+(define (fold-2 proc init lst)
+  (car+cdr
+   (fold (lambda (arg env+done)
+           (let ((env* arg* (proc (car env+done) arg)))
+             (cons* env* arg* (cdr env+done))))
+         init
+         lst)))
+
+;; TODO this disregards
+;; - floating point convertions
+;; - integer truncation
+(define ((simple-infix operation) env . operands)
+  (let ((env done (fold-2 c-eval (cons env '()) operands)))
+    (values env (apply operation (reverse done)))))
+
+(define ((binary-operator proc) env i c)
+  (let ((env* i* (c-eval env i)))
+    (let ((env** c* (c-eval env* c)))
+      (values env** (proc i* c*)))))
+
+
+
+
+;; The order of evaluation for a number of these is undefined, meaning
+;; that any side effects without sequence points is undefined.
+;; However, for many of these I do the sensible thing and evaluate them
+;; from left to right, instead of ignoring all side effects.
+
+;; TODO double check these with a C standard
+
+
+;; Operators have their own namespace. They are called without funcall
+;; in the pseudo-lisp which C is compiled to, and they expand more like
+;; lisp macros, since its up to each operator what to do with its operands.
+;; This to allow assignment and short circuting.
+(define primitives
+  `((and . ,(lambda (env . operands)
+              (let loop ((env env) (operands operands))
+                (if (null? operands)
+                    (values env C-TRUE)
+                    (let* ((env* result (c-eval env (car operands))))
+                      (if (c-boolean->boolean result)
+                          (loop env* (cdr operands))
+                          (values env* result)))))))
+    (or . ,(lambda (env . operands)
+             (let loop ((env env) (operands operands))
+               (if (null? operands)
+                   (values env C-FALSE)
+                   (let* ((env* result (c-eval env (car operands))))
+                     (if (false? result)
+                         (values env* result)
+                         (loop env* (cdr operands))))))))
+    (= . ,(lambda (env slot value)
+            ;; TOOD if slot isn't a variable, but a field (or array index)
+            ;; then it needs to be resolved...
+            (let ((env* result (c-eval env value)))
+              (values (env-set! env* slot result)
+                      result))))
+    (and_eq ,(mod-set 'bitand))         ; &=
+    (or_eq  ,(mod-set 'bitor))          ; |=
+    (xor_eq ,(mod-set 'xor))            ; ^=
+    (+=  ,(mod-set '+))
+    (-=  ,(mod-set '-))
+    (*=  ,(mod-set '*))
+    (/=  ,(mod-set '/))
+    (<<= ,(mod-set '<<))
+    (>>= ,(mod-set '>>))
+    (%=  ,(mod-set '%))
+    (+ . ,(simple-infix +))
+    (* . ,(simple-infix *))
+    (/ . ,(simple-infix /))
+    (- . ,(lambda (env op . operands)
+            (if (null? operands)
+                (let ((env* value (c-eval env op)))
+                  (values env* (- value)))
+                (apply (simple-infix -)
+                       env op operands))))
+    (bitor  . ,(simple-infix bitwise-ior))
+    (bitand . ,(simple-infix bitwise-and))
+    (xor    . ,(simple-infix bitwise-xor))
+    (not_eq . ,(lambda (env a b) (c-eval env `(not (== ,a ,b))))) ; !=
+    (<< . ,(binary-operator bitwise-arithmetic-shift-left))
+    (>> . ,(binary-operator bitwise-arithmetic-shift-right))
+    (< . ,(binary-operator (compose boolean->c-boolean <)))
+    (> . ,(binary-operator (compose boolean->c-boolean >)))
+    ;; this assumes that = handles pointers
+    (== . ,(binary-operator (compose boolean->c-boolean =)))
+    (<= . ,(binary-operator (compose boolean->c-boolean <=)))
+    (>= . ,(binary-operator (compose boolean->c-boolean >=)))
+    (% .  ,(binary-operator modulo))
+
+    (not . ,(lambda (env value)
+              (let ((env* result (c-eval env value)))
+                (values env* (c-not result)))))
+    (compl . ,(lambda (env value)
+                (let ((env* result (c-eval env value)))
+                  (values env* (bitwise-not result)))))
+
+    ;; ++C
+    (pre-increment . ,(lambda (env slot) (c-eval env `(+= ,slot 1))))
+    (pre-decrement . ,(lambda (env slot) (c-eval env `(-= ,slot 1))))
+    ;; TODO these (C++, C--) need to handle if slot isn't a direct variabl
+    (post-increment . ,(lambda (env slot)
+                         (let ((value (env-ref env slot)))
+                           (values (env-set! env slot (1+ value))
+                                   value))))
+    (pre-decrement . ,(lambda (env slot)
+                        (let ((value (env-ref env slot)))
+                          (values (env-set! env slot (1+ value))
+                                  value))))
+
+    (ternary . ,(lambda (env test true-clause false-clause)
+                  (let ((env* value (c-eval env test)))
+                    (c-eval env*
+                            (if (c-boolean->boolean value)
+                                true-clause false-clause)))))
+
+    ;; TODO remaining operations
+    (as-type . ,(lambda (env target-type value)
+                  (format (current-error-port) "cast<~s>(~s)~%" target-type value)
+                  (values env value)))
+
+    (object-slot . ,(lambda (env object slot)
+                      (scm-error 'not-implemented "object-slot"
+                                 "Object slots are not implemented, when accessing ~s.~s"
+                                 (list object slot) #f)))
+    (dereference-slot ,(lambda (env ptr slot)
+                          (scm-error 'not-implemented "dereference-slot"
+                                 "Object slots are not implemented, when accessing ~s->~s"
+                                 (list object slot) #f)))
+    (dereference . ,(lambda (env ptr)
+                      (scm-error 'not-implemented "dereference"
+                                 "Poiner dereferencing is not implemented: *~s"
+                                 (list ptr) #f)))
+    (pointer . ,(lambda (env value)
+                  (scm-error 'not-implemented "pointer"
+                             "Pointer of is not implemented: &~s"
+                             (list value) #f)))))
+
+;; TODO |,|
+
+
+(define (c-eval environment expression)
+  (match expression
+    (('lambda formals body) (values environment `(lambda ,formals ,body)))
+    ;; hack since sizeof really should be a operator
+    (('funcall 'sizeof arg)
+     ;; TODO
+     (format (current-error-port) "sizeof ~s~%" arg)
+     (values environment 1))
+
+    (('funcall procedure-name ('#{,}# args ...))
+     (let ((procedure (env-ref environment procedure-name)))
+       (ensure-c-procedure procedure "c-eval")
+       (unless (= (length args) (procedure-arity procedure))
+         (scm-error 'c-eval-error "c-eval"
+                    "Procedure arity mismatch for ~s, expected ~s, got ~s"
+                    (list procedure-name
+                          (procedure-arity procedure)
+                          (length args))
+                    #f))
+       (let ((env args* (fold-2 c-eval (cons environment '()) args )))
+         (let ((inner-environment
+                (fold (lambda (name value env) (env-set! env name value))
+                      (push-frame! env)
+                      (procedure-formals procedure) args*)))
+           (let ((resulting-environment
+                  result-value
+                  (c-eval inner-environment (procedure-body procedure))))
+             (values (pop-frame! resulting-environment)
+                     result-value))))))
+
+    (('funcall procedure arg)
+     (c-eval environment `(funcall ,procedure (#{,}# ,arg))))
+
+    ((operator operands ...)
+     (apply (or (assoc-ref primitives operator)
+                (scm-error 'c-eval-error "c-eval"
+                           "Applying non-existant primitive operator: ~s, operands: ~s"
+                           (list operator operands) #f))
+            environment operands))
+
+    ;; "f()" gets compiled to simply f
+    ;; meaning that we instead use the environment to determine
+    ;; if something is a variable or procedure
+    (expr
+     (if (literal? expr)
+         (values environment expr)
+         (let ((value (env-ref environment expr)))
+           (if (c-procedure? value)
+               (c-eval environment `(funcall ,value (#{,}#)))
+               (values environment value)))))))
diff --git a/module/c/eval/environment.scm b/module/c/eval/environment.scm
new file mode 100644
index 00000000..12eefaf7
--- /dev/null
+++ b/module/c/eval/environment.scm
@@ -0,0 +1,34 @@
+(define-module (c eval environment)
+  :use-module (srfi srfi-1)
+  :export (make-environment
+           env-set! env-ref push-frame! pop-frame!))
+
+(define (make-frame)
+  (make-hash-table))
+
+(define (make-environment)
+  (list (make-frame)))
+
+;; Returns an updated environment, linear update
+(define (env-set! env key value)
+  ;; get handle to differentiate #f
+  ;; (even though #f should never be stored since it's not a C value)
+  (cond ((find (lambda (frame) (hashq-get-handle frame key)) env)
+         => (lambda (frame) (hashq-set! frame key value)))
+        (else (hashq-set! (car env) key value)))
+  env)
+
+(define (env-ref env key)
+  (cond ((null? env)
+         (scm-error 'misc-error "env-ref"
+                    "~s unbound"
+                    (list key)
+                    #f))
+        ((hashq-get-handle (car env) key) => cdr)
+        (else (env-ref (cdr env) key))))
+
+(define (push-frame! environment)
+  (cons (make-frame) environment))
+
+(define (pop-frame! environment)
+  (cdr environment))
diff --git a/module/c/eval2.scm b/module/c/eval2.scm
new file mode 100644
index 00000000..d58f20bf
--- /dev/null
+++ b/module/c/eval2.scm
@@ -0,0 +1,20 @@
+(define-module (c eval2)
+  :use-module ((hnh util) :select (->))
+  :export (C-TRUE
+           C-FALSE
+           boolean->c-boolean
+           c-boolean->boolean
+           c-not))
+
+
+(define C-TRUE 1)
+(define C-FALSE 0)
+
+(define (boolean->c-boolean bool)
+  (if bool C-TRUE C-FALSE))
+
+(define (c-boolean->boolean bool)
+  (not (zero? bool)))
+
+(define (c-not b)
+  (-> b c-boolean->boolean not boolean->c-boolean))
diff --git a/module/c/lex.scm b/module/c/lex.scm
index 34e52d88..0bde5c9e 100644
--- a/module/c/lex.scm
+++ b/module/c/lex.scm
@@ -43,8 +43,23 @@
 (define-peg-pattern integer all
   (and (or base-8 base-16 base-10) (? integer-suffix)))
 
+(define-peg-pattern float-suffix all
+  (* (or "f" "F" "l" "L")))
+
+(define-peg-pattern exponent all
+  (and (ignore (or "e" "E")) (? (or "+" "-")) base-10))
+
+;; Helper patterns for creating named groups in float
+(define-peg-pattern float-integer all base-10)
+(define-peg-pattern float-decimal all base-10)
+
+(define-peg-pattern float all
+  (or (and    float-integer                                exponent  (? float-suffix))
+      (and (? float-integer) (ignore ".") float-decimal (? exponent) (? float-suffix))
+      (and    float-integer  (ignore ".")               (? exponent) (? float-suffix))))
+
 (define-peg-pattern number body
-  (or integer))
+  (or float integer))
 
 (define-peg-pattern group all
   (and (ignore "(") expr (ignore ")")))
@@ -65,11 +80,16 @@
 (define-peg-pattern char all
   (and (ignore "'") (or escaped-char peg-any) (ignore "'")))
 
+(define-peg-pattern quot none "\"")
+
+(define-peg-pattern string all
+  (and quot (* (or escaped-char (and (not-followed-by "\"") peg-any))) quot))
 
 (define-peg-pattern* operator all
   `(or ,@(map symbol->string symbol-binary-operators)
        ,@(map (lambda (op) `(and ,(symbol->string op) ws))
-              wordy-binary-operators)))
+              wordy-binary-operators)
+       "?" ":"))
 
 ;; whitespace
 (define-peg-pattern ws none
@@ -89,17 +109,23 @@
               base-10-digit))))
 
 (define-peg-pattern prefix-operator all
-  (or "!" "~" "*" "&" "++" "--" "+" "-"))
+  ;; It's important that ++ and -- are BEFORE + and -
+  ;; otherwise the first + is found, leaving the second +, which fails
+  ;; to lex since it's an invalid token
+  ;; TODO sizeof can be written as a prefix operator
+  ;; (without parenthesis) if the operand is an expression.
+  (or "*" "&" "++" "--"
+      "!" "~" "+" "-"))
+
 
 ;;; Note that stacked pre or postfix operators without parenthesis
 ;;; dosen't work. So `*&C' is invalid, while `*(&C)' is valid.
 
 (define-peg-pattern prefix all
-  (and prefix-operator sp (or variable group funcall #; postfix
-                              )))
+  (and prefix-operator sp (or variable group funcall literal)))
 
 (define-peg-pattern postfix-operator all
-  (or "++" "--"))
+  (or "++" "--" "*"))
 
 (define-peg-pattern postfix all
   ;; literals can't be in-place incremented and decremented
@@ -111,15 +137,25 @@
   ;; first case is "same" as expr, but in different order to prevent
   ;; infinite self reference. Pre and postfix not here, solved by having
   ;; them before infix in expr
-  (and (or funcall postfix prefix group char number variable)
+  (and (or funcall postfix prefix group literal variable)
        sp operator sp expr))
 
 (define-peg-pattern funcall all
   (and variable sp group))
 
+(define-peg-pattern literal body
+  (or char string number))
+
 ;;; main parser
 (define-peg-pattern expr body
-  (+ (and sp (or infix postfix prefix funcall group char number variable)
+  (+ (and sp (or
+              ;; float must be BEFORE infix, otherwise 3.2 is parsed as (infix 3 (operator ".") 2)
+              ;; that however breaks the infix logic, meaning that floating point numbers can't be
+              ;; used in basic arithmetic.
+              ;; TODO remove all implicit order of operations handling in the lexer, and move it to
+              ;; the parser. This should also fix the case of typecasts being applied incorrectly.
+              float
+              infix postfix prefix funcall group literal variable)
           sp)))
 
 
diff --git a/module/c/lex2.scm b/module/c/lex2.scm
new file mode 100644
index 00000000..af90dcce
--- /dev/null
+++ b/module/c/lex2.scm
@@ -0,0 +1,549 @@
+(define-module (c lex2)
+  :use-module (ice-9 peg)
+  :use-module (ice-9 match)
+  :use-module ((hnh util) :select (->))
+  :use-module (hnh util object)
+  :use-module (hnh util type)
+  :use-module ((srfi srfi-1) :select (fold))
+  :use-module (srfi srfi-88)
+  :use-module ((c trigraph) :select (replace-trigraphs))
+  :use-module ((c line-fold) :select (fold-lines))
+  :export (lex
+           lexeme lexeme?
+           placemaker
+           (type . lexeme-type)
+           (body . lexeme-body)
+           (noexpand . lexeme-noexpand)
+
+           parse-c-number
+
+           tokenize
+           ))
+
+;;; A.1 Lexical grammar
+;;; A.1.1 Lexical elements
+
+;; (6.4)
+(define-peg-pattern token all
+  (or keyword
+      identifier
+      constant
+      string-literal
+      punctuator
+      ))
+
+;; (6.4)
+(define-peg-pattern preprocessing-token all
+  ;; string literal moved before header-name since string literals
+  ;; otherwise became q-strings
+  (or string-literal
+      header-name
+      character-constant
+      identifier
+      pp-number
+      punctuator
+      ;; Each non-white-space character that cannot be one of the above
+      ))
+
+;;; A.1.2 Keywords
+
+;; (6.4.1)
+(define-peg-pattern keyword all
+  (or "auto" "break" "case" "char" "const" "continue" "default"
+      "do" "double" "else" "enum" "extern" "float" "for" "goto"
+      "if" "inline" "int" "long" "register" "restrict" "return"
+      "short" "signed" "sizeof" "static" "struct" "switch"
+      "typedef" "union" "unsigned" "void" "volatile" "while"
+      "_Alignas" "_Alignof" "_Atomic" "_Bool" "_Complex"
+      "_Generic" "_Imaginary" "_Noreturn" "_Static_assert"
+      "_Thread_local"))
+
+;;; A.1.3 Identifiers
+
+;; (6.4.2.1)
+(define-peg-pattern identifier all
+  (and identifier-nondigit (* (or identifier-nondigit digit))))
+
+;; (6.4.2.1)
+(define-peg-pattern identifier-nondigit body
+  (or nondigit
+      universal-character-name
+      ;; TODO other implementation-defined characters
+      ))
+
+;; (6.4.2.1)
+(define-peg-pattern nondigit body
+  (or "_"
+      (range #\A #\Z)
+      (range #\a #\z)))
+
+;; (6.4.2.1)
+(define-peg-pattern digit body
+  (range #\0 #\9))
+
+;;; A.1.4 Universal character names
+
+;; (6.4.3)
+(define-peg-pattern universal-character-name all
+  (or (and (ignore "\\u") hex-quad)
+      (and (ignore "\\U") hex-quad hex-quad)))
+
+;; (6.4.3)
+(define-peg-pattern hex-quad body
+  (and hexadecimal-digit hexadecimal-digit
+       hexadecimal-digit hexadecimal-digit))
+
+;;; A.1.5 Constants
+
+;; (6.4.4)
+(define-peg-pattern constant all
+  ;; Int and float swapped from standard since we need to try parsing
+  ;; the floats beforehand
+  (or floating-constant
+      integer-constant
+      enumeration-constant
+      character-constant))
+
+;; (6.4.4.1)
+(define-peg-pattern integer-constant all
+  (and (or decimal-constant
+           hexadecimal-constant
+           octal-constant)
+       (? integer-suffix)))
+
+;; (6.4.4.1)
+(define-peg-pattern decimal-constant all
+  (and nonzero-digit (* digit)))
+
+;; (6.4.4.1)
+(define-peg-pattern octal-constant all
+  (and "0" (* octal-digit)))
+
+;; (6.4.4.1)
+(define-peg-pattern hexadecimal-constant all
+  (and hexadecimal-prefix (+ hexadecimal-digit)))
+
+;; (6.4.4.1)
+(define-peg-pattern hexadecimal-prefix none
+  (or "0x" "0X"))
+
+;; (6.4.4.1)
+(define-peg-pattern nonzero-digit body
+  (range #\1 #\9))
+
+;; (6.4.4.1)
+(define-peg-pattern octal-digit body
+  (range #\0 #\7))
+
+;; (6.4.4.1)
+(define-peg-pattern hexadecimal-digit body
+  (or (range #\0 #\9)
+      (range #\a #\f)
+      (range #\A #\F)))
+
+;; (6.4.4.1)
+(define-peg-pattern integer-suffix all
+  (or (and unsigned-suffix (? long-suffix))
+      (and long-suffix (? unsigned-suffix))))
+
+;; (6.4.4.1)
+;; This is a merger of long-suffix and long-long-suffix
+(define-peg-pattern long-suffix body
+  (or "l" "L" "ll" "LL"))
+
+;; (6.4.4.1)
+(define-peg-pattern unsigned-suffix body
+  (or "u" "U"))
+
+;; (6.4.4.2)
+(define-peg-pattern floating-constant all
+  (or decimal-floating-constant
+      hexadecimal-floating-constant))
+
+;; (6.4.4.2)
+(define-peg-pattern decimal-floating-constant all
+  (or (and fractional-constant (? exponent-part) (? floating-suffix))
+      (and digit-sequence exponent-part (? floating-suffix))))
+
+;; (6.4.4.2)
+(define-peg-pattern hexadecimal-floating-constant all
+  (and hexadecimal-prefix
+       (or hexadecimal-fractional-constant
+           hexadecimal-digit-sequence)
+       binary-exponent-part
+       (? floating-suffix)))
+
+;; (6.4.4.2)
+(define-peg-pattern fractional-constant all
+  (or (and (? digit-sequence) "." digit-sequence)
+      (and digit-sequence ".")))
+
+;; (6.4.4.2)
+(define-peg-pattern exponent-part all
+  (and (or "e" "E") (? sign) digit-sequence))
+
+;; (6.4.4.2)
+(define-peg-pattern sign all
+  (or "+" "-"))
+
+;; (6.4.4.2)
+(define-peg-pattern digit-sequence body
+  (+ digit))
+
+;; (6.4.4.2)
+(define-peg-pattern hexadecimal-fractional-constant all
+  (or (and (? hexadecimal-digit-sequence) "." hexadecimal-digit-sequence)
+      (and    hexadecimal-digit-sequence  ".")))
+
+;; (6.4.4.2)
+(define-peg-pattern binary-exponent-part all
+  (and (ignore (or "p" "P"))
+       (? sign)
+       digit-sequence))
+
+;; (6.4.4.2)
+(define-peg-pattern hexadecimal-digit-sequence body
+  (+ hexadecimal-digit))
+
+;; (6.4.4.2)
+(define-peg-pattern floating-suffix all
+  (or "f" "l" "F" "L"))
+
+;; (6.4.4.3)
+(define-peg-pattern enumeration-constant all
+  identifier)
+
+(define-peg-pattern character-prefix all
+  (or "L" "u" "U"))
+
+;; (6.4.4.4)
+(define-peg-pattern character-constant all
+  (and (? character-prefix)
+       (ignore "'")
+       (+ c-char)
+       (ignore "'")))
+
+;; (6.4.4.4)
+(define-peg-pattern c-char body
+  (or (and (not-followed-by (or "'" "\\" "\n"))  peg-any)
+      escape-sequence))
+
+;; (6.4.4.4)
+(define-peg-pattern escape-sequence all
+  (or simple-escape-sequence
+      octal-escape-sequence
+      hexadecimal-escape-sequence
+      universal-character-name))
+
+;; (6.4.4.4)
+(define-peg-pattern simple-escape-sequence all
+  (and (ignore "\\") (or "'" "\"" "?" "\\"
+                         "a" "b" "f" "n" "r" "t" "v")))
+
+;; (6.4.4.4)
+(define-peg-pattern octal-escape-sequence all
+  (and (ignore "\\") octal-digit (? octal-digit) (? octal-digit)))
+
+;; (6.4.4.4)
+(define-peg-pattern hexadecimal-escape-sequence all
+  (and (ignore "\\x") (+ hexadecimal-digit)))
+
+;; A.1.6 String literals
+
+;; (6.4.5)
+(define-peg-pattern string-literal all
+  (and (? encoding-prefix)
+       (ignore "\"")
+       (* s-char)
+       (ignore "\"")))
+
+;; (6.4.5)
+(define-peg-pattern encoding-prefix all
+  (or "u8" "u" "U" "L"))
+
+;; (6.4.5)
+(define-peg-pattern s-char body
+  (or (and (not-followed-by (or "\"" "\\" "\n")) peg-any)
+      escape-sequence))
+
+;;; A.1.7
+
+;; (6.4.6)
+(define-peg-pattern punctuator all
+  (or "[" "]" "(" ")" "{" "}"
+      "..."                             ; Moved to be before "."
+      "." "->"
+      "&&" "||"
+      "!="
+      "++" "--"
+      "*=" "/=" "%=" "+=" "-=" "<<=" ">>=" "&=" "^=" "|="
+      "<=" ">=" "=="
+      "="
+      "/" "%" "<<" ">>" "<" ">"   "^" "|"
+      "?" ":" ";"
+      "&" "*" "+" "-" "~" "!"
+      "," "##" "#" ; # and ## flipped
+      "<:" ":>" "<%" "%>" "%:%:" "%:" ; %: and %:%: flipped
+      ))
+
+;;; A.1.8 Header names
+
+(define-peg-pattern h-string all (+ h-char))
+(define-peg-pattern q-string all (+ q-char))
+
+;; (6.4.7)
+(define-peg-pattern header-name all
+  (or (and (ignore "<")  h-string (ignore ">"))
+      ;; NOTE this case will never be reached, since it's treated as a regular
+      ;; string instead
+      (and (ignore "\"") q-string (ignore "\""))))
+
+;; (6.4.7)
+(define-peg-pattern h-char body
+  (or (and (not-followed-by (or ">" "\n")) peg-any)
+      escape-sequence))
+
+;; (6.4.7)
+(define-peg-pattern q-char body
+  (or (and (not-followed-by (or "\"" "\n")) peg-any)
+      escape-sequence))
+
+;;; A.1.9 Preprocessing numbers
+
+;; (6.4.8)
+(define-peg-pattern pp-number all
+  (and (? ".") digit
+       (* (or digit
+              identifier-nondigit
+              (and (or "e" "E" "p" "P")
+                   sign)
+              "."))))
+
+
+
+(define-peg-pattern whitespace all
+  (or "\t" "\n" "\v" "\f" " "
+      ;; "\r"
+      ))
+
+(define-peg-pattern block-comment body
+  (and (ignore "/*")
+       (* (and (not-followed-by "*/")
+               peg-any))
+       (ignore "*/")))
+
+(define-peg-pattern line-comment body
+  (and (ignore "//")
+       (* (and (not-followed-by "\n")
+               peg-any))))
+
+(define-peg-pattern comment all
+  (or line-comment block-comment))
+
+(define-peg-pattern non-whitespace all
+  (and (not-followed-by whitespace)
+       peg-any))
+
+(define-peg-pattern preprocessing-tokens all
+  (* (or whitespace
+         comment
+         preprocessing-token
+         non-whitespace)))
+
+
+
+;; comment could be merged with whitespace, but then unlex would have to know that
+
+;; other is the "each non-white-space character that cannot be one of the above"
+;; clause from 6.4 p. 1
+
+(define-type (lexeme)
+  (type type: (memv '(whitespace comment preprocessing-token other placemaker)))
+  (body type: (or string? list?))
+  (noexpand type: (list-of string?)
+            default: '()))
+
+(define (placemaker)
+  (lexeme type: 'placemaker body: '()))
+
+(define (lex-output->lexeme-object x)
+  (match x
+    (`(non-whitespace ,body)
+     (lexeme body: body type: 'other))
+    (`(whitespace ,body)
+     (lexeme body: body type: 'whitespace ))
+    (`(comment ,body)
+     (lexeme body: body type: 'comment ))
+    (`(preprocessing-token ,body)
+     (match body
+       ('string-literal
+        ;; Unflatten case
+        (lexeme body: '(string-literal (encoding-prefix) "")
+                type: 'preprocessing-token))
+       (('string-literal `(encoding-prefix ,px) args ...)
+        (lexeme body: `(string-literal (encoding-prefix . ,px) ,@args)
+                type: 'preprocessing-token))
+       (('string-literal args ...)
+        (lexeme body: `(string-literal (encoding-prefix) ,@args)
+                type: 'preprocessing-token))
+       (('character-constant `(character-prefix ,px) args ...)
+        (lexeme body: `(character-constant (character-prefix . ,px)
+                                           ,@args)
+                type: 'preprocessing-token))
+       (('character-constant args ...)
+        (lexeme body: `(character-constant (character-prefix) ,@args)
+                type: 'preprocessing-token))
+       (body (lexeme body: body type: 'preprocessing-token))))
+
+    ;; "unflatten"
+    ('comment (lexeme body: "" type: 'comment))))
+
+
+
+
+;; At a number of places I chose token depending on the order of the rule. The
+;; standard however says that the longest possible choice should be used.
+;; 6.4 p. 4
+
+;; returns a list of lexemes
+(define (lex string)
+  (if (string-null? string)
+      '()
+      (map lex-output->lexeme-object
+           (let ((result (match-pattern preprocessing-tokens string)))
+             (let ((trailing (substring (peg:string result)
+                                        (peg:end result))))
+               (unless (string-null? trailing)
+                 (scm-error 'cpp-lex-error "lex"
+                            "Failed to lex string, remaining trailing characters: ~s"
+                            (list trailing) #f)))
+             (unless (list? (peg:tree result))
+               (scm-error 'cpp-lex-error "lex"
+                          "Parsing just failed. Chars: ~s"
+                          (list (peg:string result)) #f))
+             (cdr (peg:tree result))))))
+
+
+
+
+
+;; (parse-decimals "555" 10)
+;; ⇒ 0.5549999999999999
+;; (parse-decimals "8" 16)
+;; ⇒ 0.5
+(define (parse-decimals str base)
+  (/ (fold (lambda (digit done)
+             (let ((v (string->number digit base)))
+               (+ v (/ done base))))
+           0.0
+           (map string (string->list str)))
+     base))
+
+;; parse a number on form <digits>.<digits>
+(define (parse-fractional str base)
+  (let* ((pair (string-split str #\.))
+         (integer (list-ref pair 0))
+         (decimals (list-ref pair 1)))
+    (+ (if (string-null? integer)
+           0 (string->number integer 16))
+       (if (string-null? decimals)
+           0 (parse-decimals decimals 16)))))
+
+
+(define (parse-float body)
+  (define (fractional-constant x)
+    (case x
+      ((decimal-floating-constant)                 'fractional-constant)
+      ((hexadecimal-floating-constant) 'hexadecimal-fractional-constant)))
+
+  (define (exponent-part x)
+    (case x
+      ((decimal-floating-constant)            'exponent-part)
+      ((hexadecimal-floating-constant) 'binary-exponent-part)))
+
+  (define (expt-base x)
+    (case x
+      ((decimal-floating-constant)     10)
+      ((hexadecimal-floating-constant)  2)))
+
+  (define (base x)
+    (case x
+      ((decimal-floating-constant)     10)
+      ((hexadecimal-floating-constant) 16)))
+
+  (let ((type (car body))
+        (body (cdr body)))
+    (* 1.0
+       (cond ((assoc-ref body (fractional-constant type))
+              => (lambda (fc) (parse-fractional (car fc) (base type))))
+             (else (string->number (car body) (base type))))
+       (cond ((assoc-ref body (exponent-part type))
+              => (lambda (x) (expt (expt-base type)
+                              (string->number (car x) (base type)))))
+             (else 1)))
+    ;; TODO do something with (possible) suffix
+    ;; (assoc-ref body 'floating-suffix)
+    ))
+
+(define (parse-integer body)
+  (let* (;; (suffix (assoc-ref body 'integer-suffix))
+         (value (cadr (car body)))
+         (value-type (car (car body))))
+    ;; TODO do something with suffix
+    (string->number
+     value
+     (case value-type
+       ((octal-constant)        8)
+       ((decimal-constant)     10)
+       ((hexadecimal-constant) 16)))))
+
+;; (parse-c-number "0x1.8p0")
+;; ⇒ 1.5
+
+;; TODO is "5ul" equivalent to "((unsigned long) 5)"
+(define (parse-c-number string)
+  (cond ((match-pattern constant string)
+         => (lambda (m)
+              (let ((m (cadr (peg:tree m))))       ; Strip 'constant wrapper
+                (case (car m)
+                  ((floating-constant)
+                   (parse-float (cadr m)))
+
+                  ((integer-constant)
+                   (parse-integer (cdr m)))
+
+                  ((enumeration-constant character-constant)
+                   (scm-error 'misc-error "parse-c-number"
+                              "Couldn't parse [~a] as a /number/ (~s)"
+                              (list string m) #f))))))
+
+        (else (scm-error 'misc-error "parse-c-number"
+                         "Couldn't parse [~a] as a number"
+                         (list string) #f))))
+
+
+
+
+;;; 5.1.11.2 Translation phases
+
+(define (tokenize string)
+  (-> string
+;;; 1. trigraph replacement
+      replace-trigraphs
+;;; 2. Line folding
+      fold-lines
+;;; 3. Decomposition into preprocenning tokens, whitespaces, and comments
+      lex
+      comments->whitespace))
+
+;; These really belong in (c cpp-types), but that would create a dependency cycle
+
+(define (comment->whitespace token)
+  (if ;; (comment-token? token)
+   (and (lexeme? token)
+        (eq? 'comment (type token)))
+      (car (lex " "))
+      token))
+
+(define (comments->whitespace tokens)
+  (map comment->whitespace tokens))
diff --git a/module/c/line-fold.scm b/module/c/line-fold.scm
new file mode 100644
index 00000000..c61c2c70
--- /dev/null
+++ b/module/c/line-fold.scm
@@ -0,0 +1,29 @@
+(define-module (c line-fold)
+  :use-module (srfi srfi-1)
+  :use-module (srfi srfi-71)
+  :export (fold-lines))
+
+(define (line-continued? line)
+  (and (not (string-null? line))
+       (char=? #\\ (string-ref line (1- (string-length line))))))
+
+(define (strip-backslash line)
+  (string-drop-right line 1))
+
+(define (fold-lines string)
+  (with-output-to-string
+    (lambda ()
+      (let loop ((lines (string-split string #\newline)))
+        (cond ((null? lines) 'NOOP)
+              ((null? (cdr lines))
+               ;; TODO error message if last character is a backslash
+               (display (car lines))
+               (newline))
+              (else
+               (let ((to-merge remaining (span line-continued? lines)))
+                 (for-each display (map strip-backslash to-merge))
+                 (display (car remaining))
+                 (newline)
+                 (for-each (lambda _ (newline))
+                           (iota (length to-merge)))
+                 (loop (cdr remaining)))))))))
diff --git a/module/c/operators.scm b/module/c/operators.scm
index ab1b3e7c..910dc8a9 100644
--- a/module/c/operators.scm
+++ b/module/c/operators.scm
@@ -9,8 +9,9 @@
   `(+ - * / & ,(symbol #\|) ^ << >> % < > =))
 
 ;; apparently part of C
+;; https://en.cppreference.com/w/cpp/language/operator_alternative
 (define wordy-binary-operators
-  '(bitand and_eq and bitor or_eq or xor_eq xor))
+  '(bitand and_eq and bitor or_eq or xor_eq xor not_eq))
 
 (define symbol-binary-operators
   (append (map (lambda (x) (symbol-append x '=)) simple-operators)
diff --git a/module/c/parse.scm b/module/c/parse.scm
index 8030da77..7d11ea17 100644
--- a/module/c/parse.scm
+++ b/module/c/parse.scm
@@ -1,11 +1,14 @@
 (define-module (c parse)
   :use-module (hnh util)
   :use-module (srfi srfi-1)
+  :use-module (srfi srfi-71)
   :use-module (ice-9 match)
+  :use-module ((rnrs io ports)
+               :select (string->bytevector make-transcoder utf-8-codec))
+  :use-module (rnrs bytevectors)
   :export (parse-lexeme-tree))
 
-;;; Rename this
-(define (perms set)
+(define (permutations set)
   (concatenate
    (map (lambda (key)
           (map (lambda (o) (cons key o))
@@ -21,23 +24,124 @@
   (define valid-sequences
     (delete 'dummy
             (lset-union eq? '(dummy)
-                        (map symbol-concat (perms '(() U L)))
-                        (map symbol-concat (perms '(() U LL))))))
+                        (map symbol-concat (permutations '(() U L)))
+                        (map symbol-concat (permutations '(() U LL))))))
 
   ;; => (LLU ULL LL LU UL L U)
 
   (aif (memv (string->symbol (string-upcase str))
           valid-sequences)
        (case (car it)
-         [(LLU ULL) '(unsigned long-long)]
+         [(LLU ULL) '(unsigned long long)]
          [(LU UL) '(unsigned long)]
-         [(LL) '(long-long)]
+         [(LL) '(long long)]
          [(L) '(long)]
          [(U) '(unsigned)])
        (scm-error 'c-parse-error "parse-integer-suffix"
                   "Invalid integer suffix ~s"
                   (list str) #f)))
 
+(define (parse-float-suffix str)
+  (case (string->symbol str)
+    ((f F) '(float))
+    ((l L) '(long double))))
+
+(define (group-body->type vars)
+  (concatenate
+   (map
+    (match-lambda (('variable var) (list (parse-lexeme-tree `(variable ,var))))
+                  (('postfix ('variable var)
+                             ('postfix-operator "*"))
+                   (list (parse-lexeme-tree `(variable ,var))
+                         '*))
+                  (else (scm-error 'c-parse-error "parse-lexeme-tree"
+                                   "Invalid token ~s in typecast form: ~s"
+                                   (list else vars) #f)))
+    vars)))
+
+;; Takes a list of strings and integers, and merges it all into a single
+;; bytevector representing a c string
+(define* (string-fragments->c-string
+          fragments optional: (transcoder (make-transcoder (utf-8-codec))))
+
+  (define fragments-fixed
+    (map (lambda (frag)
+           (if (string? frag)
+               (string->bytevector frag transcoder)
+               frag))
+         fragments))
+
+  (define bv-length
+    (fold (lambda (item sum) (+ sum (if (bytevector? item)
+                                   (bytevector-length item)
+                                   1)))
+          0 fragments-fixed))
+
+  (define bv (make-bytevector (1+ bv-length)))
+  ;; trailing null byte
+  (bytevector-u8-set! bv bv-length 0)
+  (fold (lambda (item idx)
+          (cond ((bytevector? item)
+                 (bytevector-copy! item 0
+                                   bv idx
+                                   (bytevector-length item))
+                 (+ idx (bytevector-length item)))
+                (else (bytevector-u8-set! bv idx item)
+                      (+ idx 1))))
+        0
+        fragments-fixed)
+  bv)
+
+(define (parse-float-form float-form)
+  (let ((float-string
+         (fold (lambda (arg str)
+                 (string-append
+                  str
+                  (match arg
+                    (('float-integer ('base-10 n)) n)
+                    (('float-decimal ('base-10 n)) (string-append "." n))
+                    (('exponent "+"  ('base-10 n)) (string-append "e"  n))
+                    (('exponent      ('base-10 n)) (string-append "e"  n))
+                    (('exponent "-"  ('base-10 n)) (string-append "e-" n)))))
+               "" float-form)))
+    ;; exact->inexact is a no-op if we already have an inexact number, but
+    ;; ensures we get an inexact number when we have an exact number (which we
+    ;; can get from the "1." case). Returning an inexact number here is important
+    ;; to avoid arithmetic suprises later.
+    (exact->inexact
+     (or (string->number float-string)
+         (scm-error 'c-parse-error "parse-lexeme-tree"
+                    "Couldn't parse expression as float: ~s"
+                    (list `(float ,@args)) #f)))))
+
+
+(define (resolve-escaped-char form)
+  (match form
+    (('base-8-char n)  (string->number n 8))
+    (('base-16-char n) (string->number n 16))
+    (c (char->integer
+        (case (string-ref c 0)
+          ((#\a) #\alarm)
+          ((#\b) #\backspace)
+          ((#\e) #\esc) ;; non-standard
+          ((#\f) #\page)
+          ((#\n) #\newline)
+          ((#\r) #\return)
+          ((#\t) #\tab)
+          ((#\v) #\vtab)
+          ((#\\) #\\)
+          ;; These are valid in both strings and chars
+          ((#\') #\')
+          ((#\") #\"))))))
+
+;; Takes a list of strings and escaped-char form
+;; and returns a list of strings and integers
+(define (resolve-string-fragment fragment)
+  (match fragment
+    (('escaped-char c)
+     (resolve-escaped-char c))
+    (fargment fragment)))
+
 (define (parse-lexeme-tree tree)
   (match tree
     ['() '()]
@@ -50,55 +154,67 @@
     [('integer n ('integer-suffix suffix))
      `(as-type
        ,(parse-integer-suffix suffix)
-       ,(parse-lexeme-tree n))
-     ]
+       ,(parse-lexeme-tree n))]
+
     [('integer n)
      (parse-lexeme-tree n)]
 
+
+    [('float args ... ('float-suffix suffix))
+     `(as-type ,(parse-float-suffix suffix)
+               ;; parse rest of float as if it lacked a suffix
+               ,(parse-lexeme-tree `(float ,@args)))]
+
+    [('float args ...) (parse-float-form args)]
+
     ;; Character literals, stored as raw integers
     ;; so mathematical operations keep working on them.
-    [('char ('escaped-char ('base-8-char n)))
-     (-> n (string->number 8) #; integer->char)]
-    [('char ('escaped-char ('base-16-char n)))
-     (-> n (string->number 16) #; integer->char)]
-    [('char ('escaped-char c))
-     (char->integer
-      (case (string-ref c 0)
-        ((#\a) #\alarm)
-        ((#\b) #\backspace)
-        ((#\e) #\esc)
-        ((#\f) #\page)
-        ((#\n) #\newline)
-        ((#\r) #\return)
-        ((#\t) #\tab)
-        ((#\v) #\vtab)
-        ((#\\) #\\)
-        ((#\') #\')))]
+    [('char ('escaped-char c)) (resolve-escaped-char c)]
+
     [('char c) (char->integer (string-ref c 0))]
 
     [('variable var) (string->symbol var)]
+
+    ;; normalize some binary operators to their wordy equivalent
+    ;; (which also happens to match better with scheme)
+    [('operator "&&") 'and]
+    [('operator "&=") 'and_eq]
+    [('operator "&")  'bitand]
+    [('operator "|")  'bitor]
+    [('operator "!=") 'not_eq]
+    [('operator "||") 'or]
+    [('operator "|=") 'or_eq]
+    [('operator "^")  'xor]
+    [('operator "^=") 'xor_eq]
+    ;; Change these names to something scheme can handle better
+    [('operator ".") 'object-slot]
+    [('operator "->") 'dereference-slot]
     [('operator op)  (string->symbol op)]
+
     [('prefix-operator op)
      (case (string->symbol op)
+       ((!) 'not)
+       ((~) 'compl)
        ((*) 'dereference)
        ((&) 'pointer)
        ((++) 'pre-increment)
        ((--) 'pre-decrement)
-       (else => identity))]
+       ((-) '-)
+       (else (scm-error 'c-parse-error "parse-lexeme-tree"
+                        "Unknown prefix operator ~s"
+                        (list op) #f)))]
     [('postfix-operator op)
      (case (string->symbol op)
        [(++) 'post-increment]
        [(--) 'post-decrement]
-       [else => identity])]
+       [else (scm-error 'c-parse-error "parse-lexeme-tree"
+                        "Unknown postfix operator ~s"
+                        (list op) #f)])]
 
     ;; Parenthesis grouping
-    [('group args)
+    [('group args ...)
      (parse-lexeme-tree args)]
 
-    ;; Atomic item. Used by flatten-infix
-    [('atom body)
-     (parse-lexeme-tree body)]
-
     [('prefix op arg)
      `(,(parse-lexeme-tree op)
        ,(parse-lexeme-tree arg))]
@@ -107,81 +223,204 @@
      `(,(parse-lexeme-tree op)
        ,(parse-lexeme-tree arg))]
 
+
+
+
+
+    ;; resolved-operator and ternary are the return "types"
+    ;; of resolve-order-of-operations
+    [(('resolved-operator op) args ...)
+     `(,op ,@(map parse-lexeme-tree args))]
+
+    [('ternary a b c)
+     `(ternary ,(parse-lexeme-tree a)
+               ,(parse-lexeme-tree b)
+               ,(parse-lexeme-tree c))]
+
+
+
+
+    ;; Is it OK for literal strings to be "stored" inline?
+    ;; Or must they be a pointer?
+    ['string #vu8(0)]
+    [('string str ...)
+     (-> (map resolve-string-fragment str)
+         string-fragments->c-string)]
+
+    ;; implicit concatenation of string literals
+    [(('string str ...) ...)
+     (-> (map resolve-string-fragment (concatenate str))
+         string-fragments->c-string)]
+
     [('infix args ...)
-     (resolve-order-of-operations
-      (flatten-infix (cons 'infix args)))]
+     (let ((r (resolve-order-of-operations
+               (flatten-infix (cons 'infix args)))))
+       (parse-lexeme-tree r))]
+
 
     [('funcall function ('group arguments))
      `(funcall ,(parse-lexeme-tree function)
                ,(parse-lexeme-tree arguments))]
 
-    [bare (scm-error 'c-parse-error
-                     "parse-lexeme-tree"
-                     "Naked literal in lex-tree: ~s"
-                     (list bare)
-                     #f)]))
+    [(('variable "struct") ('variable value) ..1)
+     `(struct-type ,@(map string->symbol value))
+     ]
+
+    ;; A list of variables. Most likely a type signature
+    ;; [(('variable value) ..1)
+    ;;  ]
+
+    ;; A typecast with only variables must (?) be a typecast?
+    [(('group groups ..1) ... value)
+     (fold-right (lambda (type done) `(as-type ,type ,done))
+                 (parse-lexeme-tree value)
+                 (map group-body->type groups))]
+
+    ;; Type name resolution?
+    ;; https://en.wikipedia.org/wiki/C_data_types
+    ;; base types with spaces:
+    ;; =======================
+    ;; [[un]signed] char
+    ;; [[un]signed] short [int]
+    ;; [[un]signed] int
+    ;; [un]signed [int]
+    ;; [[un]signed] long [int]
+    ;; [[un]signed] long long [int]
+    ;; float
+    ;; [long] double
+
+    ;; https://en.wikipedia.org/wiki/Type_qualifier
+    ;; qualifiers
+    ;; const
+    ;; volatile
+    ;; restrict
+    ;; _Atomic
+
+
+    ;; Storage specifiers
+    ;; auto
+    ;; register
+    ;; static
+    ;; extern
+
+    ;; struct <typename>
+    ;; enum <typename>
+    ;; union <typename>
+
+    ;; https://en.wikipedia.org/wiki/C_syntax
+    ;; int (*ptr_to_array)[100]
+
+
+    [(? symbol? bare)
+     (scm-error 'c-parse-error
+                "parse-lexeme-tree"
+                "Naked literal in lex-tree: ~s"
+                (list bare)
+                #f)]
+
+    [form
+     (scm-error 'c-parse-error
+                "parse-lexeme-tree"
+                "Unknown form in lex-tree: ~s"
+                (list form) #f)
+     ]))
 
 ;; https://en.wikipedia.org/wiki/Operators_in_C_and_C%2B%2B
+;; https://en.cppreference.com/w/c/language/operator_precedence
 
 (define order-of-operations
   (reverse
-   (concatenate
-    ;; This is only for binary operations
-    `((-> ,(symbol #\.))
-      (* / %)
-      (+ -)
-      (<< >>)
-      (< <= > >=)
-      (== !=)
-      (&)
-      (^)
-      (,(symbol #\|))
-      (&&)
-      (,(symbol #\| #\|))
-      (= += -= *= /= %= <<= >>= &= ^= ,(symbol #\| #\=))
-      (,(symbol #\,))
-      ))))
-
-(define (mark-other form)
-  (if (list? form) (cons '*other* form) form))
+   ;; This is only for binary operations
+   `((-> ,(symbol #\.))
+     ;; All unary procedures go here, incnluding typecasts, and sizeof
+     (* / %)
+     (+ -)
+     (<< >>)
+     (< <= > >=)
+     (== != not_eq)
+     (& bitand)
+     (^ xorg)
+     (,(symbol #\|) bitor)
+     (&& and)
+     (,(symbol #\| #\|) or)
+     (? :)
+     (= += -= *= /= %= <<= >>= &= ^= ,(symbol #\| #\=)
+        and_eq or_eq xor_eq)
+     (,(symbol #\,))
+     )))
+
+;; a.b->c.d (. (-> (. a b) c) d)
+;; 2 * 3 / 4 * 5 => (* (/ (* 2 3) 4) 5)
+;;         eller => (* 2 (/ 3 4) 5)
 
 (define* (resolve-order-of-operations
           tree optional: (order order-of-operations))
 
   (if (null? order)
-      (car tree)
+      (scm-error 'c-parse-error
+                 "resolve-order-of-operations"
+                 "Out of operations to resolve when resolving expression ~s"
+                 (list tree) #f)
       (match tree
-        [('*other* body ...) body]
-        [(form) (resolve-order-of-operations form order)]
-        [(forms ...)
-         (match (split-by forms (car order))
-           [(group) (resolve-order-of-operations group (cdr order))]
-           [groups
-            (cons (car order)
-                  (map (lambda (form) (resolve-order-of-operations
-                                  form order-of-operations))
-                       groups))])]
-        [a a])))
+        [('fixed-infix form) form]
+        [('fixed-infix forms ...)
+         (match (split-by-one-of forms (car order))
+           [(group)
+            (resolve-order-of-operations (cons 'fixed-infix group)
+                                         (cdr order))]
+           [(a ('? b ...) (': c ...))
+            `(ternary ,(resolve-order-of-operations (cons 'fixed-infix a) (cdr order))
+                      ,(resolve-order-of-operations (cons 'fixed-infix b) (cdr order))
+                      ,(resolve-order-of-operations (cons 'fixed-infix c) (cdr order)))]
+           [(first rest ...)
+            ;; TODO this is only valid for the associative operators (+, ...)
+            ;; but not some other (<, ...)
+            (if (apply eq? (map car rest))
+                (let ((op (caar rest)))
+                  `((resolved-operator ,op)
+                    ,@(map (lambda (x) (resolve-order-of-operations (cons 'fixed-infix x)
+                                                               (cdr order)))
+                           (cons first (map cdr rest)))))
+                (fold (lambda (item done)
+                        (let ((operator args (car+cdr item)))
+                          `((resolved-operator ,operator)
+                            ,done ,(resolve-order-of-operations
+                                    (cons 'fixed-infix args)
+                                    (cdr order)))))
+                      (resolve-order-of-operations (cons 'fixed-infix first)
+                                                   (cdr order))
+                      rest))])])))
+
+;; 1 * 2 / 3 * 4
+;; ⇒ ((1) (* 2) (/ 3) (* 4))
+;; (1)
+;; (* (1) 2)
+;; (/ (* (1) 2) 3)
+;; (* (/ (* (1) 2) 3) 4)
 
 ;; Flatens a tree of infix triples. Stops when it should.
 ;; (parenthesis, function calls, ...)
 (define (flatten-infix form)
-  (match form
-    [('infix left op ('infix right ...))
-     (cons* (parse-lexeme-tree left)
-            (parse-lexeme-tree op)
-            (flatten-infix (cons 'infix right)))]
-
-    [('infix left op right)
-     (list (mark-other (parse-lexeme-tree left))
-           (parse-lexeme-tree op)
-           (mark-other (parse-lexeme-tree right)))]
-
-    [other (scm-error 'c-parse-error
-                      "flatten-infix"
-                      "Not an infix tree ~a"
-                      (list other)
-                      #f)]))
+  (cons 'fixed-infix
+   (let loop ((form form))
+     (match form
+       [('infix left op ('infix right ...))
+        (cons* left
+               (parse-lexeme-tree op)
+               (loop (cons 'infix right)))]
+
+       [('infix left op right)
+        (list left
+              (parse-lexeme-tree op)
+              right)]
+
+       [('infix form) form]
+
+       [other (scm-error 'c-parse-error
+                         "flatten-infix"
+                         "Not an infix tree ~a"
+                         (list other)
+                         #f)]))))
 
 
 
diff --git a/module/c/parse2.scm b/module/c/parse2.scm
new file mode 100644
index 00000000..fad2ffd8
--- /dev/null
+++ b/module/c/parse2.scm
@@ -0,0 +1,554 @@
+(define-module (c parse2)
+  :use-module (hnh util)
+  :use-module (system base lalr))
+
+(define (make-parser)
+  (lalr-parser
+   (#{out-table:}# "/tmp/c-parser.txt")
+   (#{output:}# c-parser "/tmp/c-parser.scm")
+   ;; (#{driver:}# glr)
+
+
+   (
+    ;; keywords
+    auto break case char const continue default do double else enum
+         extern float for goto if inline int long register restrict
+         return short signed sizeof static struct switch typedef union
+         unsigned void volatile while _Alignas _Alignof _Atomic _Bool
+         _Complex _Generic _Imaginary _Noreturn _Static_assert
+         _Thread_local
+
+         ;; punctuators
+         ;; 6.4.6
+         lbrack rbrack ; []
+         lparen rparen ; ()
+         lbrace rbrace ; {}
+         dot ; .
+         ->
+         ++ -- & * + - ~ !
+         / % << >> < > <= >= == != ^
+         pipe pipe2 ; | and || scheme handles these fine, but not emacs
+         &&
+         ? :
+         semicolon
+         ...
+         = *= /= %= += -= <<= >>= &= ^=
+         pipe= ; |=
+         comma ; ,
+         hash  ; #
+         hash2 ; ##
+         ;; TODO digraphs
+
+         ;; 6.4
+         ;; keyword - already translated
+         identifier
+         constant
+         string-literal
+         ;; punctuator - already translated
+         )
+
+   ;; Primitives
+
+   ;; (identifier) : $1
+   ;; (constant) : $1
+   ;; (string-literal) : $1
+
+
+   ;; compounds
+
+   (primary-expression
+
+    ;; 6.5.1
+    (identifier)
+    (constant)
+    (string-literal)
+    (lparen expression rparen)
+    (generic-selection))
+
+   (enumeration-constant
+    (identifier))
+
+
+   ;; 6.5.1.1
+   (generic-selection
+    (_Generic lparen assignment-expression comma generic-assoc-list))
+
+   (generic-assoc-list
+    (generic-association)
+    (generic-assoc-list comma generic-association))
+
+   (generic-association
+    (type-name : assignment-expression)
+    (default   : assignment-expression))
+
+   ;; 6.5.2
+   (postfix-expression
+    (primary-expression)
+    (postfix-expression lbrack expression rbrack)
+    (postfix-expression lparen rparen)
+    (postfix-expression lparen argument-expression-list rparen)
+    (postfix-expression dot identifier)
+    (postfix-expression -> identifier)
+    (postfix-expression ++)
+    (postfix-expression --)
+    (lparen type-name rparen lbrace initializer-list rbrace)
+    (lparen type-name rparen lbrace initializer-list comma rbrace))
+
+   (argument-expression-list
+    (assignment-expression)
+    (argument-expression-list comma assignment-expression))
+
+   ;; 6.5.3
+   (unary-expression
+    (postfix-expression)
+    (++ unary-expression)
+    (-- unary-expression)
+    (unary-operator cast-expression)
+    (sizeof unary-expression)
+    (sizeof lparen type-name rparen)
+    (_Alignof lparen type-name rparen))
+
+   (unary-operator
+    (&)
+    (*)
+    (+)
+    (-)
+    (~)
+    (!))
+
+   ;; 6.5.4
+   (cast-expression
+    (unary-expression)
+    (lparen type-name rparen cast-expression))
+
+   ;; 6.5.5
+   (multiplicative-expression
+    (cast-expression)
+    (multiplicative-expression * cast-expression)
+    (multiplicative-expression / cast-expression)
+    (multiplicative-expression % cast-expression))
+
+   ;; 6.5.6
+   (additive-expression
+    (multiplicative-expression)
+    (additive-expression + multiplicative-expression)
+    (additive-expression - multiplicative-expression))
+
+
+   ;; 6.5.7
+   (shift-expression
+    (additive-expression)
+    (shift-expression << additive-expression)
+    (shift-expression >> additive-expression))
+
+   ;; 6.5.8
+   (relational-expression
+    (shift-expression)
+    (relational-expression < shift-expression)
+    (relational-expression > shift-expression)
+    (relational-expression <= shift-expression)
+    (relational-expression >= shift-expression))
+
+   ;; 6.5.9
+   (equality-expression
+    (relational-expression)
+    (equality-expression == relational-expression)
+    (equality-expression != relational-expression))
+
+
+   ;; 6.5.10
+   (AND-expression
+    (equality-expression)
+    (AND-expression & equality-expression))
+
+   ;; 6.5.11
+   (exclusive-OR-expression
+    (AND-expression)
+    (exclusive-OR-expression ^ AND-expression))
+
+   ;; 6.5.12
+   (inclusive-OR-expression
+    (exclusive-OR-expression)
+    (inclusive-OR-expression pipe exclusive-OR-expression))
+
+   ;; 6.5.13
+   (logical-AND-expression
+    (inclusive-OR-expression)
+    (logical-AND-expression && inclusive-OR-expression))
+
+   ;; 6.5.14
+   (logical-OR-expression
+    (logical-AND-expression)
+    (logical-OR-expression pipe2 logical-AND-expression))
+
+   ;; 6.5.15
+   (conditional-expression
+    (logical-OR-expression)
+    (logical-OR-expression ? expression : conditional-expression))
+
+   ;; 6.5.16
+   (assignment-expression
+    (conditional-expression)
+    (unary-expression assignment-operator assignment-expression))
+
+   (assignment-operator
+    (=)
+    (*=)
+    (/=)
+    (%=)
+    (+=)
+    (-=)
+    (<<=)
+    (>>=)
+    (&=)
+    (^=)
+    (pipe=))
+
+   ;; 6.5.17
+   (expression
+    (assignment-expression)
+    (expression comma assignment-expression))
+
+   ;; 6.6 constant expression
+   (constant-expression
+    (expression))
+
+   ;; 6.7
+   (declaration
+    (declaration-specifiers semicolon)
+    (declaration-specifiers init-declarator-list semicolon)
+    (static_assert-declaration))
+
+   (declaration-specifiers
+    (storage-class-specifier)
+    (storage-class-specifier declaration-specifiers)
+
+    (type-specifier)
+    (type-specifier declaration-specifiers)
+
+    (type-qualifier)
+    (type-qualifier declaration-specifiers)
+
+    (function-specifier)
+    (function-specifier declaration-specifiers)
+
+    (alignment-specifier)
+    (alignment-specifier declaration-specifiers))
+
+   (init-declarator-list
+    (init-declarator)
+    (init-declarator-list comma init-declarator))
+
+   (init-declarator
+    (declarator)
+    (declarator = initializer))
+
+
+   ;; 6.7.1
+   (storage-class-specifier
+    (typedef)
+    (extern)
+    (static)
+    (_Thread_local)
+    (auto)
+    (register))
+
+
+   ;; 6.7.2
+   (type-specifier
+    (void)
+    (char)
+    (short)
+    (int)
+    (long)
+    (float)
+    (double)
+    (signed)
+    (unsigned)
+    (_Bool)
+    (_Complex)
+    (atomic-type-specifier)
+    (struct-or-union-specifier)
+    (enum-specifier)
+    (typedef-name))
+
+   ;; 6.7.2.1
+   (struct-or-union-specifier
+    (struct-or-union lbrace struct-declaration-list rbrace)
+    (struct-or-union identifier lbrace struct-declaration-list rbrace)
+    (struct-or-union identifier))
+
+   (struct-or-union
+    (struct)
+    (union))
+
+   (struct-declaration-list
+    (struct-declaration)
+    (struct-declaration-list struct-declaration))
+
+   (struct-declaration
+    (specifier-qualifier-list semicolon)
+    (specifier-qualifier-list struct-declarator-list semicolon)
+    (static_assert-declaration))
+
+   (specifier-qualifier-list
+    (type-specifier)
+    (type-specifier specifier-qualifier-list)
+
+    (type-qualifier)
+    (type-qualifier specifier-qualifier-list))
+
+   (struct-declarator-list
+    (struct-declarator)
+    (struct-declarator-list comma struct-declarator))
+
+   (struct-declarator
+    (declarator)
+    (: constant-expression)
+    (declarator : constant-expression))
+
+   ;; 6.7.2.2
+   (enum-specifier
+    (enum identifier lbrace enumerator-list rbrace)
+    (enum lbrace enumerator-list rbrace)
+
+    (enum identifier lbrace enumerator-list comma rbrace)
+    (enum lbrace enumerator-list comma rbrace)
+
+    (enum identifier))
+
+   (enumerator-list
+    (enumerator)
+    (enumerator-list comma enumerator))
+
+   (enumerator
+    (enumeration-constant)
+    (enumeration-constant = constant-expression))
+
+   ;; 6.7.2.4
+   (atomic-type-specifier
+    (_Atomic lparen type-name rparen))
+
+   ;; 6.7.3
+   (type-qualifier
+    (const)
+    (restrict)
+    (volatile)
+    (_Atomic))
+
+   ;; 6.7.4
+   (function-specifier
+    (inline)
+    (_Noreturn))
+
+   ;; 6.7.5
+   (alignment-specifier
+    (_Alignas lparen type-name rparen)
+    (_Alignas lparen constant-expression rparen))
+
+   ;; 6.7.6
+
+   (declarator
+    (pointer direct-declarator)
+    (direct-declarator))
+
+   (direct-declarator
+    (identifier)
+    ( lparen declarator rparen )
+
+    (direct-declarator lbrack type-qualifier-list assignment-expression rbrack )
+    (direct-declarator lbrack                     assignment-expression rbrack )
+    (direct-declarator lbrack type-qualifier-list                       rbrack )
+    (direct-declarator lbrack                                           rbrack )
+
+    (direct-declarator lbrack static type-qualifier-list assignment-expression rbrack)
+    (direct-declarator lbrack static                     assignment-expression rbrack)
+
+    (direct-declarator lbrack type-qualifier-list static assignment-expression rbrack)
+
+    (direct-declarator lbrack type-qualifier-list * rbrack)
+    (direct-declarator lbrack                     * rbrack)
+
+    (direct-declarator lparen parameter-type-list rparen )
+    (direct-declarator lparen identifier-list rparen )
+    (direct-declarator lparen rparen ))
+
+   (pointer
+    (* type-qualifier-list)
+    (*)
+    (* type-qualifier-list pointer)
+    (* pointer))
+
+   (type-qualifier-list
+    (type-qualifier)
+    (type-qualifier-list type-qualifier))
+
+   (parameter-type-list
+    (parameter-list)
+    (parameter-list comma ...))
+
+   (parameter-list
+    (parameter-declaration)
+    (parameter-list comma parameter-declaration))
+
+   (parameter-declaration
+    (declaration-specifiers declarator)
+    (declaration-specifiers abstract-declarator)
+    (declaration-specifiers))
+
+   (identifier-list
+    (identifier)
+    (identifier-list comma identifier))
+
+   ;; 6.7.7
+   (type-name
+    (specifier-qualifier-list)
+    (specifier-qualifier-list abstract-declarator))
+
+   (abstract-declarator
+    (pointer)
+    (pointer direct-abstract-declarator)
+    (        direct-abstract-declarator))
+
+   (direct-abstract-declarator
+    ( lparen abstract-declarator rparen )
+    (direct-abstract-declarator lbrack type-qualifier-list assignment-expression rbrack )
+    (direct-abstract-declarator lbrack type-qualifier-list                       rbrack )
+    (direct-abstract-declarator lbrack                     assignment-expression rbrack )
+    (                           lbrack                                           rbrack )
+    (                           lbrack type-qualifier-list assignment-expression rbrack )
+    (                           lbrack type-qualifier-list                       rbrack )
+    (direct-abstract-declarator lbrack * rbrack)
+    (                           lbrack * rbrack)
+    (direct-abstract-declarator lparen parameter-type-list rparen )
+    (direct-abstract-declarator lparen                     rparen )
+    (                           lparen parameter-type-list rparen )
+    (                           lparen                     rparen ))
+
+   ;; 6.7.8
+   (typedef-name
+    (identifier))
+
+   ;; 6.7.9
+   (initializer
+    (assignment-expression)
+    (lbrace initializer-list rbrace)
+    (lbrace initializer-list comma rbrace))
+
+   (initializer-list
+    (designation initializer)
+    (initializer)
+    (initializer-list comma designation initializer)
+    (initializer-list comma             initializer))
+
+   (designation
+    (designator-list =))
+
+   (designator-list
+    (designator)
+    (designator-list designator))
+
+   (designator
+    (lbrack constant-expression rbrack)
+    (dot identifier))
+
+   ;; 6.7.10
+   (static_assert-declaration
+    (_Static_assert lparen constant-expression comma string-literal rparen semicolon))
+
+   ;; 6.8
+   (statement
+    (labeled-statement)
+    (compound-statement)
+    (expression-statement)
+    (selection-statement)
+    (iteration-statement)
+    (jump-statement))
+
+   ;; 6.8.1
+   (labeled-statement
+    (identifier : statement)
+    (case constant-expression : statement)
+    (default : statement))
+
+   ;; 6.8.2
+   (compound-statement
+    (lbrace block-item-list rbrace)
+    (lbrace rbrace))
+
+   (block-item-list
+    (block-item)
+    (block-item-list block-item))
+
+   (block-item
+    (declaration)
+    (statement))
+
+   ;; 6.8.3
+   (expression-statement
+    (expression semicolon)
+    (semicolon))
+
+   (selection-statement
+    (if lparen expression rparen statement)
+    (if lparen expression rparen statement else statement)
+    (switch lparen expression rparen statement))
+
+   ;; 6.8.5
+   (iteration-statement
+    (while lparen expression rparen statement)
+    (do statement while lparen expression rparen semicolon)
+    (for lparen expression semicolon expression semicolon expression rparen statement)
+    (for lparen expression semicolon expression semicolon            rparen statement)
+    (for lparen expression semicolon            semicolon expression rparen statement)
+    (for lparen            semicolon            semicolon            rparen statement)
+    (for lparen            semicolon expression semicolon expression rparen statement)
+    (for lparen            semicolon expression semicolon            rparen statement)
+    (for lparen declaration expression  semicolon expression rparen statement)
+    (for lparen declaration expression  semicolon            rparen statement)
+    (for lparen declaration             semicolon expression rparen statement)
+    (for lparen declaration             semicolon            rparen statement))
+
+   ;; 6.8.6
+   (jump-statement
+    (goto identifier semicolon)
+    (continue semicolon)
+    (break semicolon)
+    (return expression semicolon)
+    (return semicolon))
+
+
+   ;; 6.9
+   (translation-unit
+    (external-declaration)
+    (translation-unit external-declaration))
+
+   (external-declaration
+    (function-definition)
+    (declaration))
+
+   ;; 6.9.1
+   (function-definition
+    (declaration-specifiers declarator declaration-list compound-statement)
+    (declaration-specifiers declarator                  compound-statement))
+
+   (declaration-list
+    (declaration)
+    (declaration-list declaration))))
+
+
+(define (build-lexical-analyzer tokens)
+  (let ((tokens tokens))
+    (lambda ()
+      (if (null? tokens)
+          '*eoi*
+          (begin1 (car tokens)
+                  (set! tokens (cdr tokens)))))))
+
+
+;; (build-lexical-analyzer (list (cons 'string "hello")))
+
+(define (error-procedure a b)
+  (throw 'parse-error a b))
+
+;; (parser lexical-analyzer error-procedure)
diff --git a/module/c/preprocessor.scm b/module/c/preprocessor.scm
new file mode 100644
index 00000000..71712b17
--- /dev/null
+++ b/module/c/preprocessor.scm
@@ -0,0 +1,370 @@
+(define-module (c preprocessor)
+  :use-module (srfi srfi-1)
+  :use-module (srfi srfi-9 gnu)
+  :use-module (ice-9 rdelim)
+  :use-module (ice-9 regex)
+  :use-module (hnh util object)
+
+  :use-module (hnh util)
+  :use-module (hnh util object)
+  )
+
+(define (read-lines port)
+  (let loop ((done '()))
+    (let ((line (read-line port)))
+      (if (eof-object? line)
+          (reverse done)
+          (loop (cons line done))))))
+
+;; The source line of a give readen line
+(define line-number (make-object-property))
+;; The source file of a given readen line
+(define line-file (make-object-property))
+
+
+(define (mark-with-property! items property property-value)
+  (for-each (lambda (item) (set! (property item) property-value))
+            items))
+
+(define trigraph-rx (make-regexp "??([=()/'<>!-])"))
+(define (expand-trigraphs line)
+  (regexp-substitute/global
+   #f trigraph-rx
+   line
+   'pre (lambda (m) (case (string-ref (match:substring m 1) 1)
+                 ((#\=) "#")
+                 ((#\() "[")
+                 ((#\)) "]")
+                 ((#\/) "\\")
+                 ((#\') "^")
+                 ((#\<) "{")
+                 ((#\>) "}")
+                 ((#\!) "|")
+                 ((#\-) "~")))
+   'post))
+
+(define (number-lines lines)
+  (for-each (lambda (line number)
+              (set! (line-number line) number))
+            lines
+            (iota (length lines) 1))
+  lines)
+
+;; Should this line be merged with the next
+(define (line-continued? line)
+  (case (string-length line)
+    ((0) #f)
+    ((1) (string=? "\\" line))
+    (else
+     (let ((len (string-length line)))
+       ;; TODO can extra backslashes change this?
+       (and (char=? #\\ (string-ref line (- len 1)))
+            (not (char=? #\\ (string-ref line (- len 2)))))))))
+
+(define (transfer-line-number to from)
+  (set! (line-number to) (line-number from))
+  to)
+
+;; Merge two lines, assuming that upper ends with a backslash
+(define (merge-lines upper lower)
+  (let ((new-string (string-append (string-drop-right upper 1) lower)))
+    (transfer-line-number new-string upper)
+    new-string))
+
+(define (fold-lines lines)
+  (fold-right (lambda (line done)
+                (if (line-continued? line)
+                    (cons (merge-lines line (car done)) (cdr done))
+                    (cons line done)))
+              '()
+              lines))
+
+
+(define comment-rx (make-regexp "(//|/[*]|[*]/)"))
+
+(define (strip-comments lines)
+  (let loop ((in-comment #f) (lines lines) (done '()))
+    (if (null? lines)
+        (reverse done)
+        (let ((line (car lines)))
+          (cond ((regexp-exec comment-rx line)
+                 => (lambda (m)
+                      (format (current-output-port) "~s ~s substr = ~s~%" in-comment (line-number line) (match:substring m))
+                      (cond ((and in-comment (string=? "*/" (match:substring m)))
+                             (loop #f (cons (transfer-line-number (match:suffix m) line)
+                                            (cdr lines))
+                                   done))
+                            (in-comment (loop #t (cdr lines) done))
+                            ((string=? "//" (match:substring m))
+                             (loop #f (cdr lines) (cons (transfer-line-number (match:prefix m) line)
+                                                        done)))
+                            ((string=? "/*" (match:substring m))
+                             (loop #t (cons (transfer-line-number (match:suffix m) line) (cdr lines)) done))
+                            (else
+                             (scm-error 'cpp-error "strip-comments"
+                                        "Unexpected */ in file ~a on line ~a"
+                                        (list (line-file line) (line-number line))
+                                        #f)))))
+                (else (loop in-comment (cdr lines) (cons line done))))))))
+
+
+(define-immutable-record-type <preprocessor-directive>
+  (make-preprocessor-directive type body)
+  proprocessor-directive?
+  (type directive-type)
+  (body directive-body))
+
+(define cpp-directive-rx (make-regexp "\\s*#\\s*((\\w+)(.*))?"))
+(define (preprocessor-directive? line)
+  (cond ((regexp-exec cpp-directive-rx line)
+         => (lambda (m)
+              (if (match:substring m 2)
+                  (make-preprocessor-directive
+                   (string->symbol (match:substring m 2))
+                   (string-trim-both (match:substring m 3) char-set:whitespace))
+                  'sort-of)))
+        (else #f)))
+
+;; defined
+
+;; TODO _Pragma
+
+
+(define (expand-function-line-macro environment macro . params)
+  (expand-macro environment (apply-macro macro (map (lambda (param) (expand-macro environment param)) params))))
+
+;; (define (environment-ref ))
+
+(define (list-of? lst predicate)
+  (every predicate lst))
+
+
+;; Parantheses when defining macro
+(define (parse-parameter-string string)
+  (map string-trim-both
+       (string-split (string-trim-both string (char-set #\( #\)))
+                     #\,)))
+
+
+(define-type (object-macro)
+  (body type: string?))
+
+(define-type (function-macro)
+  (formals type: (list-of? string?))
+  (body type: string?))
+
+
+;; The interesting part
+;; environment, (list string) -> (values (list string) (list strings))
+;; multiple lines since since a function-like macro can extend over multiple lines
+;; (define (expand-macros environment strings)
+;;   )
+
+
+(define (crash-if-not-if body guilty)
+  (scm-error 'cpp-error guilty
+             "else, elif, and endif invalid outside if scope: ~s~%file: ~s line: ~s"
+             (list body (line-file body) (line-number body))))
+
+;; (environment, lines) -> environment x lines
+(define (parse-directives environment lines)
+  (let loop ((environment environment) (lines lines) (done '()))
+    (let* ((line (car line))
+           (directive? (preprocessor-directive? line)))
+      (case directive?
+        ((#f)                ; regular line
+         (loop environment (cdr lines)
+               ;; TODO this doesn't work, since parse-macros works on multiple lines
+               (cons (parse-macros environment (car lines)) done)))
+        ((sort-of)           ; a no-op directive
+         (loop environment (cdr lines) done))
+        (else                ; an actual directive
+         (case (car (cpp-if-status environment))
+           ((outside)
+            (case (directive-type m)
+              ((ifndef endif else)
+               (scm-error 'cpp-error "parse-directives"
+                          "Unexpected directive: ~s"
+                          (list line) #f))
+              (else          ; inside if, ifelse or else
+               ;; outside active-if inactive-if
+               (case (directive-type m)
+                 ;; stack ending directives
+                 ((endif)
+                  (case (car (cpp-if-status environment))
+                    ((outside) (crash-if-not-if (directive-body m) "endif"))
+                    (else (loop (modify environment cpp-if-status cdr)
+                                (cdr lines)
+                                done))))
+
+                 ;; stack nudging directives
+                 ((else)
+                  (case (car (cpp-if-status environment))
+                    ((outside) (crash-if-not-if (directive-body m) "else"))
+                    (else (loop (modify environment (lens-compose cpp-if-status car*)
+                                        (lambda (old)
+                                          (case old
+                                            ((active-if)   'inactive-if)
+                                            ((inactive-if) 'active-if))))
+                                (cdr lines)
+                                done))))
+                 ((elif)
+                  (case (car (cpp-if-status environment))
+                    ((outside) (crash-if-not-if (directive-body m) "elif"))
+                    (else 'TODO ;; TODO
+                     )
+                    ))
+
+                 ;; stack creating directives
+                 ;; on inactive-if each creates a new frame, which also is inactive
+                 ((ifndef)
+                  (case (car (cpp-if-status environment))
+                    ((inactive-if) (loop (modify environment cpp-if-status
+                                                 xcons 'inactive-if)
+                                         (cdr lines)
+                                         done))
+                    (else (loop (modify environment cpp-if-status
+                                        xcons (if (in-environment? environment (directive-body directive?))
+                                                  'inactive-if 'active-if))
+                                (cdr lines)
+                                done))))
+
+                 ((ifdef)
+                  (case (car (cpp-if-status environment))
+                    ((inactive-if) (loop (modify environment cpp-if-status
+                                                 xcons 'inactive-if)
+                                         (cdr lines)
+                                         done))
+                    (else
+                     (loop (modify environment cpp-if-status
+                                   xcons (if (in-environment? environment (directive-body directive?))
+                                             'active-if 'inactive-if))
+                           (cdr lines)
+                           done))))
+
+                 ((if)
+                  (case (car (cpp-if-status environment))
+                    ((inactive-if) (loop (modify environment cpp-if-status
+                                                 xcons 'inactive-if)
+                                         (cdr lines)
+                                         done))
+                    (else 'TODO ;; TODO
+                     )))
+
+
+                 ;; other directives
+                 ((include) (cond ((string-match "[<\"](.*)"
+                                                 => (lambda (m)
+                                                      (let ((fileneme (string-drop-right (directive-body m) 1)))
+                                                        (case (string-ref (match:substring m 1) 0)
+                                                          ;; TODO include-path
+                                                          ((#\<) (handle-file environment filename))
+                                                          ((#\") (handle-file environment filename))))))
+                                   (else (scm-error 'cpp-error "parse-directives"
+                                                    "Invalid include"
+                                                    '() #f)))))
+                  ((define)
+                   ;; TODO what are valid names?
+                   (cond ((string-match "^(\\w+)([(][^)]*[)])?\\s+(.*)" (directive-body directive?))
+                          => (lambda (m)
+                               (loop (let ((macro-body (string-trim-both (match:substring m 3))))
+                                       (add-identifier!
+                                        environment
+                                        (match:substring m 1)
+                                        (cond ((match:substring m 2)
+                                               => (lambda (parameter-string)
+                                                    (function-macro
+                                                     formals: (parse-parameter-string parameter-string)
+                                                     body: macro-body)))
+                                              (else (object-macro body: macro-body)))))
+                                     (cdr lines)
+                                     done)))
+                         (else (scm-error 'cpp-error "parse-directives"
+                                          "Invalid #define line, ~s"
+                                          (list (directive-body directive?))
+                                          #f))))
+
+                  ((undef)
+                   (case (car (cpp-if-status environment))
+                     ((inactive-if) (loop environment (cdr lines) done))
+                     (else (loop (remove-identifier environment (directive-body directive?))
+                                 (cdr lines)
+                                 done))))
+
+                  ((line)
+                   (case (car (cpp-if-status environment))
+                     ((inactive-if) (loop environment (cdr lines) done))
+                     ;; TODO add first-run parameter to loop, in case expand-macros still return something invalid
+                     (else (let parse-line-directive ((tokens (string-tokenize (directive-body directive?))))
+                             (cond ((= 1 (length tokens))
+                                    ;; TODO parse token
+                                    (if (integer? (car tokens))
+                                        ;; TODO update current line
+                                        (loop environment (cdr lines) done)
+                                        (parse-line-directive (expand-macros environment (directive-body directive?)))))
+                                   ((= 2 (length tokens))
+                                    ;; TODO parse tokens
+                                    (if (and (integer? (car tokens))
+                                             (string-literal? (cadr tokens)))
+                                        ;; TODO update current line and file
+                                        (loop environment (cdr lines) done)
+                                        (parse-line-directive (expand-macros environment (directive-body directive?)))))
+                                   (else (parse-line-directive (expand-macros environment (directive-body directive?)))))))))
+
+                  ((error)
+                   (throw 'cpp-error-directive
+                          (directive-body directive?)))
+
+                  ((warning)
+                   (format (current-error-port) "#warning ~a~%"
+                           (directive-body directive?))
+                   (loop environment (cdr lines) done))
+
+                  ((pragma)
+                   (format (current-error-port)
+                           "PRAGMA: ~s~%" (directive-body directive?))
+                   (loop environment (cdr lines) done))
+
+                  ((ident sccs)
+                   (format (current-error-port)
+                           "IDENT: ~s~%" (directive-body directive?))
+                   (loop environment (cdr lines) done))
+
+                  (else
+                   (scm-error 'cpp-error "parse-directives"
+                              "Unknown pre-processor directive: ~s"
+                              (list line) #f)
+                   )))))))))
+      ))
+
+
+(define* (writeln expr optional: (port (current-output-port)))
+  (write expr port)
+  (newline port))
+
+(define (handle-lines environment lines)
+  (parse-directive environment
+                   (compose strip-comments fold-lines number-lines)))
+
+  ;; parse-directives environment
+
+;; Return a number of lines
+(define (read-file file-path)
+  (define raw-lines (call-with-input-file file-path read-lines))
+  (mark-with-property! line line-file file-path)
+  (handle-lines raw-lines))
+
+
+;; pre defined macros
+;; see info manual for cpp 3.7.1 Standard Predefined Macros
+;; __FILE__
+;; __LINE__
+;; __DATE__ "Feb 12 1996"
+;; __TIME__ "23:59:01"
+;; __STDC__ 1
+;; __STDC_VERSION__ 201112L
+;; __STDC_HOSTED__ 1
+
+;; __cplusplus
+;; __OBJC__
+;; __ASSEMBLER__
diff --git a/module/c/preprocessor2.scm b/module/c/preprocessor2.scm
new file mode 100644
index 00000000..3f9552c5
--- /dev/null
+++ b/module/c/preprocessor2.scm
@@ -0,0 +1,752 @@
+(define-module (c preprocessor2)
+  :use-module (srfi srfi-1)
+  :use-module (srfi srfi-71)
+  :use-module (srfi srfi-88)
+
+  :use-module (c cpp-environment)
+  :use-module (c eval2)
+  :use-module ((c cpp-environment function-like-macro)
+               :select (function-like-macro variadic? identifier-list))
+  :use-module ((c cpp-environment object-like-macro) :select (object-like-macro object-like-macro?))
+  :use-module ((c cpp-environment internal-macro) :select (internal-macro))
+  :use-module ((hnh util) :select (-> ->> intersperse aif swap unless unval break/all))
+  :use-module ((hnh util lens) :select (set modify cdr*))
+  :use-module (hnh util path)
+  :use-module (hnh util type)
+  :use-module (hnh util object)
+  :use-module ((hnh util values) :select (abort* on-fst on-snd apply/values))
+  :use-module ((c lex2)
+               :select (lex
+                        placemaker
+                        lexeme?
+                        lexeme-body
+                        lexeme-noexpand
+
+                        tokenize
+                        ))
+  :use-module (c unlex)
+  :use-module (c cpp-types)
+  :use-module (c cpp-util)
+  :export (_Pragma-macro
+           defined-macro
+           c-search-path
+           handle-preprocessing-tokens))
+
+
+(define (read-file path)
+  (call-with-input-file path (@ (ice-9 rdelim) read-string)))
+
+
+
+(define-syntax-rule (alist-of variable key-type value-type)
+  (build-validator-body variable (list-of (pair-of key-type value-type))))
+
+(define (list-of-length lst n)
+  (= n (length lst)))
+
+(define parameter-map? (of-type? (alist-of string? (list-of lexeme?))))
+
+(define (concat-token? token)            (and (equal? "##"  (punctuator-token? token))
+                                              (not (member "##" (lexeme-noexpand token)))))
+(define (stringify-token? token)         (equal? "#"   (punctuator-token? token)))
+(define (left-parenthesis-token? token)  (equal? "("   (punctuator-token? token)))
+(define (right-parenthesis-token? token) (equal? ")"   (punctuator-token? token)))
+(define (comma-token? token)             (equal? ","   (punctuator-token? token)))
+(define (ellipsis-token? token)          (equal? "..." (punctuator-token? token)))
+
+
+;; TODO
+;; > #if defined X
+;; is equivalent to
+;; > #if defined(X)
+
+
+;; parameters is a lexeme list, as returned by parse-parameter-list
+(define (build-parameter-map macro parameters)
+  (typecheck macro cpp-macro?)
+  (typecheck parameters (list-of (list-of lexeme?)))
+  (map (lambda (pair) (modify pair cdr* drop-whitespace-both))
+       (if (macro-variadic? macro)
+           (let ((head rest (split-at parameters (length (macro-identifier-list macro)))))
+             (cons (cons "__VA_ARGS__" (concatenate (intersperse
+                                                     (lex ",")
+                                                     rest)))
+                   (map cons (macro-identifier-list macro) head)))
+           (map cons
+                (macro-identifier-list macro)
+                parameters))))
+
+(define (expand# macro parameter-map)
+  (typecheck macro cpp-macro?)
+  (typecheck parameter-map parameter-map?)
+  (let loop ((tokens (macro-body macro)))
+    (cond ((null? tokens) '())
+          ((stringify-token? (car tokens))
+           (let* ((head rest (car+cdr (drop-whitespace (cdr tokens))))
+                  (x (identifier-token? head)))
+             (cond ((assoc-ref parameter-map x)
+                    => (lambda (tokens)
+                         (cons (stringify-tokens tokens)
+                               (loop rest))))
+                   (else
+                    (scm-error 'macro-expand-error "expand#"
+                               "'#' is not followed by a macro parameter: ~s"
+                               (list x) #f)))))
+          (else (cons (car tokens)
+                      (loop (cdr tokens)))))))
+
+
+;; 6.10.3.3
+(define (expand## tokens)
+  ;; (typecheck tokens (list-of lexeme?))
+
+  (let loop ((left '())
+             (right tokens))
+    (cond ((null? right)
+           (reverse left))
+          ((concat-token? (car right))
+           (let ((l (drop-whitespace left))
+                 (r (drop-whitespace (cdr right))))
+             (cond ((or (null? l) (null? r))
+                    (scm-error 'cpp-error "expand##"
+                               "## can't be first or last token: ~s"
+                               (list (unlex tokens)) #f))
+                   ((and (placemaker-token? (car l))
+                         (placemaker-token? (car r)))
+                    (loop (cdr l) (cons (placemaker) (cdr r))))
+                   ((placemaker-token? (car l))
+                    (loop (cdr l) r))
+                   ((placemaker-token? (car r))
+                    (loop (cdr l) (cons (car l) (cdr r))))
+                   (else
+                    ;; 6.10.3.3 p. 3
+                    ;; I believe that ## is the only special case where the
+                    ;; result of concatenation is differente from the token directly.
+                    (let ((token (concatenate-tokens (car l) (car r))))
+                      (let ((token (if (concat-token? token)
+                                       (modify token lexeme-noexpand xcons "##")
+                                       token)))
+                        (loop (cdr l) (cons token (cdr r)))))))))
+          (else
+           (let ((pre post (break concat-token? right)))
+             (loop (append left (reverse pre)) post))))))
+
+
+(define (check-arity macro parameters)
+  (if (variadic? macro)
+      (unless (>= (length parameters)
+                 (length (macro-identifier-list macro)))
+        (scm-error 'cpp-arity-error "apply-macro"
+                   "Too few arguments to variadic macro ~s, expected at least ~s, got ~s"
+                   (list (macro-identifier macro)
+                         (length (macro-identifier-list macro))
+                         (length parameters))
+                   (list macro)))
+      (unless (or (and (= 0 (length (macro-identifier-list macro)))
+                       (= 1 (length parameters))
+                       (null? (car parameters)))
+                  (= (length (macro-identifier-list macro))
+                     (length parameters)))
+        (scm-error 'cpp-arity-error "apply-macro"
+                   "Wrong number of arguments to macro ~s, expected ~s, got ~s"
+                   (list (macro-identifier macro)
+                         (length (macro-identifier-list macro))
+                         (length parameters))
+                   (list macro)))))
+
+;; expand function like macro
+;; parameter is a list of lexeme-lists, each "top level" element matching one
+;; argument to the macro
+(define (apply-macro environment macro parameters)
+  (typecheck environment cpp-environment?)
+  ;; Each element should be the lexeme list for that argument
+  (typecheck parameters (list-of (list-of lexeme?)))
+  (typecheck macro cpp-macro?)
+  (check-arity macro parameters)
+
+  (let ()
+
+    (define (resolve-cpp-variables tokens parameter-map)
+      (define (bound-identifier? id)
+        (assoc-ref parameter-map id))
+
+      ;; expand parameters, and place placemaker tokens
+      (let loop ((tokens tokens) (last #f))
+        (cond ((null? tokens) '())
+              ((identifier-token? (car tokens))
+               bound-identifier?
+               => (lambda (id)
+                    (let ((replacement (assoc-ref parameter-map id)))
+                      (if (null? replacement)
+                          (cons (placemaker) (loop (cdr tokens) #f))
+                          ;; macroexpand replacement here! But only if the token isn't used with ## (or #)
+                          (append
+                           (if (or (concat-token? last)
+                                   (next-token-matches? concat-token? tokens))
+                               replacement
+                               ;; resolve-token-stream only modifies environment by updating current line
+                               ;; that can't happen in a macro body
+                               ((unval resolve-token-stream 1) environment replacement once?: #t))
+                           (loop (cdr tokens) #f))))))
+              ((whitespace-token? (car tokens))
+               (cons (car tokens) (loop (cdr tokens) last)))
+              (else (cons (car tokens) (loop (cdr tokens) (car tokens)))))))
+
+
+    (define parameter-map (build-parameter-map macro parameters))
+    (remove placemaker-token?
+            (-> macro
+                (expand# parameter-map)
+                (resolve-cpp-variables parameter-map)
+                expand##))))
+
+
+
+;; Expand object-like macro
+
+;; #define VALUE 10
+;; #define str(x) #x
+;; #define OTHER str(VALUE)
+;; OTHER
+;; ⇒ "VALUE"
+
+;; remaining-tokens should be the token stream just after the name of the macro
+(define (expand-macro environment macro noexpand-list remaining-tokens)
+  (typecheck environment cpp-environment?)
+  (typecheck macro cpp-macro?)
+  ;; (typecheck remaining-tokens (list-of lexeme?))
+  (typecheck noexpand-list (list-of string?))
+
+  (let ((name (macro-identifier macro)))
+    (cond ((object-macro? macro)
+           (values environment (append (fold (swap mark-noexpand)
+                                             (expand## (macro-body macro))
+                                             (cons name noexpand-list))
+                                       remaining-tokens)))
+
+          ((function-macro? macro)
+           (if (next-token-matches? left-parenthesis-token? remaining-tokens)
+               (let ((containing remaining newlines (parse-parameter-list remaining-tokens)))
+                 (values (bump-line environment newlines)
+                         (append (fold (swap mark-noexpand)
+                                       (apply-macro environment macro containing)
+                                       (cons name noexpand-list))
+                                 remaining)))
+               (values environment
+                       ;; TODO#1 the token shouldn't be expanded here, but it should neither be marked no-expand?
+                       ;; Consider the case
+                       ;; #define m(a) a(0,1)
+                       ;; #define f(a) f(2 * (a))
+                       ;; m(f)
+                       (append (mark-noexpand (lex (macro-identifier macro)) (macro-identifier macro))
+                               remaining-tokens))))
+
+          ((internal-macro? macro)
+           (if (next-token-matches? left-parenthesis-token? remaining-tokens)
+               (let ((containing remaining newlines (parse-parameter-list remaining-tokens)))
+                 (let ((env* tokens* ((macro-body macro) environment containing)))
+                   (values (bump-line env* newlines)
+                           (append (fold (swap mark-noexpand)
+                                         tokens*
+                                         (cons name noexpand-list))
+                                   remaining))))
+               (values environment
+                       (append (mark-noexpand (lex (macro-identifier macro)) (macro-identifier macro))
+                               remaining-tokens))))
+
+          (else
+           (scm-error 'wrong-type-arg "expand-macro"
+                      "Macro isn't a macro: ~s"
+                      (list macro) #f)))))
+
+
+
+(define-type (parenthesis-group)
+  (parenthesis-group-tokens
+   type: (list-of (or lexeme? parenthesis-group?))))
+
+(define (make-parenthesis-group tokens)
+  (parenthesis-group parenthesis-group-tokens: tokens))
+
+
+(define (flatten-group tokens)
+  (cond ((null? tokens) '())
+        ((lexeme? (car tokens))
+         (cons (car tokens) (flatten-group (cdr tokens))))
+        ((parenthesis-group? (car tokens))
+         (append (lex "(")
+                 (flatten-group (parenthesis-group-tokens (car tokens)))
+                 (lex ")")
+                 (flatten-group (cdr tokens))))))
+
+
+;; Takes a list of preprocessing tokens, and returns three values
+;; - if the last token was '...'
+;; - a list of strings of all token names
+;; - the remaining tokens
+;; Note that this is ONLY #define f(x) forms
+;; not usage forms
+(define (parse-identifier-list tokens)
+  ;; (typecheck tokens (list-of lexeme?))
+  (let* ((group remaining (parse-group (drop-whitespace tokens)))
+         (groups (reverse (map drop-whitespace-both
+                               (break/all comma-token? (parenthesis-group-tokens group))))))
+    ;; Checks that there where no nested parenthesis
+    (cond ((equal? '(()) groups)
+           (values #f '() remaining))
+          (else
+           (typecheck groups (list-of (and (list-of-length 1)
+                                           (list-of lexeme?))))
+
+           (let ((variadic? groups (if (ellipsis-token? (caar groups))
+                                       (values #t (cdr groups))
+                                       (values #f groups))))
+             (values
+              variadic?
+              (map (lambda (x) (or (identifier-token? x)
+                              (scm-error 'cpp-error "parse-identifier-list"
+                                         "Unexpected preprocessing-token in identifier list: ~s"
+                                         (list x) #f)))
+                   (map car (reverse groups)))
+              remaining))))))
+
+
+
+(define (newline-count group)
+  (let loop ((tokens (parenthesis-group-tokens group)))
+    (fold (lambda (item nls)
+            (+ nls
+               (cond ((newline-token? item) 1)
+                     ((parenthesis-group? item) (newline-count item))
+                     (else 0))))
+          0 tokens)))
+
+;; tokens ⇒ parenthesis-group, remaining-tokens
+(define (parse-group tokens)
+  (typecheck tokens (not null?))
+  (typecheck (car tokens) left-parenthesis-token?)
+
+  (let loop ((stack '()) (remaining tokens))
+    (cond ((and (not (null? stack))
+                (null? (cdr stack))
+                (car stack))
+           parenthesis-group?
+           => (lambda (item) (values item remaining)))
+          ((null? remaining)
+           (scm-error 'misc-error "parse-group"
+                      "Ran out of tokens while parsing: ~s (stack: ~s)"
+                      (list (unlex tokens) stack) #f))
+          (else
+           (let ((token remaining (car+cdr remaining)))
+             (loop (cond ((right-parenthesis-token? token)
+                          (let ((group rest (break left-parenthesis-token? stack)))
+                            (cons (make-parenthesis-group (reverse group))
+                                  ;; Remove left-parenthesis
+                                  (cdr rest))))
+                         (else (cons token stack)))
+                   remaining))))))
+
+
+;; returns three values:
+;; - a list of tokens where each is a parameter to the function like macro
+;; - the remaining tokenstream
+;; - how many newlines were encountered
+;; The standard might call these "replacement lists"
+;; Note that each returned token-list might have padding whitespace which should be trimmed.
+;; It's kept to allow __VA_ARGS__ to "remember" its whitespace
+(define (parse-parameter-list tokens)
+  (let ((group remaining (parse-group (drop-whitespace tokens))))
+    ;; Checks that no inner groups where here
+    ;; (typecheck tokens (list-of lexeme?))
+    (values (map flatten-group
+                 (break/all comma-token? (parenthesis-group-tokens group)))
+            remaining
+            (newline-count group))))
+
+
+;; Add __FILE__ and __LINE__ object macros to the environment
+(define (join-file-line environment)
+  (extend-environment
+   environment
+   ;; 6.10.8
+   (list
+    (object-like-macro
+     identifier: "__FILE__"
+     body: (lex (format #f "~s" (current-file environment))))
+    (object-like-macro
+     identifier: "__LINE__"
+     body: (lex (number->string (current-line environment)))))))
+
+
+(define defined-macro
+  (internal-macro
+   identifier: "defined"
+   body: (lambda (environment arguments)
+           (typecheck arguments (and (list-of (list-of lexeme?))
+                                     (not null?)))
+           (aif (identifier-token? (car (list-ref arguments 0)))
+                (let ((in-env (boolean->c-boolean (in-environment? environment it))))
+                  (values environment (lex (number->string in-env))))
+                (scm-error 'cpp-error "defined"
+                           "Invalid parameter list to `defined': ~s"
+                           (list arguments) #f)))))
+
+(define _Pragma-macro
+  (internal-macro
+   identifier: "_Pragma"
+   body: (lambda (environment arguments)
+           (typecheck arguments (and (list-of (list-of lexeme?))
+                                     (not null?)))
+           (cond ((string-token? (caar arguments))
+                  (lambda (a . _) a)
+                  ;; TODO handle rest
+                  => (lambda (encoding it . rest)
+                       (values (handle-pragma environment (lex it))
+                               '())))
+                 (else (scm-error 'cpp-pragma-error "_Pragma"
+                                  "Invalid argument to _Pragma: ~s"
+                                  (list (car arguments)) #f))))))
+
+
+
+;; environment, tokens → environment
+(define (handle-pragma environment tokens)
+  (typecheck environment cpp-environment?)
+  ;; (typecheck tokens (list-of lexeme?))
+
+  (let ((err (lambda ()
+               (scm-error 'cpp-pragma-error "handle-pragma"
+                          "Invalid pragma directive: ~a"
+                          (list (unlex tokens)) #f))))
+
+    (cond ((null? tokens) (err))
+          ((equal? "STDC" (identifier-token? (car tokens)))
+           (call-with-values (lambda () (apply values (filter identifier-token? (cdr tokens))))
+             (case-lambda ((identifier on-off-switch)
+                           (format (current-output-port)
+                                   "#Pragma STDC ~a ~a"
+                                   (unlex (list identifier))
+                                   (unlex (list on-off-switch)))
+                           environment)
+                          (_ (err)))))
+          (else
+           (format (current-output-port)
+                   "Non-standard #Pragma: ~a"
+                   (unlex tokens))
+           environment))))
+
+
+;; 6.10.1 p. 4
+(define (resolve-constant-expression cpp-tokens)
+  ;; (typecheck tokens (list-of lexeme?))
+  (define zero (car (lex "0")))
+  #;
+  (define tokens
+   (map preprocessing-token->token
+        (map (lambda (token)
+               (cond ((identifier-token? token) zero)
+                     (else token)))
+             (remove whitespace-token? tokens))))
+
+  'TODO
+  ;; eval as per 6.6
+  )
+
+
+
+(define (mark-noexpand1 token name)
+  (modify token lexeme-noexpand xcons name))
+
+(define (mark-noexpand tokens name)
+  ;; (typecheck tokens (list-of lexeme?))
+  ;; (typecheck name string?)
+  (map (lambda (token) (mark-noexpand1 token name)) tokens))
+
+(define (marked-noexpand? token)
+  (cond ((identifier-token? token)
+         => (lambda (id) (member id (lexeme-noexpand token))))
+        (else #f)))
+
+;; Expands a token-stream which contains no pre-processing directives (#if:s, ...)
+;; If @var{once?} is true then the resulting body won't be scanned again for tokens to expand
+;; environment, tokens, [boolean] → environment, tokens
+(define* (resolve-token-stream environment tokens key: once?)
+  (typecheck environment cpp-environment?)
+  ;; (typecheck tokens (list-of lexeme?))
+  ;; (pprint-environment environment)
+  ;; (format (current-error-port) "~a~%~%" (unlex tokens))
+  (let loop ((environment environment) (tokens tokens))
+    (cond ((null? tokens) (values environment '()))
+          ((newline-token? (car tokens))
+           (on-snd (cons (car tokens) (abort* (loop (bump-line environment) (cdr tokens))))))
+          ((and (identifier-token? (car tokens))
+                (not (marked-noexpand? (car tokens))))
+           ;; Here is the loop after expansion
+           (apply/values (if once? values loop)
+                         (maybe-extend-identifier environment
+                                                  (identifier-token? (car tokens))
+                                                  (lexeme-noexpand (car tokens))
+                                                  (cdr tokens))))
+          (else (on-snd (cons (car tokens) (abort* (loop environment (cdr tokens)))))))))
+
+
+
+;; returns a new environment
+;; handle body of #if
+;; environment, (list token) → environment
+(define (resolve-for-if environment tokens)
+  (typecheck environment cpp-environment?)
+  ;; (typecheck tokens (list-of lexeme?))
+
+  (-> (extend-environment environment (list defined-macro))
+      ;; no newlines in #if line
+      ((unval resolve-token-stream 1) tokens)
+      resolve-constant-expression
+      c-boolean->boolean
+      (if (enter-active-if environment)
+          (enter-inactive-if environment))))
+
+;; environment, string, (list token) → environment, (list token)
+(define (maybe-extend-identifier environment identifier noexpand-list remaining-tokens)
+  (typecheck environment cpp-environment?)
+  (typecheck identifier string?)
+  ;; (typecheck remaining-tokens (list-of lexeme?))
+  (typecheck noexpand-list (list-of string?))
+  (cond ((get-identifier (join-file-line environment) identifier)
+         => (lambda (value)
+              (expand-macro (join-file-line environment)
+                            value
+                            noexpand-list
+                            remaining-tokens)))
+        (else ; It wasn't an identifier, leave it as is
+         (values environment
+                 (append (mark-noexpand (lex identifier)
+                                        identifier)
+                         remaining-tokens)))))
+
+;; 'gcc -xc -E -v /dev/null' prints GCC:s search path
+(define c-search-path
+  (make-parameter (list "/usr/include"
+                        "/usr/local/include")))
+
+;; #include <stdio.h>
+(define (resolve-h-file string)
+  (typecheck string string?)
+  (cond
+   ;; NOTE do I want this case?
+   ;; GCC has it
+   ((path-absolute? string) string)
+   (else
+    (or
+     (find file-exists?
+           (map (lambda (path-prefix)
+                  (path-append path-prefix string))
+                (c-search-path)))
+     (scm-error 'cpp-error "resolve-h-file"
+                "Can't resolve file: ~s"
+                (list string) #f)))))
+
+;; #include "myheader.h"
+(define (resolve-q-file string)
+  (typecheck string string?)
+  (cond ((file-exists? string) string)
+        ;; This should always be a fallback (6.10.2, p. 3)
+        (else (resolve-h-file string))))
+
+
+(define (resolve-header environment tokens)
+  (typecheck environment cpp-environment?)
+  ;; (typecheck tokens (list-of lexeme?))
+
+  (let ((err (lambda (msg . args)
+               (scm-error 'cpp-error "resolve-and-include-header"
+                          (string-append msg ", tokens: ~s")
+                          (append args (list (unlex tokens))) #f))))
+    (let loop ((%first-time #t) (tokens tokens))
+      (cond ((null? tokens) (err "Invalid #include line"))
+            ((h-string-token? (car tokens))
+             => (lambda (str)
+                  (unless (null? (drop-whitespace (cdr tokens)))
+                    (err "Unexpected tokens after #include <>"))
+                  (resolve-h-file str)))
+            ((q-string-token? (car tokens))
+             => (lambda (str)
+                  (unless (null? (drop-whitespace (cdr tokens)))
+                    (err "Unexpected tokens after #include \"\""))
+                  (resolve-q-file str)))
+            (else
+             (unless %first-time (err "Failed parsing tokens"))
+             ;; No newlines in #include
+             (loop #f ((unval resolve-token-stream 1) environment tokens)))))))
+
+;; environment, tokens → environment
+(define (handle-line-directive environment tokens*)
+  (typecheck environment cpp-environment?)
+  ;; (typecheck tokens* (list-of lexeme?))
+
+  (let ((err (lambda () (scm-error 'cpp-error "handle-line-directive"
+                              "Invalid line directive: ~s"
+                              (list tokens*) #f))))
+    (let loop ((%first-time #t) (tokens tokens*))
+      (cond ((null? tokens))
+            ((pp-number? (car tokens))
+             => (lambda (line)
+                  (let ((line (string->number line))
+                        (remaining (drop-whitespace (cdr tokens))))
+                    (cond ((null? remaining) (set environment current-line (1- line)))
+                          ((string-token? (car remaining))
+                           (lambda (a . _) a)
+                           => (lambda (encoding . fragments)
+                                (-> environment
+                                    (set current-line (1- line))
+                                    ;; TODO properly join string
+                                    (set current-file (car fragments)))))
+                          ;; no newlines in #line
+                          (%first-time (loop #f ((unval resolve-token-stream 1) environment tokens)))
+                          (else (err))))))
+            ;; no newlines in #line
+            (%first-time (loop #f ((unval resolve-token-stream 1) environment tokens)))
+            (else (err))))))
+
+;; environment, tokens → environment
+(define (resolve-define environment tokens)
+  (typecheck environment cpp-environment?)
+  ;; (typecheck tokens (list-of lexeme?))
+
+  (let ((identifier (identifier-token? (car tokens)))
+        (tail (cdr tokens)))
+    (-> environment
+        bump-line
+        (add-identifier
+         identifier
+         (cond ((and (not (null? tail))
+                     (left-parenthesis-token? (car tail)))
+                ;; function like macro
+                (let ((variadic? identifiers replacement-list
+                                 (parse-identifier-list tail)))
+                  (function-like-macro
+                   identifier: identifier
+                   variadic?: variadic?
+                   identifier-list: identifiers
+                   ;; surrounding whitespace is not part of the replacement list
+                   ;; (6.10.3 p.7)
+                   body: (drop-whitespace-both replacement-list))))
+               (else (object-like-macro
+                      identifier: identifier
+                      body: (drop-whitespace-both tail))))))))
+
+
+
+
+;; environment, tokens -> environment, tokens
+(define (handle-preprocessing-tokens environment tokens)
+  ;; Prepend a newline to ensure that the token stream always starts with a
+  ;; newline (otherwise guaranteed by how we loop). Decrement line-counter
+  ;; by one to compensate.
+  (let loop ((environment (bump-line environment -1))
+             (tokens (append (lex "\n") tokens)))
+
+    (define (err fmt . args)
+      (scm-error 'cpp-error "handle-preprocessing-tokens"
+                 (string-append "~a:~a " fmt)
+                 (cons* (current-file environment)
+                        (current-line environment)
+                        args)
+                 #f))
+
+    (cond ((null? tokens) (values environment '()))
+          ((newline-token? (car tokens))
+           (let ((environment (bump-line environment))
+                 (tokens* (drop-whitespace (cdr tokens))))
+             (cond ((null? tokens*) (values environment '()))
+                   ((equal? "#" (punctuator-token? (car tokens*)))
+                    (let ((line-tokens remaining-tokens (tokens-until-eol (cdr tokens*))))
+                      ;; drop whitespace after to not "eat" the next newline token
+                      (let ((line-tokens (drop-whitespace line-tokens)))
+                        (cond ((null? line-tokens)
+                               ;; null directive
+                               (loop environment remaining-tokens))
+
+                              ((in-conditional/inactive? environment)
+                               (case (string->symbol (identifier-token? (car line-tokens)))
+                                 ((ifdef if) (loop (enter-inactive-if environment) remaining-tokens))
+                                 ((else)  (loop (flip-flop-if environment) remaining-tokens))
+                                 ((endif) (loop (leave-if environment) remaining-tokens))
+                                 ((elif)  (loop (resolve-for-if
+                                                 (leave-if environment)
+                                                 (drop-whitespace (cdr line-tokens)))
+                                                remaining-tokens))
+                                 (else (loop environment remaining-tokens))))
+
+                              ;; From here on we are not in a comment block
+                              (else
+                               (let ((directive (string->symbol (identifier-token? (car line-tokens))))
+                                     (body (drop-whitespace (cdr line-tokens))))
+                                 (if (eq? 'include directive)
+                                     ;; include is special since it returns a token stream
+                                     (let ((path (resolve-header environment body)))
+                                       ;; TODO change to store source location in lexemes
+                                       ;; and rewrite the following to
+                                       ;; (loop environment
+                                       ;;       (append (-> path read-file tokenize) remaining-tokens))
+                                       ;; TODO and then transfer these source locations when we move
+                                       ;; to "real" tokens (c to-token)
+                                       (let ((env* tokens*
+                                                   (loop
+                                                    ;; same hack as at start of loop
+                                                    (-> environment
+                                                        (enter-file path)
+                                                        (bump-line -1))
+                                                    (append (lex "\n")
+                                                            (-> path read-file tokenize)))))
+                                         (on-snd (append tokens* (abort* (loop (leave-file env*) remaining-tokens))))))
+
+                                     (let ((operation ; (environment, list token) → environment
+                                            (case directive
+                                              ((if) resolve-for-if)
+                                              ((ifdef)
+                                               (lambda (env body)
+                                                 ((if (in-environment? env (identifier-token? (car body)))
+                                                      enter-active-if enter-inactive-if)
+                                                  env)))
+                                              ((ifndef)
+                                               (lambda (env body)
+                                                 ((if (in-environment? env (identifier-token? (car body)))
+                                                      enter-inactive-if enter-active-if)
+                                                  env)))
+                                              ;; NOTE possibly validate that body is empty for endif and else
+                                              ((endif) (lambda (env _)
+                                                         (unless (in-conditional? env)
+                                                           (err "#endif outside conditional"))
+                                                         (leave-if env)))
+                                              ((else)  (lambda (env _)
+                                                         (unless (in-conditional? env)
+                                                           (err "#else outside conditional"))
+                                                         (flip-flop-if env)))
+                                              ;; ((elif)  (lambda ))
+                                              ((define) resolve-define)
+                                              ((undef) (lambda (env body) (remove-identifier env (identifier-token? (car body)))))
+                                              ((line) handle-line-directive)
+                                              ((error) (lambda (_ tokens)
+                                                          (throw 'cpp-error-directive (unlex tokens))))
+                                              ((pragma) handle-pragma)
+                                              (else (err "Unknown preprocessing directive: ~s"
+                                                         (list line-tokens))))))
+                                       (loop (operation environment body)
+                                             remaining-tokens)))))))))
+
+                   ;; Line is not a pre-processing directive
+                   (else (let ((preceding-tokens remaining-tokens (tokens-until-cpp-directive (cdr tokens))))
+                           (let* ((env* resolved-tokens (if (in-conditional/inactive? environment)
+                                                            (values environment '())
+                                                            (resolve-token-stream environment preceding-tokens))))
+                             (on-snd (append resolved-tokens
+                                             ;; The initial newline is presreved here, for better output,
+                                             ;; and to keep at least one whitespace token when there was one previously.
+                                             ;; possibly also keep a newline for line-directives.
+                                             (unless (null? remaining-tokens) (lex "\n"))
+                                             (abort* (loop env* remaining-tokens))))))))))
+
+          (else (err "Unexpected middle of line, (near ~s)"
+                     (unlex tokens))))))
+
diff --git a/module/c/to-token.scm b/module/c/to-token.scm
new file mode 100644
index 00000000..f5e459de
--- /dev/null
+++ b/module/c/to-token.scm
@@ -0,0 +1,161 @@
+(define-module (c to-token)
+  :use-module ((srfi srfi-1) :select (fold append-map))
+  :use-module (ice-9 match)
+  :use-module ((hnh util) :select (->))
+  :use-module ((system base lalr)
+               :select (make-lexical-token))
+  :use-module (c cpp-types)
+  :use-module ((c lex2) :select (parse-c-number))
+  ;; :use-module (hnh util type)
+  :use-module ((rnrs bytevectors)
+               :select (make-bytevector
+                        bytevector-length
+                        bytevector-copy!
+                        u8-list->bytevector
+                        bytevector-u8-ref))
+  :use-module ((rnrs io ports)
+               :select (string->bytevector
+                        make-transcoder
+                        utf-8-codec))
+  :export (preprocessing-token->token))
+
+(define (pp-number->c-number token)
+  (parse-c-number (pp-number? token)))
+
+(define keywords
+  '(auto break case char const continue default
+    do double else enum extern float for goto
+    if inline int long register restrict return
+    short signed sizeof static struct switch
+    typedef union unsigned void volatile while
+    _Alignas _Alignof _Atomic _Bool _Complex
+    _Generic _Imaginary _Noreturn _Static_assert
+    _Thread_local))
+
+(define (cpp-char->bytevector c)
+  (match c
+    (`(escape-sequence (simple-escape-sequence      ,x))
+     (case (string-ref x 0)
+       ((#\a) #vu8(#x07)) ; #\alarm
+       ((#\b) #vu8(#x08)) ; #\backspace
+       ((#\f) #vu8(#x0C)) ; #\page
+       ((#\n) #vu8(#x0A)) ; #\newline
+       ((#\r) #vu8(#x0D)) ; #\return
+       ((#\t) #vu8(#x09)) ; #\tab
+       ((#\v) #vu8(#x0B)) ; #\vtab
+       ;; ' " ? \
+       (else (char->integer (string-ref x 0)))))
+
+    ;; TODO these u8-list->bytevector should depend on the
+    ;;      encoding prefix of the string/char
+    (`(escape-sequence (octal-escape-sequence       ,x))
+     (-> x (string->number 8) list u8-list->bytevector))
+
+    (`(escape-sequence (hexadecimal-escape-sequence ,x))
+     (-> x (string->number 16) list u8-list->bytevector))
+
+    (`(escape-sequence (universal-character-name    ,x))
+     (let ((n (string->number x 16)))
+       (when (<= #xD800 x #xDFFF)
+         (error))
+       (when (and (< x #xA0)
+                  (or (not (= x #x24))
+                      (not (= x #x40))
+                      (not (= x #x60))))
+         (error))
+       (-> n
+           integer->char
+           string
+           (string->bytevector (make-transcoder (utf-8-codec))))))
+    (_ (error))))
+
+(define (concat-bytevectors bvs)
+  (define target (make-bytevector (apply + (map bytevector-length bvs))))
+  (fold (lambda (bv offset)
+          (let ((len (bytevector-length bv)))
+            (bytevector-copy! bv 0 target offset len)
+            (+ offset len)))
+        0
+        bvs)
+  target)
+
+(define (handle-string-fragments content)
+  (map
+   (lambda (x) (if (string? x)
+              (string->bytevector x (make-transcoder (utf-8-codec)))
+              (cpp-char->bytevector x)))
+   content))
+
+;; 6.4 paragraph 2
+;; Each preprocessing toket thas is converted to a token shall have the
+;; lexcal form of a keyword, an identifier, a constant, a string literal,
+;; or a puncturtor
+(define (preprocessing-token->token cpp-token)
+  ;; Guile's cond handles multiple from expr, if written on the form
+  ;; (cond (expr check => proc) ...)
+  (cond ((string-token? cpp-token)
+         (lambda (a . _) a)
+         => (lambda (encoding . content)
+              (make-lexical-token
+               'string-literal #f
+               (concat-bytevectors
+                (append
+                 ;; TODO this should depend on encoding
+                 (handle-string-fragments content)
+                 (list #vu8(0)))))))
+
+        ((identifier-token? cpp-token)
+            => (lambda (name)
+                 (let ((name (string->symbol name)))
+                   (if (memv name keywords)
+                       name
+                       (make-lexical-token 'identifier #f name)))))
+
+        ((pp-number? cpp-token)
+         => (lambda (content)
+              ;; TOOD should return an integer-constant or a floating-constant
+              (make-lexical-token 'constant #f (parse-c-number content))))
+
+        ((character-constant? cpp-token)
+         (lambda (a . _) a)
+         => (lambda (encoding . content)
+              (make-lexical-token
+               'constant #f
+               ;; TODO that to do with multi-byte characters?
+               ;; > 'ab' == 'a' << 8 | 'b' == 0x6162
+               ;; > '\x1234' == 0x1234
+               ;; GCC prints 34 for the following expression
+               ;; > printf("%x\n", '\x1234');
+               ;; but 6162 for this
+               ;; > printf("%x\n", 'ab');
+
+               ;; What about
+               ;; > 'a\x1234' == a << 16 | 'b' == 0x611234
+               (let ((bv (concat-bytevectors
+                          ;; TODO this should depend on encoding
+                          (handle-string-fragments content))))
+                 ;; TODO maybe actually store multiple bytes from multi byte literals
+                 (bytevector-u8-ref bv (1- (bytevector-length bv)))))))
+
+        ((punctuator-token? cpp-token)
+         => (lambda (s)
+              (cond ((string=? s "{") 'lbrace)
+                    ((string=? s "}") 'rbrace)
+                    ((string=? s "[") 'lbrack)
+                    ((string=? s "]") 'rbrack)
+                    ((string=? s "(") 'lparen)
+                    ((string=? s ")") 'rparen)
+                    ((string=? s ".") 'dot)
+                    ((string=? s "|") 'pipe)
+                    ((string=? s "||") 'pipe2)
+                    ((string=? s ";") 'semicolon)
+                    ((string=? s "|=") 'pipe=)
+                    ((string=? s ",") 'comma)
+                    ((string=? s "#") 'hash)
+                    ((string=? s "##") 'hash2)
+                    (else (string->symbol s)))))
+
+        (else
+         (scm-error 'cpp-error "preprocessing-token->token"
+                    "Can't convert ~s into a \"regular\" token."
+                    (list cpp-token) #f))))
diff --git a/module/c/trigraph.scm b/module/c/trigraph.scm
new file mode 100644
index 00000000..197e01a4
--- /dev/null
+++ b/module/c/trigraph.scm
@@ -0,0 +1,24 @@
+(define-module (c trigraph)
+  :use-module (ice-9 regex)
+  :export (replace-trigraphs))
+
+(define rx (make-regexp "\\?\\?([=\\(\\)'!<>/-])"))
+
+(define (proc m)
+  (case (string-ref (match:substring m 2) 0)
+    ((#\=) "#")
+    ((#\() "[")
+    ((#\)) "]")
+    ((#\') "^")
+    ((#\<) "{")
+    ((#\>) "}")
+    ((#\!) "|")
+    ((#\-) "~")
+    ((#\/) "\\")))
+
+(define (replace-trigraphs string)
+  (call-with-output-string
+    (lambda (port)
+      (regexp-substitute/global
+       port rx string
+       'pre proc 'post))))
diff --git a/module/c/unlex.scm b/module/c/unlex.scm
new file mode 100644
index 00000000..e467a50f
--- /dev/null
+++ b/module/c/unlex.scm
@@ -0,0 +1,84 @@
+(define-module (c unlex)
+  :use-module (hnh util type)
+  :use-module (ice-9 match)
+  :use-module (c lex2)
+  :use-module (c cpp-types)
+  :use-module (c cpp-util)
+  :use-module ((texinfo string-utils) :select (escape-special-chars))
+  :export (unlex
+           unlex-aggressive
+           stringify-token
+           stringify-tokens))
+
+(define (unlex tokens)
+  (typecheck tokens (list-of lexeme?))
+  (string-concatenate
+   (map (lambda (x) (cond (x preprocessing-token? => stringify-token)
+                     ((whitespace-token? x) (lexeme-body x))
+                     ((other-token? x) (lexeme-body x))))
+        tokens)))
+
+;; takes a list of preprocessing-token's, and return a "source" string
+(define (unlex-aggressive tokens)
+  (typecheck tokens (list-of lexeme?))
+  (string-concatenate
+   (map (lambda (x)
+          (cond ((preprocessing-token? x) (stringify-token x))
+                ((whitespace-token? x) " ")
+                ((other-token? x) (lexeme-body x))))
+        (squeeze-whitespace tokens))))
+
+(define (stringify-escape-sequence sub-token)
+  (match sub-token
+    (`(simple-escape-sequence ,x)
+     (format #f "\\~a" x))
+    (`(octal-escape-sequence ,x)
+     (format #f "\\~a" x))
+    (`(hexadecimal-escape-sequence ,x)
+     (format #f "\\x~a" x))
+    (`(universal-character-name ,x)
+     (case (string-length x)
+       ((4) (format #f "\\u~a" x))
+       ((8) (format #f "\\U~a" x))))))
+
+(define (stringify-string-tokens fragments)
+  (with-output-to-string
+    (lambda ()
+      (display #\")
+      (for-each (match-lambda
+                  (`(escape-sequence ,x)
+                   (display (stringify-escape-sequence x)))
+                  ;; Backslash in source strings is usually encoded by an
+                  ;; 'escape-sequence, but literal backslashes can be in
+                  ;; "regular" string fragments as result of the stringification
+                  ;; operator (#).
+                  (s (display (escape-special-chars s "\"\\" #\\))))
+                fragments)
+      (display #\"))))
+
+;; Returns the "source" of the token, as a preprocessing string literal token
+(define (stringify-token preprocessing-token)
+  (match (lexeme-body preprocessing-token)
+    (('string-literal `(encoding-prefix . ,prefix) parts ...)
+     (stringify-string-tokens parts))
+
+    (`(header-name (q-string ,s))
+     (format #f "~s" s))
+
+    (`(header-name (h-string ,s))
+     (format #f "<~a>" s))
+
+    (`(identifier ,id) id)
+
+    (`(pp-number ,n) n)
+
+    ;; TODO remaining parts
+    (('character-constant `(character-encoding . ,x) c parts ...)
+     (format #f "'~a'" c))
+
+    (`(punctuator ,p) p)))
+
+;; takes a token list, and return a single string literal token
+(define (stringify-tokens tokens)
+  (lexeme type: 'preprocessing-token
+          body: `(string-literal (encoding-prefix) ,(unlex-aggressive tokens))))
diff --git a/module/c/zipper.scm b/module/c/zipper.scm
new file mode 100644
index 00000000..65cea211
--- /dev/null
+++ b/module/c/zipper.scm
@@ -0,0 +1,60 @@
+;;; Commentary:
+;; Zipper data structure. Could be moved to (hnh util), but would then need to
+;; be at least slightly more thorough.
+;;; Code:
+
+(define-module (c zipper)
+  :use-module (srfi srfi-88)
+  :use-module (hnh util object)
+  :export (list-zipper
+           list-zipper?
+           left focused right
+           zip-left
+           zip-right
+           zip-find-right
+           list->zipper
+           zipper->list
+           rezip))
+
+(define-type (list-zipper)
+  (left type: list?)
+  focused
+  (right type: list?))
+
+;; Move zipper one step to the left
+(define (zip-left zipper)
+  (if (null? (left zipper))
+      zipper
+      (list-zipper left: (cdr (left zipper))
+                   right: (cons (focused zipper) (right zipper))
+                   focused: (car (left zipper)))))
+
+;; Move zipper one step to the right
+(define (zip-right zipper)
+  (if (null? (right zipper))
+      zipper
+      (list-zipper left: (cons (focused zipper) (left zipper))
+                   right: (cdr (right zipper))
+                   focused: (car (right zipper)))))
+
+;; find first element matching predicate, going right
+(define (zip-find-right predicate zipper)
+  (cond ((null? (right zipper)) zipper)
+        ((predicate (focused zipper)) zipper)
+        (else (zip-find-right predicate (zip-right zipper)))))
+
+(define (list->zipper list)
+  (list-zipper left: '()
+               focused: (car list)
+               right: (cdr list)))
+
+
+(define (rezip zipper)
+  (if (null? (left zipper))
+      zipper
+      (rezip (zip-left zipper))))
+
+(define (zipper->list zipper)
+  (let ((z (rezip zipper)))
+    (cons (focused z)
+          (right z))))