aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile148
-rw-r--r--doc/ref/guile.texi2
-rw-r--r--doc/ref/guile/util-object.texi86
-rw-r--r--doc/ref/guile/util-type.texi63
-rw-r--r--doc/ref/guile/util.texi21
-rw-r--r--module/c/compiler.scm64
-rw-r--r--module/c/cpp-environment.scm215
-rw-r--r--module/c/cpp-environment/function-like-macro.scm25
-rw-r--r--module/c/cpp-environment/internal-macro.scm11
-rw-r--r--module/c/cpp-environment/object-like-macro.scm18
-rw-r--r--module/c/cpp-types.scm94
-rw-r--r--module/c/cpp-util.scm130
-rw-r--r--module/c/cpp.scm95
-rw-r--r--module/c/eval.scm265
-rw-r--r--module/c/eval/environment.scm34
-rw-r--r--module/c/eval2.scm20
-rw-r--r--module/c/lex.scm52
-rw-r--r--module/c/lex2.scm549
-rw-r--r--module/c/line-fold.scm29
-rw-r--r--module/c/operators.scm3
-rw-r--r--module/c/parse.scm411
-rw-r--r--module/c/parse2.scm554
-rw-r--r--module/c/preprocessor.scm370
-rw-r--r--module/c/preprocessor2.scm752
-rw-r--r--module/c/to-token.scm161
-rw-r--r--module/c/trigraph.scm24
-rw-r--r--module/c/unlex.scm84
-rw-r--r--module/c/zipper.scm60
-rw-r--r--module/calp/html/view/calendar.scm2
-rw-r--r--module/datetime.scm444
-rw-r--r--module/datetime/zic.scm6
-rw-r--r--module/hnh/util.scm35
-rw-r--r--module/hnh/util/io.scm8
-rw-r--r--module/hnh/util/lens.scm99
-rw-r--r--module/hnh/util/object.scm169
-rw-r--r--module/hnh/util/type.scm54
-rw-r--r--module/hnh/util/values.scm27
-rw-r--r--module/vcomponent/datetime/output.scm2
-rw-r--r--module/vcomponent/recurrence/generate.scm22
-rwxr-xr-xtests/run-tests.scm11
-rw-r--r--tests/test/c-parse.scm69
-rw-r--r--tests/test/cpp.scm603
-rw-r--r--tests/test/cpp/cpp-environment.scm45
-rw-r--r--tests/test/cpp/lex2.scm177
-rw-r--r--tests/test/cpp/preprocessor2.scm1247
-rw-r--r--tests/test/cpp/to-token.scm65
-rw-r--r--tests/test/cpp/util.scm14
-rw-r--r--tests/test/datetime.scm40
-rw-r--r--tests/test/lens.scm21
-rw-r--r--tests/test/object.scm80
-rw-r--r--tests/test/recurrence-advanced.scm1
-rw-r--r--tests/test/util.scm28
52 files changed, 7162 insertions, 447 deletions
diff --git a/Makefile b/Makefile
index 34ffaa71..96c0ea58 100644
--- a/Makefile
+++ b/Makefile
@@ -14,7 +14,153 @@ GUILE_VERSION=$(shell $(GUILE) -c '(display (version))')
GUILE_SITE_DIR=$(shell $(GUILE) -c "(display (%site-dir))")
GUILE_CCACHE_DIR=$(shell $(GUILE) -c "(display (%site-ccache-dir))")
-SCM_FILES = $(shell find module/ -type f -name \*.scm)
+not_scm_files = \
+ c/operators \
+ c/preprocessor \
+ c/cpp \
+ c/parse \
+ c/lex \
+ c/eval \
+ c/eval/environment \
+ zzz_sentinel
+
+
+scm_files = \
+ c/cpp-environment \
+ c/cpp-environment/object-like-macro \
+ c/cpp-environment/internal-macro \
+ c/cpp-environment/function-like-macro \
+ c/cpp \
+ c/eval2 \
+ c/lex2 \
+ c/line-fold \
+ c/preprocessor2 \
+ c/trigraph \
+ hnh/util \
+ hnh/util/object \
+ hnh/util/path
+
+# scm_files = \
+ base64 \
+ c/compiler \
+ c/cpp-environment \
+ c/cpp-environment/function-like-macro \
+ c/cpp-environment/internal-macro \
+ c/cpp-environment/object-like-macro \
+ c/eval2 \
+ c/lex2 \
+ c/line-fold \
+ c/preprocessor2 \
+ c/trigraph \
+ calp \
+ calp/benchmark/parse \
+ calp/config-base \
+ calp/entry-points/benchmark \
+ calp/entry-points/convert \
+ calp/entry-points/html \
+ calp/entry-points/ical \
+ calp/entry-points/import \
+ calp/entry-points/server \
+ calp/entry-points/terminal \
+ calp/entry-points/text \
+ calp/entry-points/tidsrapport \
+ calp/entry-points/update-zoneinfo \
+ calp/html/caltable \
+ calp/html/components \
+ calp/html/config \
+ calp/html/filter \
+ calp/html/util \
+ calp/html/vcomponent \
+ calp/html/view/calendar \
+ calp/html/view/calendar/month \
+ calp/html/view/calendar/shared \
+ calp/html/view/calendar/week \
+ calp/html/view/search \
+ calp/html/view/small-calendar \
+ calp/main \
+ calp/repl \
+ calp/server/routes \
+ calp/server/server \
+ calp/terminal \
+ calp/translation \
+ calp/util/config \
+ calp/util/exceptions \
+ calp/util/hooks \
+ calp/util/time \
+ crypto \
+ datetime \
+ datetime/instance \
+ datetime/timespec \
+ datetime/zic \
+ glob \
+ hnh/util \
+ hnh/util/env \
+ hnh/util/exceptions \
+ hnh/util/graph \
+ hnh/util/io \
+ hnh/util/language \
+ hnh/util/lens \
+ hnh/util/object \
+ hnh/util/options \
+ hnh/util/path \
+ hnh/util/tree \
+ hnh/util/uuid \
+ srfi/srfi-41/util \
+ srfi/srfi-64/test-error \
+ srfi/srfi-64/util \
+ sxml/html \
+ sxml/namespace \
+ sxml/transformations \
+ text/flow \
+ text/markup \
+ text/numbers \
+ text/numbers/en \
+ text/numbers/sv \
+ text/util \
+ vcomponent \
+ vcomponent/base \
+ vcomponent/config \
+ vcomponent/control \
+ vcomponent/datetime \
+ vcomponent/datetime/output \
+ vcomponent/duration \
+ vcomponent/formats/common/types \
+ vcomponent/formats/ical/output \
+ vcomponent/formats/ical/parse \
+ vcomponent/formats/ical/types \
+ vcomponent/formats/vdir/parse \
+ vcomponent/formats/vdir/save-delete \
+ vcomponent/formats/xcal/output \
+ vcomponent/formats/xcal/parse \
+ vcomponent/formats/xcal/types \
+ vcomponent/geo \
+ vcomponent/recurrence \
+ vcomponent/recurrence/display \
+ vcomponent/recurrence/display/common \
+ vcomponent/recurrence/display/en \
+ vcomponent/recurrence/display/sv \
+ vcomponent/recurrence/generate \
+ vcomponent/recurrence/internal \
+ vcomponent/recurrence/parse \
+ vcomponent/util/control \
+ vcomponent/util/describe \
+ vcomponent/util/group \
+ vcomponent/util/instance \
+ vcomponent/util/instance/methods \
+ vcomponent/util/parse-cal-path \
+ vcomponent/util/search \
+ vulgar \
+ vulgar/color \
+ vulgar/components \
+ vulgar/info \
+ vulgar/termios \
+ web/http/make-routes \
+ web/query \
+ web/uri-query \
+ xdg/basedir
+
+SCM_FILES = $(scm_files:%=module/%.scm)
+
GO_FILES = $(SCM_FILES:module/%.scm=obj-$(GUILE_VERSION)/%.go)
GUILE_ENV = GUILE_LOAD_PATH=$(PWD)/module \
diff --git a/doc/ref/guile.texi b/doc/ref/guile.texi
index a6c5ebe4..58c162e1 100644
--- a/doc/ref/guile.texi
+++ b/doc/ref/guile.texi
@@ -7,6 +7,8 @@
@include guile/util.texi
@include guile/util-path.texi
@include guile/util-config.texi
+@include guile/util-type.texi
+@include guile/util-object.texi
@include guile/base64.texi
@include guile/web.texi
@include guile/vcomponent.texi
diff --git a/doc/ref/guile/util-object.texi b/doc/ref/guile/util-object.texi
new file mode 100644
index 00000000..ceac2f2a
--- /dev/null
+++ b/doc/ref/guile/util-object.texi
@@ -0,0 +1,86 @@
+@node define-type
+@section Yet Another Object System
+
+@defmac define-type (name type-parameters ...) fields ...
+Introduce a new type.
+
+Each field is either a symbol, or a list where the first element is a
+symbol, and the remaining elements are alternating keywords and
+values, as per @ref{Field Parameters}. All fields are optional by
+default, but can be made non-optional through its type parameter.
+
+The example below creates a new type called @var{type}, with a custom
+printer which always displays the string ``TYPE''. It has two fields,
+@var{x}, which must be an integer, and @var{y}, which can have any
+type, but gets the value ``Hello'' in none is given.
+@example
+(define-type (type #:printer (lambda (r p) (display "TYPE" p)))
+ (x #:type integer?)
+ (y #:default "Hello"))
+@end example
+@end defmac
+
+@subsection Type Parameters
+
+@deffn {Type Parameter} constructor (λ (primitive-constructor type-validator))
+Use a custom constructor for the type. The given procedure is called
+with two values:
+@itemize
+@item the types primitive (and usually hidden) constructor,
+which takes as many arguments as there are fields, in the order given
+in define-type, and
+@item the type validator procedure, which also takes all arguments,
+but instead either returns an undefined value if everything is fine,
+or throws @code{'wrong-type-arg} otherwise.
+@end itemize
+The procedure should then return a new procedure, which will be bound
+as the constructor for the type. Note that default values are current
+disregarded with custom constructors.
+
+A custom constructor for the type above might look like
+@example
+(lambda (primitive-constructor type-check)
+ (lambda* (#:key x y)
+ (type-check x y)
+ (primitive-constructor x y)))
+@end example
+@end deffn
+
+@deffn {Type Parameter} printer (λ (record port))
+Use a custom printer for the type.
+@end deffn
+
+@subsection Field Parameters
+@anchor{Field Parameters}
+
+@deffn {Field Parameter} default value
+Value the field should get if not given.
+@end deffn
+
+@deffn {Field Parameter} type type-clause
+A type predicate that the field must obey. See @ref{type-clause} for details.
+@end deffn
+
+@subsection Introduced Bindings
+
+Define type introduces a number procedures. (@var{<name>} should be
+replaced with whatever was given as @var{name} to define-type.
+
+@defun @var{<name>} [kv-args ...]
+Type constructor. Takes key-value arguments. Where the keys are the
+names of the fields.
+@end defun
+
+@defun @var{<name>}? x
+Type predicate.
+@end defun
+
+And for each field @var{<field>}:
+
+@defun @var{<field>} object [value]
+Accessor for the given filed.
+Returns the current value if called with only an object, and returns a
+new object with @var{field} set to @var{value} if called with two values.
+
+The updating version checks the type if #:type was given on creation.
+@end defun
diff --git a/doc/ref/guile/util-type.texi b/doc/ref/guile/util-type.texi
new file mode 100644
index 00000000..d389287e
--- /dev/null
+++ b/doc/ref/guile/util-type.texi
@@ -0,0 +1,63 @@
+@node Type utilities
+@section Type utilities
+
+Provided by the module @code{(hnh util type)}
+
+@subsection Type Clauses
+@anchor{type-clause}
+@cindex type-clause
+
+Type clauses are an effective way of writing compound predicates
+without explicitly mentioning the variable at all steps.
+
+The simplest type predicate is a single symbol, which is directly
+called on the object:
+@example
+predicate? ⇒ (predicate? x)
+@end example
+
+Otherwise, if the predicate is a list then the variable is spliced
+into the argument list in the first position:
+@example
+(proc args ...) ⇒ (proc x args ...)
+@end example
+
+The primitives @code{and}, @code{or}, and @code{not} are also
+available, which each take the same number of predicates as schemes
+primitives of the same name, and calls them in order, with Scheme's
+usual short-circuiting rules. @footnote{These don't have to be
+primitives, but we would otherwise have one hell of a namespace
+conflict}
+
+@defmac list-of variable type-clause
+Checks if @var{variable} is a list, and that every element satisfies type-clause.
+@end defmac
+
+@defmac pair-of variable car-type-clause cdr-type-clause
+Check if @var{variable} is a cons-pair, and that the car satisfies
+@var{car-type-clause}, and that the cdr satisfies @var{cdr-type-clause}.
+@end defmac
+
+@subsection Deffinitions
+
+@defmac build-validator-body variable type-clause
+``Entry point'' of type clauses. Inserts variable into the
+type-clause, returning something ready to be passed along the eval (or
+rather, spliced into another macro).
+
+Also used if new ``primitives'' are to be added, such as list-of.
+@end defmac
+
+@defmac typecheck variable type-clause [procedure-name=(current-procedure-name)]
+Checks @var{variable} against @var{type-clause}, and raises
+@code{'wrong-type-argument} if it fails. @var{procedure-name} is used
+as the calling procedure for @code{scm-error}.
+
+Useful at the start of procedures.
+@end defmac
+
+
+@defmac current-procedure-name
+Returns the current procedure name as a symbol, or @code{#f} if not found.
+@end defmac
+
diff --git a/doc/ref/guile/util.texi b/doc/ref/guile/util.texi
index 32df5fce..222b59c5 100644
--- a/doc/ref/guile/util.texi
+++ b/doc/ref/guile/util.texi
@@ -198,6 +198,27 @@ Split a list into sub-lists on @var{element}
@end lisp
@end defun
+@defun split-by-one-of lst items
+Like split-by, but takes a list of delimiters.
+Returns a list where the first element is everything before the first
+delimiter, and the remaining elements is the splitting delimiter
+consed with everything until the next delimiter.
+
+@lisp
+(split-by-one-of '() '(+)))
+⇒ (())
+
+(split-by-one-of '(1 + 2) '(/))
+⇒ ((1 + 2))
+
+(split-by-one-of '(1 + 2 - 3) '(+ -))
+⇒ ((1) (+ 2) (- 3))
+
+(split-by-one-of '(1 + 2 * 3 + 4) '(*))
+⇒ ((1 + 2) (* 3 + 4))
+@end lisp
+@end defun
+
@defun span-upto count predicate list
Simar to span from srfi-1, but never takes more than
diff --git a/module/c/compiler.scm b/module/c/compiler.scm
new file mode 100644
index 00000000..09d49578
--- /dev/null
+++ b/module/c/compiler.scm
@@ -0,0 +1,64 @@
+(define-module (c compiler)
+ :use-module ((c lex2) :select (lex))
+ :use-module ((c trigraph) :select (replace-trigraphs))
+ :use-module ((c line-fold) :select (fold-lines))
+ :use-module (c cpp-environment object-like-macro)
+ :use-module ((c cpp-environment)
+ :select (make-environment
+ extend-environment
+ enter-file))
+ :use-module (hnh util)
+ ;; TODO importort
+ ;; handle-preprocessing-tokens
+ ;; load-and-tokenize-file
+ :export (run-compiler))
+
+"
+#define __STDC__ 1
+#define __STDC_HOSTED__ 1
+#define __STDC_VERSION__ 201112L
+"
+
+(define now (localtime (current-time)))
+(define default-macros
+ (list
+ ;; 6.10.8
+ (object-like-macro
+ identifier: "__STDC__"
+ body: (lex "1"))
+ (object-like-macro
+ identifier: "__STDC_HOSTED__"
+ body: (lex "1"))
+ (object-like-macro
+ identifier: "__STDC_VERSION__"
+ body: (lex "201112L"))
+ (object-like-macro
+ identifier: "__DATE__"
+ ;; TODO format should always be in
+ ;; english, and not tranlated
+ body: (lex (strftime "\"%b %_d %Y\"" now)))
+ (object-like-macro
+ identifier: "__TIME__"
+ body: (lex (strftime "\"%H:%M:%S\"" now)))))
+
+(define environment
+ (-> (make-environment)
+ (extend-environment default-macros)))
+
+
+
+;;; 5.1.11.2 Translation phases
+
+
+
+(define (run-compiler path)
+ (define environment (enter-file (make-environment) path))
+ (-> (load-and-tokenize-file path)
+ (handle-preprocessing-tokens environment))
+;;; 5. (something with character sets)
+;;; 6. concatenation of string literals
+;;; 7. Whitespace tokens are discarded, each preprocessing token is converted into a token
+ ;; 6.4 paragraph 2
+ ;; Each preprocessing toket thas is converted to a token shall have the lexcal form of a keyword, an identifier, a constant, a string literal, or a puncturtor
+;;; 8. All external objects and functions are resolved
+ )
diff --git a/module/c/cpp-environment.scm b/module/c/cpp-environment.scm
new file mode 100644
index 00000000..a6401e71
--- /dev/null
+++ b/module/c/cpp-environment.scm
@@ -0,0 +1,215 @@
+(define-module (c cpp-environment)
+ :use-module (srfi srfi-1)
+ :use-module (srfi srfi-88)
+ :use-module (ice-9 hash-table)
+ :use-module (hnh util object)
+ :use-module (hnh util type)
+ :use-module (hnh util lens)
+ :use-module ((c cpp-environment function-like-macro) :prefix #{fun:}#)
+ :use-module ((c cpp-environment object-like-macro) :prefix #{obj:}#)
+ :use-module ((c cpp-environment internal-macro) :prefix #{int:}#)
+ :use-module ((c unlex) :select (unlex))
+ :export (
+
+ macro-identifier
+ macro-body
+ macro-identifier-list
+ macro-variadic?
+ cpp-macro?
+ ;; pprint-macro
+
+ enter-active-if
+ enter-inactive-if
+ flip-flop-if
+ leave-if
+ in-conditional/active?
+ in-conditional/inactive?
+ in-conditional?
+
+ enter-file
+ leave-file
+ bump-line
+ current-line
+ current-file
+
+ function-macro?
+ object-macro?
+ internal-macro?
+
+ cpp-environment
+ cpp-environment?
+ cpp-if-status
+ ;; cpp-variables
+ cpp-file-stack
+
+ make-environment in-environment?
+ remove-identifier add-identifier
+ get-identifier
+ extend-environment
+ disjoin-macro
+
+ pprint-environment
+ pprint-macro
+ ))
+
+(define (macro-identifier x)
+ (define identifier
+ (cond ((obj:object-like-macro? x) obj:identifier)
+ ((fun:function-like-macro? x) fun:identifier)
+ ((int:internal-macro? x) int:identifier)
+ (else (scm-error 'wrong-type-arg "macro-identifier"
+ "Not a macro: ~s"
+ (list x) #f))))
+ (identifier x))
+
+
+(define (macro-body-proc macro)
+ (cond ((obj:object-like-macro? macro) obj:body)
+ ((fun:function-like-macro? macro) fun:body)
+ ((int:internal-macro? macro) int:body)
+ (else (scm-error 'wrong-type-arg "macro-body"
+ "Not a macro: ~s"
+ (list macro) #f))))
+
+(define macro-body
+ (case-lambda ((macro) ((macro-body-proc macro) macro))
+ ((macro value) ((macro-body-proc macro) macro value))))
+
+(define macro-identifier-list fun:identifier-list)
+(define macro-variadic? fun:variadic?)
+
+(define function-macro? fun:function-like-macro?)
+(define object-macro? obj:object-like-macro?)
+(define internal-macro? int:internal-macro?)
+
+(define (cpp-macro? x)
+ (or (obj:object-like-macro? x)
+ (fun:function-like-macro? x)
+ (int:internal-macro? x)))
+
+
+
+
+(define-type (cpp-environment)
+ (cpp-if-status type: (list-of (memv '(outside active-if inactive-if)))
+ default: '(outside))
+ ;; not exported since type signatures don't hold inside the hash table
+ ;; TODO replace hash table with something that doesn't require copying the
+ ;; entire structure every time
+ (cpp-variables type: hash-table? default: (make-hash-table))
+ (cpp-file-stack type: (and (not null?)
+ (list-of (pair-of string? exact-integer?)))
+ default: '(("*outside*" . 1))))
+
+
+
+
+(define (enter-active-if environment)
+ (modify environment cpp-if-status xcons 'active-if))
+
+(define (enter-inactive-if environment)
+ (modify environment cpp-if-status xcons 'inactive-if))
+
+;; for #else
+(define (flip-flop-if environment)
+ ((if (in-conditional/inactive? environment)
+ enter-active-if
+ enter-inactive-if)
+ (leave-if environment)))
+
+(define (leave-if environment)
+ (modify environment cpp-if-status cdr))
+
+(define (in-conditional/inactive? environment)
+ (eq? 'inactive-if (get environment cpp-if-status car*)))
+
+(define (in-conditional/active? environment)
+ (eq? 'active-if (get environment cpp-if-status car*)))
+
+(define (in-conditional? environment)
+ (or (in-conditional/inactive? environment)
+ (in-conditional/active? environment)))
+
+
+
+(define (enter-file environment filename)
+ (modify environment cpp-file-stack xcons (cons filename 1)))
+
+(define (leave-file environment)
+ (modify environment cpp-file-stack cdr))
+
+(define current-line (compose-lenses cpp-file-stack car* cdr*))
+
+(define current-file (compose-lenses cpp-file-stack car* car*))
+
+(define* (bump-line environment optional: (count 1))
+ (modify environment current-line + count))
+
+
+
+
+(define (make-environment) (cpp-environment))
+
+(define (clone-hash-table ht)
+ (alist->hash-table (hash-map->list cons ht)))
+
+(define (clone-environment environment)
+ (modify environment cpp-variables clone-hash-table))
+
+(define (in-environment? environment key)
+ (hash-get-handle (cpp-variables environment) key))
+
+(define (remove-identifier environment key)
+ (typecheck key string?)
+
+ (let ((environment (clone-environment environment)))
+ (hash-remove! (cpp-variables environment) key)
+ environment))
+
+(define (add-identifier environment key value)
+ (typecheck key string?)
+ (typecheck value cpp-macro?)
+
+ (let ((environment (clone-environment environment)))
+ (hash-set! (cpp-variables environment) key value)
+ environment))
+
+(define (get-identifier environment key)
+ (hash-ref (cpp-variables environment) key))
+
+
+(define (extend-environment environment macros)
+ (typecheck macros (list-of cpp-macro?))
+ (fold (lambda (m env) (add-identifier env (macro-identifier m) m))
+ environment macros))
+
+(define (disjoin-macro environment name)
+ (typecheck name string?)
+ (remove-identifier environment name))
+
+
+
+
+(define* (pprint-environment environment optional: (port (current-error-port)))
+ (display "== Environment ==\n")
+ (hash-for-each (lambda (key macro)
+ (pprint-macro macro port)
+ (newline port))
+ (cpp-variables environment)))
+
+(define* (pprint-macro x optional: (p (current-output-port)))
+ (cond ((internal-macro? x)
+ (format p "/* ~a INTERNAL MACRO */"
+ (macro-identifier x)))
+ ((object-macro? x)
+ (format p "#define ~a ~a"
+ (macro-identifier x)
+ (unlex (macro-body x))))
+ ((function-macro? x)
+ (format p "#define ~a(~a) ~a"
+ (macro-identifier x)
+ (string-join (append (macro-identifier-list x)
+ (if (macro-variadic? x)
+ '("...") '()))
+ "," 'infix)
+ (unlex (macro-body x))))))
diff --git a/module/c/cpp-environment/function-like-macro.scm b/module/c/cpp-environment/function-like-macro.scm
new file mode 100644
index 00000000..59b47c9c
--- /dev/null
+++ b/module/c/cpp-environment/function-like-macro.scm
@@ -0,0 +1,25 @@
+(define-module (c cpp-environment function-like-macro)
+ :use-module (hnh util object)
+ :use-module (hnh util type)
+ :use-module ((c lex2) :select (lexeme?))
+ :use-module ((c unlex) :select (unlex))
+ :export (function-like-macro
+ function-like-macro?
+ identifier
+ identifier-list
+ body
+ variadic?))
+
+(define-type (function-like-macro
+ printer: (lambda (r p)
+ (format p "#<#define ~a~a ~a>"
+ (identifier r)
+ (append (identifier-list r)
+ (if (variadic? r)
+ '("...") '()))
+ (unlex (body r)))))
+ (identifier type: string?)
+ (identifier-list type: (list-of string?))
+ (body type: (list-of lexeme?))
+ (variadic? type: boolean?
+ default: #f))
diff --git a/module/c/cpp-environment/internal-macro.scm b/module/c/cpp-environment/internal-macro.scm
new file mode 100644
index 00000000..3c946738
--- /dev/null
+++ b/module/c/cpp-environment/internal-macro.scm
@@ -0,0 +1,11 @@
+(define-module (c cpp-environment internal-macro)
+ :use-module (hnh util object)
+ :export (internal-macro
+ internal-macro?
+ identifier body))
+
+(define-type (internal-macro)
+ (identifier type: string?)
+ (body type: procedure?
+ ;; Arity 2
+ ))
diff --git a/module/c/cpp-environment/object-like-macro.scm b/module/c/cpp-environment/object-like-macro.scm
new file mode 100644
index 00000000..90a3ad23
--- /dev/null
+++ b/module/c/cpp-environment/object-like-macro.scm
@@ -0,0 +1,18 @@
+(define-module (c cpp-environment object-like-macro)
+ :use-module (hnh util object)
+ :use-module (hnh util type)
+ :use-module ((c lex2) :select (lexeme?))
+ :use-module ((c unlex) :select (unlex))
+ :export (object-like-macro
+ object-like-macro?
+ identifier
+ body))
+
+
+(define-type (object-like-macro
+ printer: (lambda (r p)
+ (format p "#<#define ~a ~a>"
+ (identifier r)
+ (unlex (body r)))))
+ (identifier type: string?)
+ (body type: (list-of lexeme?)))
diff --git a/module/c/cpp-types.scm b/module/c/cpp-types.scm
new file mode 100644
index 00000000..6dad061e
--- /dev/null
+++ b/module/c/cpp-types.scm
@@ -0,0 +1,94 @@
+(define-module (c cpp-types)
+ :use-module (c lex2)
+ :use-module (ice-9 match)
+ :use-module (c cpp-util)
+ :use-module (hnh util type)
+ :export (whitespace-token?
+ comment-token?
+ preprocessing-token?
+ other-token?
+ placemaker-token?
+ newline-token?
+ identifier-token?
+ punctuator-token?
+ pp-number?
+ string-token?
+ h-string-token?
+ q-string-token?
+ character-constant?
+ comment->whitespace
+ comments->whitespace
+ make-string-literal
+ ))
+
+(define (whitespace-token? x)
+ (and (lexeme? x)
+ (eq? 'whitespace (lexeme-type x))))
+
+(define (comment-token? x)
+ (and (lexeme? x)
+ (eq? 'comment (lexeme-type x))))
+
+(define (preprocessing-token? x)
+ (and (lexeme? x)
+ (eq? 'preprocessing-token (lexeme-type x))))
+
+(define (other-token? x)
+ (and (lexeme? x)
+ (eq? 'other (lexeme-type x))))
+
+(define (placemaker-token? x)
+ (and (lexeme? x)
+ (eq? 'placemaker (lexeme-type x))))
+
+(define (newline-token? x)
+ (and (whitespace-token? x)
+ (string=? "\n" (lexeme-body x))))
+
+(define (identifier-token? token)
+ (and (preprocessing-token? token)
+ (match (lexeme-body token)
+ (`(identifier ,id) id)
+ (_ #f))))
+
+(define (punctuator-token? token)
+ (and (preprocessing-token? token)
+ (match (lexeme-body token)
+ (`(punctuator ,x) x)
+ (_ #f))))
+
+(define (pp-number? token)
+ (and (preprocessing-token? token)
+ (match (lexeme-body token)
+ (`(pp-number ,x) x)
+ (_ #f))))
+
+;; TODO rename to string-literal-token?
+(define (string-token? token)
+ (and (preprocessing-token? token)
+ (match (lexeme-body token)
+ (('string-literal x ...) (apply values x))
+ (_ #f))))
+
+(define (character-constant? token)
+ (and (preprocessing-token? token)
+ (match (lexeme-body token)
+ (('character-constant x ...) (apply values x))
+ (_ #f))))
+
+
+(define (h-string-token? token)
+ (and (preprocessing-token? token)
+ (match (lexeme-body token)
+ (`(header-name (h-string ,x)) x)
+ (_ #f))))
+
+;; NOTE q-string tokens are never produced by the lexer,
+;; since they instead are treated as regular strings
+(define (q-string-token? token)
+ (string-token? token))
+
+(define (make-string-literal parts)
+ (typecheck parts (list-of (or string? list?)))
+ (lexeme type: 'preprocessing-token
+ body: (cons 'string-literal parts)))
diff --git a/module/c/cpp-util.scm b/module/c/cpp-util.scm
new file mode 100644
index 00000000..9674317b
--- /dev/null
+++ b/module/c/cpp-util.scm
@@ -0,0 +1,130 @@
+(define-module (c cpp-util)
+ :use-module ((srfi srfi-1) :select (drop-while break))
+ :use-module (srfi srfi-71)
+ :use-module ((hnh util) :select (->))
+ :use-module (hnh util type)
+ :use-module ((hnh util lens) :select (modify ref))
+ :use-module ((c lex2) :select (lex lexeme?))
+ :use-module ((c unlex) :select (unlex))
+ :use-module (c cpp-types)
+ :export (tokens-until-eol
+ tokens-until-cpp-directive
+ next-token-matches?
+ squeeze-whitespace
+ drop-whitespace
+ drop-whitespace-right
+ drop-whitespace-both
+ cleanup-whitespace
+ concatenate-tokens
+ merge-string-literals))
+
+
+;; Does the next non-whitespace token in the stream satisfy the predicate?
+(define (next-token-matches? predicate tokens)
+ (let ((tokens (drop-whitespace tokens)))
+ (if (null? tokens)
+ #f
+ (predicate (car tokens)))))
+
+(define (next-token-matches/line? predicate tokens)
+ (let ((tokens (drop-whitespace/line tokens)))
+ (if (null? tokens)
+ #f
+ (predicate (car tokens)))))
+
+;; Returns two values:
+;; - tokens until a newline token is met
+;; - (potentially the newline token) and the remaining tokens
+(define (tokens-until-eol tokens)
+ ;; (typecheck tokens (list-of lexeme?))
+ (break newline-token? tokens))
+
+;; call predicate with the remaining token stream, until we run out of token, or
+;; predicate matches
+(define (break-lexemes predicate lex-list)
+ (let loop ((rem lex-list) (done '()))
+ (cond ((null? rem) (values (reverse done) '()))
+ ((predicate rem) (values (reverse done) rem))
+ (else (loop (cdr rem) (cons (car rem) done))))))
+
+;; Finds the next instance of "\n#" (possibly with inbetween whitespace)
+;; and return the values before and after (inclusive)
+(define (tokens-until-cpp-directive tokens)
+ (break-lexemes
+ (lambda (tokens)
+ (and (newline-token? (car tokens))
+ (next-token-matches/line?
+ (lambda (token) (equal? "#" (punctuator-token? token)))
+ (cdr tokens))))
+ tokens))
+
+;; Replace all whitespace with single spaces.
+(define (squeeze-whitespace tokens)
+ (cond ((null? tokens) '())
+ ((null? (cdr tokens))
+ (list
+ (if (whitespace-token? (car tokens))
+ (car (lex " "))
+ (car tokens))))
+ ((and (whitespace-token? (car tokens))
+ (whitespace-token? (cadr tokens)))
+ (squeeze-whitespace (cons (car (lex " "))
+ (cddr tokens))))
+ (else (cons (car tokens)
+ (squeeze-whitespace (cdr tokens))))))
+
+;; Drop leading whitespace tokens
+(define (drop-whitespace tokens)
+ ;; (typecheck tokens (list-of lexeme?))
+ (drop-while whitespace-token? tokens))
+
+(define (drop-whitespace/line tokens)
+ ;; (typecheck tokens (list-of lexeme?))
+ (drop-while (lambda (t)
+ (and (whitespace-token? t)
+ (not (newline-token? t))))
+ tokens))
+
+(define (drop-whitespace-right tokens)
+ ;; (typecheck tokens (list-of lexeme?))
+ (-> tokens reverse drop-whitespace reverse))
+
+(define (drop-whitespace-both tokens)
+ ;; (typecheck tokens (list-of lexeme?))
+ (-> tokens
+ drop-whitespace
+ drop-whitespace-right))
+
+;; helper procedure to parse-parameter-list.
+;; If a parameter is complex then whitespace is kept, but squeezed to single spaces. Surounding whitespace is removed.
+;; Example:
+;; #define str(x, y) #y
+;; str(x, ( 2, 4 ) )
+;; expands to:
+;; "( 2, 4 )"
+;; 6.10.3.2 p 2
+(define (cleanup-whitespace tokens)
+ ;; (typecheck tokens (list-of lexeme?))
+ (-> tokens drop-whitespace-both squeeze-whitespace))
+
+(define (concatenate-tokens a b)
+ (car (lex (string-append (unlex (list a))
+ (unlex (list b))))))
+
+
+(define (merge-string-literals tokens)
+ (cond ((null? tokens) '())
+ ((null? (cdr tokens)) tokens)
+ ((string-token? (car tokens))
+ (lambda (a . _) a)
+ => (lambda (prefix-a . parts-a)
+ (cond ((string-token? (cadr tokens))
+ (lambda (a . _) a)
+ => (lambda (prefix-b . parts-b)
+ (merge-string-literals
+ ;; TODO check validity of prefixes
+ (cons (make-string-literal (cons prefix-a (append parts-a parts-b)))
+ (cddr tokens)))))
+ (else (cons (car tokens)
+ (merge-string-literals (cdr tokens)))))))
+ (else (cons (car tokens) (merge-string-literals (cdr tokens))))))
diff --git a/module/c/cpp.scm b/module/c/cpp.scm
index a2935352..aed496f2 100644
--- a/module/c/cpp.scm
+++ b/module/c/cpp.scm
@@ -5,47 +5,37 @@
:use-module (ice-9 match)
:use-module (ice-9 regex)
:use-module ((rnrs io ports) :select (call-with-port))
+ :use-module ((rnrs bytevectors) :select (bytevector?))
:use-module (ice-9 format)
:use-module ((hnh util io) :select (read-lines))
:use-module (hnh util graph)
:use-module (c lex)
:use-module (c parse)
:use-module (c operators)
- :export (do-funcall replace-symbols include#)
+ :export (replace-symbols include#)
)
;; input "#define F(x, y) x + y"
-;; 1 full define | F(x, y)
+;; 1 full define | F(x,y)
;; 2 macro name | F
-;; 3 macro args | (x, y)
-;; 4 macro body | x + y
-(define define-re (make-regexp "^#define ((\\w+)(\\([^)]*\\))?) (.*)"))
+;; 3 macro args | (x,y)
+;; 5 macro body | x + y or #f
+(define define-re (make-regexp "^#define ((\\w+)([(][^)]*[)])?)( (.*))?"))
(define (tokenize-define-line header-line)
(aif (regexp-exec define-re header-line)
(cons (match:substring it 1)
- (match:substring it 4))
+ (let ((body (match:substring it 5)))
+ (if (or (eqv? body #f)
+ (string-null? body))
+ "1" body)))
(scm-error 'c-parse-error
"tokenize-define-line"
"Line dosen't match: ~s"
(list header-line) #f)))
-(define (do-funcall function arguments)
- (if (list? arguments)
- (apply function arguments)
- (function arguments)))
-
-(define symb-map
- `((,(symbol #\|) . logior)
- (funcall . (@ (c cpp) do-funcall))
- (&& . and)
- (& . logand)
- (== . =)
- (!= . (negate =))
- ))
-
(define (replace-symbols tree dict)
(if (not (list? tree))
(or (assoc-ref dict tree) tree)
@@ -55,12 +45,27 @@
;; Direct values. Lisp also has quoted symbols in this group.
(define (immediate? x)
(or (number? x)
- (char? x)
- (string? x)))
+ (bytevector? x)))
+;; TODO replace this with something sensible
+;; like a correct list extracted from (c eval)
+;; and not thinging that types are variables
;; built in symbols. Should never be marked as dependencies
(define (primitive? x)
- (memv x (cons 'funcall binary-operators)))
+ (memv x `(
+ ;; language primitives
+ sizeof
+
+ ;; special forms introduced by parser
+ funcall ternary struct-type as-type
+
+ ;; unary operatons which aren't also binary operators
+ ++ -- ! ~
+ not compl dereference pointer
+ pre-increment pre-decrement
+ post-increment post-decrement
+ ,@binary-operators
+ )))
@@ -77,7 +82,6 @@
[arg (list arg)]))
(define right (f (cdr pair)))
- (define alt-right (replace-symbols right symb-map))
(define dependencies
(lset-difference
eq?
@@ -91,12 +95,12 @@
dependencies
(match left
[('funcall name ('#{,}# args ...))
- (cons name `(lambda ,args ,alt-right))]
+ (cons name `(lambda ,args ,right))]
[('funcall name arg)
- (cons name `(lambda (,arg) ,alt-right))]
+ (cons name `(lambda (,arg) ,right))]
- [name (cons name alt-right)])))
+ [name (cons name right)])))
(define (parse-cpp-file lines)
@@ -104,7 +108,9 @@
(catch #t
(lambda () (parse-cpp-define line))
(lambda (err caller fmt args data)
- (format #t "~a ~?~%" fmt args)
+ (format #t "~a in ~a: ~?~%"
+ err caller fmt args)
+ (format #t "~s~%" line)
#f)))
lines))
@@ -114,29 +120,32 @@
(define (tokenize-header-file header-file)
(map tokenize-define-line
(call-with-port
- (open-input-pipe
- (string-append "cpp -dM " header-file))
+ (open-pipe* OPEN_READ "cpp" "-dM" header-file)
read-lines)))
-(define-macro (include# header-file . args)
-
- (define define-form (if (null? args) 'define (car args)))
-
- (define lines (remove (compose private-c-symbol? car)
- (tokenize-header-file header-file)))
+(define (load-cpp-file header-file)
+ (define lines (tokenize-header-file header-file))
(define forms (parse-cpp-file lines))
- (define graph*
- (fold (lambda (node graph)
- (add-node graph (cdr node) (car node)))
- (make-graph car)
- (filter identity forms)))
+ (fold (lambda (node graph)
+ (add-node graph (cdr node) (car node)))
+ (make-graph car)
+ (filter identity forms)))
+(define (include% header-file)
+ (define graph* (load-cpp-file header-file))
;; Hack for termios since this symbol isn't defined.
;; (including in the above removed private c symbols)
- (define graph (add-node graph* (cons '_POSIX_VDISABLE #f) '()))
+ (define graph (add-node graph* (cons '_POSIX_VDISABLE 0) '()))
+ ;; TODO expand bodies
+ ;; (remove (compose private-c-symbol? car))
+ (resolve-dependency-graph graph))
+
+(define-macro (include# header-file . args)
+
+ (define define-form (if (null? args) 'define (car args)))
`(begin
,@(map (lambda (pair) `(,define-form ,(car pair) ,(cdr pair)))
- (resolve-dependency-graph graph))))
+ (include% header-file))))
diff --git a/module/c/eval.scm b/module/c/eval.scm
new file mode 100644
index 00000000..67d0075d
--- /dev/null
+++ b/module/c/eval.scm
@@ -0,0 +1,265 @@
+(define-module (c eval)
+ :use-module (hnh util)
+ :use-module (srfi srfi-1)
+ :use-module (srfi srfi-71)
+ :use-module (srfi srfi-88)
+ :use-module (ice-9 match)
+ :use-module (ice-9 curried-definitions)
+ :use-module ((rnrs bytevectors)
+ :select (bytevector?))
+ :use-module ((rnrs arithmetic bitwise)
+ :select (bitwise-not
+ bitwise-and
+ bitwise-ior
+ bitwise-xor
+ bitwise-arithmetic-shift-left
+ bitwise-arithmetic-shift-right))
+ :use-module (c eval environment)
+ :export (c-procedure?
+ procedure-formals
+ procedure-body
+ procedure-arity
+
+ c-eval
+ ))
+
+(define C-TRUE 1)
+(define C-FALSE 0)
+
+(define (boolean->c-boolean bool)
+ (if bool C-TRUE C-FALSE))
+
+(define (c-boolean->boolean bool)
+ (not (zero? bool)))
+
+(define (c-not b)
+ (-> b c-boolean->boolean not boolean->c-boolean))
+
+(define (c-procedure? expr)
+ (and (list? expr)
+ (not (null? expr))
+ (eq? 'lambda (car expr))))
+
+(define* (ensure-c-procedure expr optional: calling-procedure)
+ (unless (c-procedure? expr)
+ (scm-error 'c-eval-error calling-procedure
+ "Value not a procedure: ~s"
+ (list procedure #f))))
+
+(define (procedure-formals procedure)
+ (ensure-c-procedure procedure "procedure-formals")
+ (list-ref procedure 1))
+
+(define (procedure-body procedure)
+ (ensure-c-procedure procedure "procedure-body")
+ (list-ref procedure 2))
+
+(define (procedure-arity procedure)
+ (length (procedure-formals procedure)))
+
+(define (literal? expression)
+ (or (number? expression)
+ (bytevector? expression)))
+
+
+
+;; Internal helper procedures
+
+(define (mod-set operation)
+ (lambda (env slot value)
+ ;; a += b
+ ;; a = a + b
+ (c-eval env `(= ,slot (,operation ,slot ,value)))))
+
+(define (fold-2 proc init lst)
+ (car+cdr
+ (fold (lambda (arg env+done)
+ (let ((env* arg* (proc (car env+done) arg)))
+ (cons* env* arg* (cdr env+done))))
+ init
+ lst)))
+
+;; TODO this disregards
+;; - floating point convertions
+;; - integer truncation
+(define ((simple-infix operation) env . operands)
+ (let ((env done (fold-2 c-eval (cons env '()) operands)))
+ (values env (apply operation (reverse done)))))
+
+(define ((binary-operator proc) env i c)
+ (let ((env* i* (c-eval env i)))
+ (let ((env** c* (c-eval env* c)))
+ (values env** (proc i* c*)))))
+
+
+
+
+;; The order of evaluation for a number of these is undefined, meaning
+;; that any side effects without sequence points is undefined.
+;; However, for many of these I do the sensible thing and evaluate them
+;; from left to right, instead of ignoring all side effects.
+
+;; TODO double check these with a C standard
+
+
+;; Operators have their own namespace. They are called without funcall
+;; in the pseudo-lisp which C is compiled to, and they expand more like
+;; lisp macros, since its up to each operator what to do with its operands.
+;; This to allow assignment and short circuting.
+(define primitives
+ `((and . ,(lambda (env . operands)
+ (let loop ((env env) (operands operands))
+ (if (null? operands)
+ (values env C-TRUE)
+ (let* ((env* result (c-eval env (car operands))))
+ (if (c-boolean->boolean result)
+ (loop env* (cdr operands))
+ (values env* result)))))))
+ (or . ,(lambda (env . operands)
+ (let loop ((env env) (operands operands))
+ (if (null? operands)
+ (values env C-FALSE)
+ (let* ((env* result (c-eval env (car operands))))
+ (if (false? result)
+ (values env* result)
+ (loop env* (cdr operands))))))))
+ (= . ,(lambda (env slot value)
+ ;; TOOD if slot isn't a variable, but a field (or array index)
+ ;; then it needs to be resolved...
+ (let ((env* result (c-eval env value)))
+ (values (env-set! env* slot result)
+ result))))
+ (and_eq ,(mod-set 'bitand)) ; &=
+ (or_eq ,(mod-set 'bitor)) ; |=
+ (xor_eq ,(mod-set 'xor)) ; ^=
+ (+= ,(mod-set '+))
+ (-= ,(mod-set '-))
+ (*= ,(mod-set '*))
+ (/= ,(mod-set '/))
+ (<<= ,(mod-set '<<))
+ (>>= ,(mod-set '>>))
+ (%= ,(mod-set '%))
+ (+ . ,(simple-infix +))
+ (* . ,(simple-infix *))
+ (/ . ,(simple-infix /))
+ (- . ,(lambda (env op . operands)
+ (if (null? operands)
+ (let ((env* value (c-eval env op)))
+ (values env* (- value)))
+ (apply (simple-infix -)
+ env op operands))))
+ (bitor . ,(simple-infix bitwise-ior))
+ (bitand . ,(simple-infix bitwise-and))
+ (xor . ,(simple-infix bitwise-xor))
+ (not_eq . ,(lambda (env a b) (c-eval env `(not (== ,a ,b))))) ; !=
+ (<< . ,(binary-operator bitwise-arithmetic-shift-left))
+ (>> . ,(binary-operator bitwise-arithmetic-shift-right))
+ (< . ,(binary-operator (compose boolean->c-boolean <)))
+ (> . ,(binary-operator (compose boolean->c-boolean >)))
+ ;; this assumes that = handles pointers
+ (== . ,(binary-operator (compose boolean->c-boolean =)))
+ (<= . ,(binary-operator (compose boolean->c-boolean <=)))
+ (>= . ,(binary-operator (compose boolean->c-boolean >=)))
+ (% . ,(binary-operator modulo))
+
+ (not . ,(lambda (env value)
+ (let ((env* result (c-eval env value)))
+ (values env* (c-not result)))))
+ (compl . ,(lambda (env value)
+ (let ((env* result (c-eval env value)))
+ (values env* (bitwise-not result)))))
+
+ ;; ++C
+ (pre-increment . ,(lambda (env slot) (c-eval env `(+= ,slot 1))))
+ (pre-decrement . ,(lambda (env slot) (c-eval env `(-= ,slot 1))))
+ ;; TODO these (C++, C--) need to handle if slot isn't a direct variabl
+ (post-increment . ,(lambda (env slot)
+ (let ((value (env-ref env slot)))
+ (values (env-set! env slot (1+ value))
+ value))))
+ (pre-decrement . ,(lambda (env slot)
+ (let ((value (env-ref env slot)))
+ (values (env-set! env slot (1+ value))
+ value))))
+
+ (ternary . ,(lambda (env test true-clause false-clause)
+ (let ((env* value (c-eval env test)))
+ (c-eval env*
+ (if (c-boolean->boolean value)
+ true-clause false-clause)))))
+
+ ;; TODO remaining operations
+ (as-type . ,(lambda (env target-type value)
+ (format (current-error-port) "cast<~s>(~s)~%" target-type value)
+ (values env value)))
+
+ (object-slot . ,(lambda (env object slot)
+ (scm-error 'not-implemented "object-slot"
+ "Object slots are not implemented, when accessing ~s.~s"
+ (list object slot) #f)))
+ (dereference-slot ,(lambda (env ptr slot)
+ (scm-error 'not-implemented "dereference-slot"
+ "Object slots are not implemented, when accessing ~s->~s"
+ (list object slot) #f)))
+ (dereference . ,(lambda (env ptr)
+ (scm-error 'not-implemented "dereference"
+ "Poiner dereferencing is not implemented: *~s"
+ (list ptr) #f)))
+ (pointer . ,(lambda (env value)
+ (scm-error 'not-implemented "pointer"
+ "Pointer of is not implemented: &~s"
+ (list value) #f)))))
+
+;; TODO |,|
+
+
+(define (c-eval environment expression)
+ (match expression
+ (('lambda formals body) (values environment `(lambda ,formals ,body)))
+ ;; hack since sizeof really should be a operator
+ (('funcall 'sizeof arg)
+ ;; TODO
+ (format (current-error-port) "sizeof ~s~%" arg)
+ (values environment 1))
+
+ (('funcall procedure-name ('#{,}# args ...))
+ (let ((procedure (env-ref environment procedure-name)))
+ (ensure-c-procedure procedure "c-eval")
+ (unless (= (length args) (procedure-arity procedure))
+ (scm-error 'c-eval-error "c-eval"
+ "Procedure arity mismatch for ~s, expected ~s, got ~s"
+ (list procedure-name
+ (procedure-arity procedure)
+ (length args))
+ #f))
+ (let ((env args* (fold-2 c-eval (cons environment '()) args )))
+ (let ((inner-environment
+ (fold (lambda (name value env) (env-set! env name value))
+ (push-frame! env)
+ (procedure-formals procedure) args*)))
+ (let ((resulting-environment
+ result-value
+ (c-eval inner-environment (procedure-body procedure))))
+ (values (pop-frame! resulting-environment)
+ result-value))))))
+
+ (('funcall procedure arg)
+ (c-eval environment `(funcall ,procedure (#{,}# ,arg))))
+
+ ((operator operands ...)
+ (apply (or (assoc-ref primitives operator)
+ (scm-error 'c-eval-error "c-eval"
+ "Applying non-existant primitive operator: ~s, operands: ~s"
+ (list operator operands) #f))
+ environment operands))
+
+ ;; "f()" gets compiled to simply f
+ ;; meaning that we instead use the environment to determine
+ ;; if something is a variable or procedure
+ (expr
+ (if (literal? expr)
+ (values environment expr)
+ (let ((value (env-ref environment expr)))
+ (if (c-procedure? value)
+ (c-eval environment `(funcall ,value (#{,}#)))
+ (values environment value)))))))
diff --git a/module/c/eval/environment.scm b/module/c/eval/environment.scm
new file mode 100644
index 00000000..12eefaf7
--- /dev/null
+++ b/module/c/eval/environment.scm
@@ -0,0 +1,34 @@
+(define-module (c eval environment)
+ :use-module (srfi srfi-1)
+ :export (make-environment
+ env-set! env-ref push-frame! pop-frame!))
+
+(define (make-frame)
+ (make-hash-table))
+
+(define (make-environment)
+ (list (make-frame)))
+
+;; Returns an updated environment, linear update
+(define (env-set! env key value)
+ ;; get handle to differentiate #f
+ ;; (even though #f should never be stored since it's not a C value)
+ (cond ((find (lambda (frame) (hashq-get-handle frame key)) env)
+ => (lambda (frame) (hashq-set! frame key value)))
+ (else (hashq-set! (car env) key value)))
+ env)
+
+(define (env-ref env key)
+ (cond ((null? env)
+ (scm-error 'misc-error "env-ref"
+ "~s unbound"
+ (list key)
+ #f))
+ ((hashq-get-handle (car env) key) => cdr)
+ (else (env-ref (cdr env) key))))
+
+(define (push-frame! environment)
+ (cons (make-frame) environment))
+
+(define (pop-frame! environment)
+ (cdr environment))
diff --git a/module/c/eval2.scm b/module/c/eval2.scm
new file mode 100644
index 00000000..d58f20bf
--- /dev/null
+++ b/module/c/eval2.scm
@@ -0,0 +1,20 @@
+(define-module (c eval2)
+ :use-module ((hnh util) :select (->))
+ :export (C-TRUE
+ C-FALSE
+ boolean->c-boolean
+ c-boolean->boolean
+ c-not))
+
+
+(define C-TRUE 1)
+(define C-FALSE 0)
+
+(define (boolean->c-boolean bool)
+ (if bool C-TRUE C-FALSE))
+
+(define (c-boolean->boolean bool)
+ (not (zero? bool)))
+
+(define (c-not b)
+ (-> b c-boolean->boolean not boolean->c-boolean))
diff --git a/module/c/lex.scm b/module/c/lex.scm
index 34e52d88..0bde5c9e 100644
--- a/module/c/lex.scm
+++ b/module/c/lex.scm
@@ -43,8 +43,23 @@
(define-peg-pattern integer all
(and (or base-8 base-16 base-10) (? integer-suffix)))
+(define-peg-pattern float-suffix all
+ (* (or "f" "F" "l" "L")))
+
+(define-peg-pattern exponent all
+ (and (ignore (or "e" "E")) (? (or "+" "-")) base-10))
+
+;; Helper patterns for creating named groups in float
+(define-peg-pattern float-integer all base-10)
+(define-peg-pattern float-decimal all base-10)
+
+(define-peg-pattern float all
+ (or (and float-integer exponent (? float-suffix))
+ (and (? float-integer) (ignore ".") float-decimal (? exponent) (? float-suffix))
+ (and float-integer (ignore ".") (? exponent) (? float-suffix))))
+
(define-peg-pattern number body
- (or integer))
+ (or float integer))
(define-peg-pattern group all
(and (ignore "(") expr (ignore ")")))
@@ -65,11 +80,16 @@
(define-peg-pattern char all
(and (ignore "'") (or escaped-char peg-any) (ignore "'")))
+(define-peg-pattern quot none "\"")
+
+(define-peg-pattern string all
+ (and quot (* (or escaped-char (and (not-followed-by "\"") peg-any))) quot))
(define-peg-pattern* operator all
`(or ,@(map symbol->string symbol-binary-operators)
,@(map (lambda (op) `(and ,(symbol->string op) ws))
- wordy-binary-operators)))
+ wordy-binary-operators)
+ "?" ":"))
;; whitespace
(define-peg-pattern ws none
@@ -89,17 +109,23 @@
base-10-digit))))
(define-peg-pattern prefix-operator all
- (or "!" "~" "*" "&" "++" "--" "+" "-"))
+ ;; It's important that ++ and -- are BEFORE + and -
+ ;; otherwise the first + is found, leaving the second +, which fails
+ ;; to lex since it's an invalid token
+ ;; TODO sizeof can be written as a prefix operator
+ ;; (without parenthesis) if the operand is an expression.
+ (or "*" "&" "++" "--"
+ "!" "~" "+" "-"))
+
;;; Note that stacked pre or postfix operators without parenthesis
;;; dosen't work. So `*&C' is invalid, while `*(&C)' is valid.
(define-peg-pattern prefix all
- (and prefix-operator sp (or variable group funcall #; postfix
- )))
+ (and prefix-operator sp (or variable group funcall literal)))
(define-peg-pattern postfix-operator all
- (or "++" "--"))
+ (or "++" "--" "*"))
(define-peg-pattern postfix all
;; literals can't be in-place incremented and decremented
@@ -111,15 +137,25 @@
;; first case is "same" as expr, but in different order to prevent
;; infinite self reference. Pre and postfix not here, solved by having
;; them before infix in expr
- (and (or funcall postfix prefix group char number variable)
+ (and (or funcall postfix prefix group literal variable)
sp operator sp expr))
(define-peg-pattern funcall all
(and variable sp group))
+(define-peg-pattern literal body
+ (or char string number))
+
;;; main parser
(define-peg-pattern expr body
- (+ (and sp (or infix postfix prefix funcall group char number variable)
+ (+ (and sp (or
+ ;; float must be BEFORE infix, otherwise 3.2 is parsed as (infix 3 (operator ".") 2)
+ ;; that however breaks the infix logic, meaning that floating point numbers can't be
+ ;; used in basic arithmetic.
+ ;; TODO remove all implicit order of operations handling in the lexer, and move it to
+ ;; the parser. This should also fix the case of typecasts being applied incorrectly.
+ float
+ infix postfix prefix funcall group literal variable)
sp)))
diff --git a/module/c/lex2.scm b/module/c/lex2.scm
new file mode 100644
index 00000000..af90dcce
--- /dev/null
+++ b/module/c/lex2.scm
@@ -0,0 +1,549 @@
+(define-module (c lex2)
+ :use-module (ice-9 peg)
+ :use-module (ice-9 match)
+ :use-module ((hnh util) :select (->))
+ :use-module (hnh util object)
+ :use-module (hnh util type)
+ :use-module ((srfi srfi-1) :select (fold))
+ :use-module (srfi srfi-88)
+ :use-module ((c trigraph) :select (replace-trigraphs))
+ :use-module ((c line-fold) :select (fold-lines))
+ :export (lex
+ lexeme lexeme?
+ placemaker
+ (type . lexeme-type)
+ (body . lexeme-body)
+ (noexpand . lexeme-noexpand)
+
+ parse-c-number
+
+ tokenize
+ ))
+
+;;; A.1 Lexical grammar
+;;; A.1.1 Lexical elements
+
+;; (6.4)
+(define-peg-pattern token all
+ (or keyword
+ identifier
+ constant
+ string-literal
+ punctuator
+ ))
+
+;; (6.4)
+(define-peg-pattern preprocessing-token all
+ ;; string literal moved before header-name since string literals
+ ;; otherwise became q-strings
+ (or string-literal
+ header-name
+ character-constant
+ identifier
+ pp-number
+ punctuator
+ ;; Each non-white-space character that cannot be one of the above
+ ))
+
+;;; A.1.2 Keywords
+
+;; (6.4.1)
+(define-peg-pattern keyword all
+ (or "auto" "break" "case" "char" "const" "continue" "default"
+ "do" "double" "else" "enum" "extern" "float" "for" "goto"
+ "if" "inline" "int" "long" "register" "restrict" "return"
+ "short" "signed" "sizeof" "static" "struct" "switch"
+ "typedef" "union" "unsigned" "void" "volatile" "while"
+ "_Alignas" "_Alignof" "_Atomic" "_Bool" "_Complex"
+ "_Generic" "_Imaginary" "_Noreturn" "_Static_assert"
+ "_Thread_local"))
+
+;;; A.1.3 Identifiers
+
+;; (6.4.2.1)
+(define-peg-pattern identifier all
+ (and identifier-nondigit (* (or identifier-nondigit digit))))
+
+;; (6.4.2.1)
+(define-peg-pattern identifier-nondigit body
+ (or nondigit
+ universal-character-name
+ ;; TODO other implementation-defined characters
+ ))
+
+;; (6.4.2.1)
+(define-peg-pattern nondigit body
+ (or "_"
+ (range #\A #\Z)
+ (range #\a #\z)))
+
+;; (6.4.2.1)
+(define-peg-pattern digit body
+ (range #\0 #\9))
+
+;;; A.1.4 Universal character names
+
+;; (6.4.3)
+(define-peg-pattern universal-character-name all
+ (or (and (ignore "\\u") hex-quad)
+ (and (ignore "\\U") hex-quad hex-quad)))
+
+;; (6.4.3)
+(define-peg-pattern hex-quad body
+ (and hexadecimal-digit hexadecimal-digit
+ hexadecimal-digit hexadecimal-digit))
+
+;;; A.1.5 Constants
+
+;; (6.4.4)
+(define-peg-pattern constant all
+ ;; Int and float swapped from standard since we need to try parsing
+ ;; the floats beforehand
+ (or floating-constant
+ integer-constant
+ enumeration-constant
+ character-constant))
+
+;; (6.4.4.1)
+(define-peg-pattern integer-constant all
+ (and (or decimal-constant
+ hexadecimal-constant
+ octal-constant)
+ (? integer-suffix)))
+
+;; (6.4.4.1)
+(define-peg-pattern decimal-constant all
+ (and nonzero-digit (* digit)))
+
+;; (6.4.4.1)
+(define-peg-pattern octal-constant all
+ (and "0" (* octal-digit)))
+
+;; (6.4.4.1)
+(define-peg-pattern hexadecimal-constant all
+ (and hexadecimal-prefix (+ hexadecimal-digit)))
+
+;; (6.4.4.1)
+(define-peg-pattern hexadecimal-prefix none
+ (or "0x" "0X"))
+
+;; (6.4.4.1)
+(define-peg-pattern nonzero-digit body
+ (range #\1 #\9))
+
+;; (6.4.4.1)
+(define-peg-pattern octal-digit body
+ (range #\0 #\7))
+
+;; (6.4.4.1)
+(define-peg-pattern hexadecimal-digit body
+ (or (range #\0 #\9)
+ (range #\a #\f)
+ (range #\A #\F)))
+
+;; (6.4.4.1)
+(define-peg-pattern integer-suffix all
+ (or (and unsigned-suffix (? long-suffix))
+ (and long-suffix (? unsigned-suffix))))
+
+;; (6.4.4.1)
+;; This is a merger of long-suffix and long-long-suffix
+(define-peg-pattern long-suffix body
+ (or "l" "L" "ll" "LL"))
+
+;; (6.4.4.1)
+(define-peg-pattern unsigned-suffix body
+ (or "u" "U"))
+
+;; (6.4.4.2)
+(define-peg-pattern floating-constant all
+ (or decimal-floating-constant
+ hexadecimal-floating-constant))
+
+;; (6.4.4.2)
+(define-peg-pattern decimal-floating-constant all
+ (or (and fractional-constant (? exponent-part) (? floating-suffix))
+ (and digit-sequence exponent-part (? floating-suffix))))
+
+;; (6.4.4.2)
+(define-peg-pattern hexadecimal-floating-constant all
+ (and hexadecimal-prefix
+ (or hexadecimal-fractional-constant
+ hexadecimal-digit-sequence)
+ binary-exponent-part
+ (? floating-suffix)))
+
+;; (6.4.4.2)
+(define-peg-pattern fractional-constant all
+ (or (and (? digit-sequence) "." digit-sequence)
+ (and digit-sequence ".")))
+
+;; (6.4.4.2)
+(define-peg-pattern exponent-part all
+ (and (or "e" "E") (? sign) digit-sequence))
+
+;; (6.4.4.2)
+(define-peg-pattern sign all
+ (or "+" "-"))
+
+;; (6.4.4.2)
+(define-peg-pattern digit-sequence body
+ (+ digit))
+
+;; (6.4.4.2)
+(define-peg-pattern hexadecimal-fractional-constant all
+ (or (and (? hexadecimal-digit-sequence) "." hexadecimal-digit-sequence)
+ (and hexadecimal-digit-sequence ".")))
+
+;; (6.4.4.2)
+(define-peg-pattern binary-exponent-part all
+ (and (ignore (or "p" "P"))
+ (? sign)
+ digit-sequence))
+
+;; (6.4.4.2)
+(define-peg-pattern hexadecimal-digit-sequence body
+ (+ hexadecimal-digit))
+
+;; (6.4.4.2)
+(define-peg-pattern floating-suffix all
+ (or "f" "l" "F" "L"))
+
+;; (6.4.4.3)
+(define-peg-pattern enumeration-constant all
+ identifier)
+
+(define-peg-pattern character-prefix all
+ (or "L" "u" "U"))
+
+;; (6.4.4.4)
+(define-peg-pattern character-constant all
+ (and (? character-prefix)
+ (ignore "'")
+ (+ c-char)
+ (ignore "'")))
+
+;; (6.4.4.4)
+(define-peg-pattern c-char body
+ (or (and (not-followed-by (or "'" "\\" "\n")) peg-any)
+ escape-sequence))
+
+;; (6.4.4.4)
+(define-peg-pattern escape-sequence all
+ (or simple-escape-sequence
+ octal-escape-sequence
+ hexadecimal-escape-sequence
+ universal-character-name))
+
+;; (6.4.4.4)
+(define-peg-pattern simple-escape-sequence all
+ (and (ignore "\\") (or "'" "\"" "?" "\\"
+ "a" "b" "f" "n" "r" "t" "v")))
+
+;; (6.4.4.4)
+(define-peg-pattern octal-escape-sequence all
+ (and (ignore "\\") octal-digit (? octal-digit) (? octal-digit)))
+
+;; (6.4.4.4)
+(define-peg-pattern hexadecimal-escape-sequence all
+ (and (ignore "\\x") (+ hexadecimal-digit)))
+
+;; A.1.6 String literals
+
+;; (6.4.5)
+(define-peg-pattern string-literal all
+ (and (? encoding-prefix)
+ (ignore "\"")
+ (* s-char)
+ (ignore "\"")))
+
+;; (6.4.5)
+(define-peg-pattern encoding-prefix all
+ (or "u8" "u" "U" "L"))
+
+;; (6.4.5)
+(define-peg-pattern s-char body
+ (or (and (not-followed-by (or "\"" "\\" "\n")) peg-any)
+ escape-sequence))
+
+;;; A.1.7
+
+;; (6.4.6)
+(define-peg-pattern punctuator all
+ (or "[" "]" "(" ")" "{" "}"
+ "..." ; Moved to be before "."
+ "." "->"
+ "&&" "||"
+ "!="
+ "++" "--"
+ "*=" "/=" "%=" "+=" "-=" "<<=" ">>=" "&=" "^=" "|="
+ "<=" ">=" "=="
+ "="
+ "/" "%" "<<" ">>" "<" ">" "^" "|"
+ "?" ":" ";"
+ "&" "*" "+" "-" "~" "!"
+ "," "##" "#" ; # and ## flipped
+ "<:" ":>" "<%" "%>" "%:%:" "%:" ; %: and %:%: flipped
+ ))
+
+;;; A.1.8 Header names
+
+(define-peg-pattern h-string all (+ h-char))
+(define-peg-pattern q-string all (+ q-char))
+
+;; (6.4.7)
+(define-peg-pattern header-name all
+ (or (and (ignore "<") h-string (ignore ">"))
+ ;; NOTE this case will never be reached, since it's treated as a regular
+ ;; string instead
+ (and (ignore "\"") q-string (ignore "\""))))
+
+;; (6.4.7)
+(define-peg-pattern h-char body
+ (or (and (not-followed-by (or ">" "\n")) peg-any)
+ escape-sequence))
+
+;; (6.4.7)
+(define-peg-pattern q-char body
+ (or (and (not-followed-by (or "\"" "\n")) peg-any)
+ escape-sequence))
+
+;;; A.1.9 Preprocessing numbers
+
+;; (6.4.8)
+(define-peg-pattern pp-number all
+ (and (? ".") digit
+ (* (or digit
+ identifier-nondigit
+ (and (or "e" "E" "p" "P")
+ sign)
+ "."))))
+
+
+
+(define-peg-pattern whitespace all
+ (or "\t" "\n" "\v" "\f" " "
+ ;; "\r"
+ ))
+
+(define-peg-pattern block-comment body
+ (and (ignore "/*")
+ (* (and (not-followed-by "*/")
+ peg-any))
+ (ignore "*/")))
+
+(define-peg-pattern line-comment body
+ (and (ignore "//")
+ (* (and (not-followed-by "\n")
+ peg-any))))
+
+(define-peg-pattern comment all
+ (or line-comment block-comment))
+
+(define-peg-pattern non-whitespace all
+ (and (not-followed-by whitespace)
+ peg-any))
+
+(define-peg-pattern preprocessing-tokens all
+ (* (or whitespace
+ comment
+ preprocessing-token
+ non-whitespace)))
+
+
+
+;; comment could be merged with whitespace, but then unlex would have to know that
+
+;; other is the "each non-white-space character that cannot be one of the above"
+;; clause from 6.4 p. 1
+
+(define-type (lexeme)
+ (type type: (memv '(whitespace comment preprocessing-token other placemaker)))
+ (body type: (or string? list?))
+ (noexpand type: (list-of string?)
+ default: '()))
+
+(define (placemaker)
+ (lexeme type: 'placemaker body: '()))
+
+(define (lex-output->lexeme-object x)
+ (match x
+ (`(non-whitespace ,body)
+ (lexeme body: body type: 'other))
+ (`(whitespace ,body)
+ (lexeme body: body type: 'whitespace ))
+ (`(comment ,body)
+ (lexeme body: body type: 'comment ))
+ (`(preprocessing-token ,body)
+ (match body
+ ('string-literal
+ ;; Unflatten case
+ (lexeme body: '(string-literal (encoding-prefix) "")
+ type: 'preprocessing-token))
+ (('string-literal `(encoding-prefix ,px) args ...)
+ (lexeme body: `(string-literal (encoding-prefix . ,px) ,@args)
+ type: 'preprocessing-token))
+ (('string-literal args ...)
+ (lexeme body: `(string-literal (encoding-prefix) ,@args)
+ type: 'preprocessing-token))
+ (('character-constant `(character-prefix ,px) args ...)
+ (lexeme body: `(character-constant (character-prefix . ,px)
+ ,@args)
+ type: 'preprocessing-token))
+ (('character-constant args ...)
+ (lexeme body: `(character-constant (character-prefix) ,@args)
+ type: 'preprocessing-token))
+ (body (lexeme body: body type: 'preprocessing-token))))
+
+ ;; "unflatten"
+ ('comment (lexeme body: "" type: 'comment))))
+
+
+
+
+;; At a number of places I chose token depending on the order of the rule. The
+;; standard however says that the longest possible choice should be used.
+;; 6.4 p. 4
+
+;; returns a list of lexemes
+(define (lex string)
+ (if (string-null? string)
+ '()
+ (map lex-output->lexeme-object
+ (let ((result (match-pattern preprocessing-tokens string)))
+ (let ((trailing (substring (peg:string result)
+ (peg:end result))))
+ (unless (string-null? trailing)
+ (scm-error 'cpp-lex-error "lex"
+ "Failed to lex string, remaining trailing characters: ~s"
+ (list trailing) #f)))
+ (unless (list? (peg:tree result))
+ (scm-error 'cpp-lex-error "lex"
+ "Parsing just failed. Chars: ~s"
+ (list (peg:string result)) #f))
+ (cdr (peg:tree result))))))
+
+
+
+
+
+;; (parse-decimals "555" 10)
+;; ⇒ 0.5549999999999999
+;; (parse-decimals "8" 16)
+;; ⇒ 0.5
+(define (parse-decimals str base)
+ (/ (fold (lambda (digit done)
+ (let ((v (string->number digit base)))
+ (+ v (/ done base))))
+ 0.0
+ (map string (string->list str)))
+ base))
+
+;; parse a number on form <digits>.<digits>
+(define (parse-fractional str base)
+ (let* ((pair (string-split str #\.))
+ (integer (list-ref pair 0))
+ (decimals (list-ref pair 1)))
+ (+ (if (string-null? integer)
+ 0 (string->number integer 16))
+ (if (string-null? decimals)
+ 0 (parse-decimals decimals 16)))))
+
+
+(define (parse-float body)
+ (define (fractional-constant x)
+ (case x
+ ((decimal-floating-constant) 'fractional-constant)
+ ((hexadecimal-floating-constant) 'hexadecimal-fractional-constant)))
+
+ (define (exponent-part x)
+ (case x
+ ((decimal-floating-constant) 'exponent-part)
+ ((hexadecimal-floating-constant) 'binary-exponent-part)))
+
+ (define (expt-base x)
+ (case x
+ ((decimal-floating-constant) 10)
+ ((hexadecimal-floating-constant) 2)))
+
+ (define (base x)
+ (case x
+ ((decimal-floating-constant) 10)
+ ((hexadecimal-floating-constant) 16)))
+
+ (let ((type (car body))
+ (body (cdr body)))
+ (* 1.0
+ (cond ((assoc-ref body (fractional-constant type))
+ => (lambda (fc) (parse-fractional (car fc) (base type))))
+ (else (string->number (car body) (base type))))
+ (cond ((assoc-ref body (exponent-part type))
+ => (lambda (x) (expt (expt-base type)
+ (string->number (car x) (base type)))))
+ (else 1)))
+ ;; TODO do something with (possible) suffix
+ ;; (assoc-ref body 'floating-suffix)
+ ))
+
+(define (parse-integer body)
+ (let* (;; (suffix (assoc-ref body 'integer-suffix))
+ (value (cadr (car body)))
+ (value-type (car (car body))))
+ ;; TODO do something with suffix
+ (string->number
+ value
+ (case value-type
+ ((octal-constant) 8)
+ ((decimal-constant) 10)
+ ((hexadecimal-constant) 16)))))
+
+;; (parse-c-number "0x1.8p0")
+;; ⇒ 1.5
+
+;; TODO is "5ul" equivalent to "((unsigned long) 5)"
+(define (parse-c-number string)
+ (cond ((match-pattern constant string)
+ => (lambda (m)
+ (let ((m (cadr (peg:tree m)))) ; Strip 'constant wrapper
+ (case (car m)
+ ((floating-constant)
+ (parse-float (cadr m)))
+
+ ((integer-constant)
+ (parse-integer (cdr m)))
+
+ ((enumeration-constant character-constant)
+ (scm-error 'misc-error "parse-c-number"
+ "Couldn't parse [~a] as a /number/ (~s)"
+ (list string m) #f))))))
+
+ (else (scm-error 'misc-error "parse-c-number"
+ "Couldn't parse [~a] as a number"
+ (list string) #f))))
+
+
+
+
+;;; 5.1.11.2 Translation phases
+
+(define (tokenize string)
+ (-> string
+;;; 1. trigraph replacement
+ replace-trigraphs
+;;; 2. Line folding
+ fold-lines
+;;; 3. Decomposition into preprocenning tokens, whitespaces, and comments
+ lex
+ comments->whitespace))
+
+;; These really belong in (c cpp-types), but that would create a dependency cycle
+
+(define (comment->whitespace token)
+ (if ;; (comment-token? token)
+ (and (lexeme? token)
+ (eq? 'comment (type token)))
+ (car (lex " "))
+ token))
+
+(define (comments->whitespace tokens)
+ (map comment->whitespace tokens))
diff --git a/module/c/line-fold.scm b/module/c/line-fold.scm
new file mode 100644
index 00000000..c61c2c70
--- /dev/null
+++ b/module/c/line-fold.scm
@@ -0,0 +1,29 @@
+(define-module (c line-fold)
+ :use-module (srfi srfi-1)
+ :use-module (srfi srfi-71)
+ :export (fold-lines))
+
+(define (line-continued? line)
+ (and (not (string-null? line))
+ (char=? #\\ (string-ref line (1- (string-length line))))))
+
+(define (strip-backslash line)
+ (string-drop-right line 1))
+
+(define (fold-lines string)
+ (with-output-to-string
+ (lambda ()
+ (let loop ((lines (string-split string #\newline)))
+ (cond ((null? lines) 'NOOP)
+ ((null? (cdr lines))
+ ;; TODO error message if last character is a backslash
+ (display (car lines))
+ (newline))
+ (else
+ (let ((to-merge remaining (span line-continued? lines)))
+ (for-each display (map strip-backslash to-merge))
+ (display (car remaining))
+ (newline)
+ (for-each (lambda _ (newline))
+ (iota (length to-merge)))
+ (loop (cdr remaining)))))))))
diff --git a/module/c/operators.scm b/module/c/operators.scm
index ab1b3e7c..910dc8a9 100644
--- a/module/c/operators.scm
+++ b/module/c/operators.scm
@@ -9,8 +9,9 @@
`(+ - * / & ,(symbol #\|) ^ << >> % < > =))
;; apparently part of C
+;; https://en.cppreference.com/w/cpp/language/operator_alternative
(define wordy-binary-operators
- '(bitand and_eq and bitor or_eq or xor_eq xor))
+ '(bitand and_eq and bitor or_eq or xor_eq xor not_eq))
(define symbol-binary-operators
(append (map (lambda (x) (symbol-append x '=)) simple-operators)
diff --git a/module/c/parse.scm b/module/c/parse.scm
index 8030da77..7d11ea17 100644
--- a/module/c/parse.scm
+++ b/module/c/parse.scm
@@ -1,11 +1,14 @@
(define-module (c parse)
:use-module (hnh util)
:use-module (srfi srfi-1)
+ :use-module (srfi srfi-71)
:use-module (ice-9 match)
+ :use-module ((rnrs io ports)
+ :select (string->bytevector make-transcoder utf-8-codec))
+ :use-module (rnrs bytevectors)
:export (parse-lexeme-tree))
-;;; Rename this
-(define (perms set)
+(define (permutations set)
(concatenate
(map (lambda (key)
(map (lambda (o) (cons key o))
@@ -21,23 +24,124 @@
(define valid-sequences
(delete 'dummy
(lset-union eq? '(dummy)
- (map symbol-concat (perms '(() U L)))
- (map symbol-concat (perms '(() U LL))))))
+ (map symbol-concat (permutations '(() U L)))
+ (map symbol-concat (permutations '(() U LL))))))
;; => (LLU ULL LL LU UL L U)
(aif (memv (string->symbol (string-upcase str))
valid-sequences)
(case (car it)
- [(LLU ULL) '(unsigned long-long)]
+ [(LLU ULL) '(unsigned long long)]
[(LU UL) '(unsigned long)]
- [(LL) '(long-long)]
+ [(LL) '(long long)]
[(L) '(long)]
[(U) '(unsigned)])
(scm-error 'c-parse-error "parse-integer-suffix"
"Invalid integer suffix ~s"
(list str) #f)))
+(define (parse-float-suffix str)
+ (case (string->symbol str)
+ ((f F) '(float))
+ ((l L) '(long double))))
+
+(define (group-body->type vars)
+ (concatenate
+ (map
+ (match-lambda (('variable var) (list (parse-lexeme-tree `(variable ,var))))
+ (('postfix ('variable var)
+ ('postfix-operator "*"))
+ (list (parse-lexeme-tree `(variable ,var))
+ '*))
+ (else (scm-error 'c-parse-error "parse-lexeme-tree"
+ "Invalid token ~s in typecast form: ~s"
+ (list else vars) #f)))
+ vars)))
+
+;; Takes a list of strings and integers, and merges it all into a single
+;; bytevector representing a c string
+(define* (string-fragments->c-string
+ fragments optional: (transcoder (make-transcoder (utf-8-codec))))
+
+ (define fragments-fixed
+ (map (lambda (frag)
+ (if (string? frag)
+ (string->bytevector frag transcoder)
+ frag))
+ fragments))
+
+ (define bv-length
+ (fold (lambda (item sum) (+ sum (if (bytevector? item)
+ (bytevector-length item)
+ 1)))
+ 0 fragments-fixed))
+
+ (define bv (make-bytevector (1+ bv-length)))
+ ;; trailing null byte
+ (bytevector-u8-set! bv bv-length 0)
+ (fold (lambda (item idx)
+ (cond ((bytevector? item)
+ (bytevector-copy! item 0
+ bv idx
+ (bytevector-length item))
+ (+ idx (bytevector-length item)))
+ (else (bytevector-u8-set! bv idx item)
+ (+ idx 1))))
+ 0
+ fragments-fixed)
+ bv)
+
+(define (parse-float-form float-form)
+ (let ((float-string
+ (fold (lambda (arg str)
+ (string-append
+ str
+ (match arg
+ (('float-integer ('base-10 n)) n)
+ (('float-decimal ('base-10 n)) (string-append "." n))
+ (('exponent "+" ('base-10 n)) (string-append "e" n))
+ (('exponent ('base-10 n)) (string-append "e" n))
+ (('exponent "-" ('base-10 n)) (string-append "e-" n)))))
+ "" float-form)))
+ ;; exact->inexact is a no-op if we already have an inexact number, but
+ ;; ensures we get an inexact number when we have an exact number (which we
+ ;; can get from the "1." case). Returning an inexact number here is important
+ ;; to avoid arithmetic suprises later.
+ (exact->inexact
+ (or (string->number float-string)
+ (scm-error 'c-parse-error "parse-lexeme-tree"
+ "Couldn't parse expression as float: ~s"
+ (list `(float ,@args)) #f)))))
+
+
+(define (resolve-escaped-char form)
+ (match form
+ (('base-8-char n) (string->number n 8))
+ (('base-16-char n) (string->number n 16))
+ (c (char->integer
+ (case (string-ref c 0)
+ ((#\a) #\alarm)
+ ((#\b) #\backspace)
+ ((#\e) #\esc) ;; non-standard
+ ((#\f) #\page)
+ ((#\n) #\newline)
+ ((#\r) #\return)
+ ((#\t) #\tab)
+ ((#\v) #\vtab)
+ ((#\\) #\\)
+ ;; These are valid in both strings and chars
+ ((#\') #\')
+ ((#\") #\"))))))
+
+;; Takes a list of strings and escaped-char form
+;; and returns a list of strings and integers
+(define (resolve-string-fragment fragment)
+ (match fragment
+ (('escaped-char c)
+ (resolve-escaped-char c))
+ (fargment fragment)))
+
(define (parse-lexeme-tree tree)
(match tree
['() '()]
@@ -50,55 +154,67 @@
[('integer n ('integer-suffix suffix))
`(as-type
,(parse-integer-suffix suffix)
- ,(parse-lexeme-tree n))
- ]
+ ,(parse-lexeme-tree n))]
+
[('integer n)
(parse-lexeme-tree n)]
+
+ [('float args ... ('float-suffix suffix))
+ `(as-type ,(parse-float-suffix suffix)
+ ;; parse rest of float as if it lacked a suffix
+ ,(parse-lexeme-tree `(float ,@args)))]
+
+ [('float args ...) (parse-float-form args)]
+
;; Character literals, stored as raw integers
;; so mathematical operations keep working on them.
- [('char ('escaped-char ('base-8-char n)))
- (-> n (string->number 8) #; integer->char)]
- [('char ('escaped-char ('base-16-char n)))
- (-> n (string->number 16) #; integer->char)]
- [('char ('escaped-char c))
- (char->integer
- (case (string-ref c 0)
- ((#\a) #\alarm)
- ((#\b) #\backspace)
- ((#\e) #\esc)
- ((#\f) #\page)
- ((#\n) #\newline)
- ((#\r) #\return)
- ((#\t) #\tab)
- ((#\v) #\vtab)
- ((#\\) #\\)
- ((#\') #\')))]
+ [('char ('escaped-char c)) (resolve-escaped-char c)]
+
[('char c) (char->integer (string-ref c 0))]
[('variable var) (string->symbol var)]
+
+ ;; normalize some binary operators to their wordy equivalent
+ ;; (which also happens to match better with scheme)
+ [('operator "&&") 'and]
+ [('operator "&=") 'and_eq]
+ [('operator "&") 'bitand]
+ [('operator "|") 'bitor]
+ [('operator "!=") 'not_eq]
+ [('operator "||") 'or]
+ [('operator "|=") 'or_eq]
+ [('operator "^") 'xor]
+ [('operator "^=") 'xor_eq]
+ ;; Change these names to something scheme can handle better
+ [('operator ".") 'object-slot]
+ [('operator "->") 'dereference-slot]
[('operator op) (string->symbol op)]
+
[('prefix-operator op)
(case (string->symbol op)
+ ((!) 'not)
+ ((~) 'compl)
((*) 'dereference)
((&) 'pointer)
((++) 'pre-increment)
((--) 'pre-decrement)
- (else => identity))]
+ ((-) '-)
+ (else (scm-error 'c-parse-error "parse-lexeme-tree"
+ "Unknown prefix operator ~s"
+ (list op) #f)))]
[('postfix-operator op)
(case (string->symbol op)
[(++) 'post-increment]
[(--) 'post-decrement]
- [else => identity])]
+ [else (scm-error 'c-parse-error "parse-lexeme-tree"
+ "Unknown postfix operator ~s"
+ (list op) #f)])]
;; Parenthesis grouping
- [('group args)
+ [('group args ...)
(parse-lexeme-tree args)]
- ;; Atomic item. Used by flatten-infix
- [('atom body)
- (parse-lexeme-tree body)]
-
[('prefix op arg)
`(,(parse-lexeme-tree op)
,(parse-lexeme-tree arg))]
@@ -107,81 +223,204 @@
`(,(parse-lexeme-tree op)
,(parse-lexeme-tree arg))]
+
+
+
+
+ ;; resolved-operator and ternary are the return "types"
+ ;; of resolve-order-of-operations
+ [(('resolved-operator op) args ...)
+ `(,op ,@(map parse-lexeme-tree args))]
+
+ [('ternary a b c)
+ `(ternary ,(parse-lexeme-tree a)
+ ,(parse-lexeme-tree b)
+ ,(parse-lexeme-tree c))]
+
+
+
+
+ ;; Is it OK for literal strings to be "stored" inline?
+ ;; Or must they be a pointer?
+ ['string #vu8(0)]
+ [('string str ...)
+ (-> (map resolve-string-fragment str)
+ string-fragments->c-string)]
+
+ ;; implicit concatenation of string literals
+ [(('string str ...) ...)
+ (-> (map resolve-string-fragment (concatenate str))
+ string-fragments->c-string)]
+
[('infix args ...)
- (resolve-order-of-operations
- (flatten-infix (cons 'infix args)))]
+ (let ((r (resolve-order-of-operations
+ (flatten-infix (cons 'infix args)))))
+ (parse-lexeme-tree r))]
+
[('funcall function ('group arguments))
`(funcall ,(parse-lexeme-tree function)
,(parse-lexeme-tree arguments))]
- [bare (scm-error 'c-parse-error
- "parse-lexeme-tree"
- "Naked literal in lex-tree: ~s"
- (list bare)
- #f)]))
+ [(('variable "struct") ('variable value) ..1)
+ `(struct-type ,@(map string->symbol value))
+ ]
+
+ ;; A list of variables. Most likely a type signature
+ ;; [(('variable value) ..1)
+ ;; ]
+
+ ;; A typecast with only variables must (?) be a typecast?
+ [(('group groups ..1) ... value)
+ (fold-right (lambda (type done) `(as-type ,type ,done))
+ (parse-lexeme-tree value)
+ (map group-body->type groups))]
+
+ ;; Type name resolution?
+ ;; https://en.wikipedia.org/wiki/C_data_types
+ ;; base types with spaces:
+ ;; =======================
+ ;; [[un]signed] char
+ ;; [[un]signed] short [int]
+ ;; [[un]signed] int
+ ;; [un]signed [int]
+ ;; [[un]signed] long [int]
+ ;; [[un]signed] long long [int]
+ ;; float
+ ;; [long] double
+
+ ;; https://en.wikipedia.org/wiki/Type_qualifier
+ ;; qualifiers
+ ;; const
+ ;; volatile
+ ;; restrict
+ ;; _Atomic
+
+
+ ;; Storage specifiers
+ ;; auto
+ ;; register
+ ;; static
+ ;; extern
+
+ ;; struct <typename>
+ ;; enum <typename>
+ ;; union <typename>
+
+ ;; https://en.wikipedia.org/wiki/C_syntax
+ ;; int (*ptr_to_array)[100]
+
+
+ [(? symbol? bare)
+ (scm-error 'c-parse-error
+ "parse-lexeme-tree"
+ "Naked literal in lex-tree: ~s"
+ (list bare)
+ #f)]
+
+ [form
+ (scm-error 'c-parse-error
+ "parse-lexeme-tree"
+ "Unknown form in lex-tree: ~s"
+ (list form) #f)
+ ]))
;; https://en.wikipedia.org/wiki/Operators_in_C_and_C%2B%2B
+;; https://en.cppreference.com/w/c/language/operator_precedence
(define order-of-operations
(reverse
- (concatenate
- ;; This is only for binary operations
- `((-> ,(symbol #\.))
- (* / %)
- (+ -)
- (<< >>)
- (< <= > >=)
- (== !=)
- (&)
- (^)
- (,(symbol #\|))
- (&&)
- (,(symbol #\| #\|))
- (= += -= *= /= %= <<= >>= &= ^= ,(symbol #\| #\=))
- (,(symbol #\,))
- ))))
-
-(define (mark-other form)
- (if (list? form) (cons '*other* form) form))
+ ;; This is only for binary operations
+ `((-> ,(symbol #\.))
+ ;; All unary procedures go here, incnluding typecasts, and sizeof
+ (* / %)
+ (+ -)
+ (<< >>)
+ (< <= > >=)
+ (== != not_eq)
+ (& bitand)
+ (^ xorg)
+ (,(symbol #\|) bitor)
+ (&& and)
+ (,(symbol #\| #\|) or)
+ (? :)
+ (= += -= *= /= %= <<= >>= &= ^= ,(symbol #\| #\=)
+ and_eq or_eq xor_eq)
+ (,(symbol #\,))
+ )))
+
+;; a.b->c.d (. (-> (. a b) c) d)
+;; 2 * 3 / 4 * 5 => (* (/ (* 2 3) 4) 5)
+;; eller => (* 2 (/ 3 4) 5)
(define* (resolve-order-of-operations
tree optional: (order order-of-operations))
(if (null? order)
- (car tree)
+ (scm-error 'c-parse-error
+ "resolve-order-of-operations"
+ "Out of operations to resolve when resolving expression ~s"
+ (list tree) #f)
(match tree
- [('*other* body ...) body]
- [(form) (resolve-order-of-operations form order)]
- [(forms ...)
- (match (split-by forms (car order))
- [(group) (resolve-order-of-operations group (cdr order))]
- [groups
- (cons (car order)
- (map (lambda (form) (resolve-order-of-operations
- form order-of-operations))
- groups))])]
- [a a])))
+ [('fixed-infix form) form]
+ [('fixed-infix forms ...)
+ (match (split-by-one-of forms (car order))
+ [(group)
+ (resolve-order-of-operations (cons 'fixed-infix group)
+ (cdr order))]
+ [(a ('? b ...) (': c ...))
+ `(ternary ,(resolve-order-of-operations (cons 'fixed-infix a) (cdr order))
+ ,(resolve-order-of-operations (cons 'fixed-infix b) (cdr order))
+ ,(resolve-order-of-operations (cons 'fixed-infix c) (cdr order)))]
+ [(first rest ...)
+ ;; TODO this is only valid for the associative operators (+, ...)
+ ;; but not some other (<, ...)
+ (if (apply eq? (map car rest))
+ (let ((op (caar rest)))
+ `((resolved-operator ,op)
+ ,@(map (lambda (x) (resolve-order-of-operations (cons 'fixed-infix x)
+ (cdr order)))
+ (cons first (map cdr rest)))))
+ (fold (lambda (item done)
+ (let ((operator args (car+cdr item)))
+ `((resolved-operator ,operator)
+ ,done ,(resolve-order-of-operations
+ (cons 'fixed-infix args)
+ (cdr order)))))
+ (resolve-order-of-operations (cons 'fixed-infix first)
+ (cdr order))
+ rest))])])))
+
+;; 1 * 2 / 3 * 4
+;; ⇒ ((1) (* 2) (/ 3) (* 4))
+;; (1)
+;; (* (1) 2)
+;; (/ (* (1) 2) 3)
+;; (* (/ (* (1) 2) 3) 4)
;; Flatens a tree of infix triples. Stops when it should.
;; (parenthesis, function calls, ...)
(define (flatten-infix form)
- (match form
- [('infix left op ('infix right ...))
- (cons* (parse-lexeme-tree left)
- (parse-lexeme-tree op)
- (flatten-infix (cons 'infix right)))]
-
- [('infix left op right)
- (list (mark-other (parse-lexeme-tree left))
- (parse-lexeme-tree op)
- (mark-other (parse-lexeme-tree right)))]
-
- [other (scm-error 'c-parse-error
- "flatten-infix"
- "Not an infix tree ~a"
- (list other)
- #f)]))
+ (cons 'fixed-infix
+ (let loop ((form form))
+ (match form
+ [('infix left op ('infix right ...))
+ (cons* left
+ (parse-lexeme-tree op)
+ (loop (cons 'infix right)))]
+
+ [('infix left op right)
+ (list left
+ (parse-lexeme-tree op)
+ right)]
+
+ [('infix form) form]
+
+ [other (scm-error 'c-parse-error
+ "flatten-infix"
+ "Not an infix tree ~a"
+ (list other)
+ #f)]))))
diff --git a/module/c/parse2.scm b/module/c/parse2.scm
new file mode 100644
index 00000000..fad2ffd8
--- /dev/null
+++ b/module/c/parse2.scm
@@ -0,0 +1,554 @@
+(define-module (c parse2)
+ :use-module (hnh util)
+ :use-module (system base lalr))
+
+(define (make-parser)
+ (lalr-parser
+ (#{out-table:}# "/tmp/c-parser.txt")
+ (#{output:}# c-parser "/tmp/c-parser.scm")
+ ;; (#{driver:}# glr)
+
+
+ (
+ ;; keywords
+ auto break case char const continue default do double else enum
+ extern float for goto if inline int long register restrict
+ return short signed sizeof static struct switch typedef union
+ unsigned void volatile while _Alignas _Alignof _Atomic _Bool
+ _Complex _Generic _Imaginary _Noreturn _Static_assert
+ _Thread_local
+
+ ;; punctuators
+ ;; 6.4.6
+ lbrack rbrack ; []
+ lparen rparen ; ()
+ lbrace rbrace ; {}
+ dot ; .
+ ->
+ ++ -- & * + - ~ !
+ / % << >> < > <= >= == != ^
+ pipe pipe2 ; | and || scheme handles these fine, but not emacs
+ &&
+ ? :
+ semicolon
+ ...
+ = *= /= %= += -= <<= >>= &= ^=
+ pipe= ; |=
+ comma ; ,
+ hash ; #
+ hash2 ; ##
+ ;; TODO digraphs
+
+ ;; 6.4
+ ;; keyword - already translated
+ identifier
+ constant
+ string-literal
+ ;; punctuator - already translated
+ )
+
+ ;; Primitives
+
+ ;; (identifier) : $1
+ ;; (constant) : $1
+ ;; (string-literal) : $1
+
+
+ ;; compounds
+
+ (primary-expression
+
+ ;; 6.5.1
+ (identifier)
+ (constant)
+ (string-literal)
+ (lparen expression rparen)
+ (generic-selection))
+
+ (enumeration-constant
+ (identifier))
+
+
+ ;; 6.5.1.1
+ (generic-selection
+ (_Generic lparen assignment-expression comma generic-assoc-list))
+
+ (generic-assoc-list
+ (generic-association)
+ (generic-assoc-list comma generic-association))
+
+ (generic-association
+ (type-name : assignment-expression)
+ (default : assignment-expression))
+
+ ;; 6.5.2
+ (postfix-expression
+ (primary-expression)
+ (postfix-expression lbrack expression rbrack)
+ (postfix-expression lparen rparen)
+ (postfix-expression lparen argument-expression-list rparen)
+ (postfix-expression dot identifier)
+ (postfix-expression -> identifier)
+ (postfix-expression ++)
+ (postfix-expression --)
+ (lparen type-name rparen lbrace initializer-list rbrace)
+ (lparen type-name rparen lbrace initializer-list comma rbrace))
+
+ (argument-expression-list
+ (assignment-expression)
+ (argument-expression-list comma assignment-expression))
+
+ ;; 6.5.3
+ (unary-expression
+ (postfix-expression)
+ (++ unary-expression)
+ (-- unary-expression)
+ (unary-operator cast-expression)
+ (sizeof unary-expression)
+ (sizeof lparen type-name rparen)
+ (_Alignof lparen type-name rparen))
+
+ (unary-operator
+ (&)
+ (*)
+ (+)
+ (-)
+ (~)
+ (!))
+
+ ;; 6.5.4
+ (cast-expression
+ (unary-expression)
+ (lparen type-name rparen cast-expression))
+
+ ;; 6.5.5
+ (multiplicative-expression
+ (cast-expression)
+ (multiplicative-expression * cast-expression)
+ (multiplicative-expression / cast-expression)
+ (multiplicative-expression % cast-expression))
+
+ ;; 6.5.6
+ (additive-expression
+ (multiplicative-expression)
+ (additive-expression + multiplicative-expression)
+ (additive-expression - multiplicative-expression))
+
+
+ ;; 6.5.7
+ (shift-expression
+ (additive-expression)
+ (shift-expression << additive-expression)
+ (shift-expression >> additive-expression))
+
+ ;; 6.5.8
+ (relational-expression
+ (shift-expression)
+ (relational-expression < shift-expression)
+ (relational-expression > shift-expression)
+ (relational-expression <= shift-expression)
+ (relational-expression >= shift-expression))
+
+ ;; 6.5.9
+ (equality-expression
+ (relational-expression)
+ (equality-expression == relational-expression)
+ (equality-expression != relational-expression))
+
+
+ ;; 6.5.10
+ (AND-expression
+ (equality-expression)
+ (AND-expression & equality-expression))
+
+ ;; 6.5.11
+ (exclusive-OR-expression
+ (AND-expression)
+ (exclusive-OR-expression ^ AND-expression))
+
+ ;; 6.5.12
+ (inclusive-OR-expression
+ (exclusive-OR-expression)
+ (inclusive-OR-expression pipe exclusive-OR-expression))
+
+ ;; 6.5.13
+ (logical-AND-expression
+ (inclusive-OR-expression)
+ (logical-AND-expression && inclusive-OR-expression))
+
+ ;; 6.5.14
+ (logical-OR-expression
+ (logical-AND-expression)
+ (logical-OR-expression pipe2 logical-AND-expression))
+
+ ;; 6.5.15
+ (conditional-expression
+ (logical-OR-expression)
+ (logical-OR-expression ? expression : conditional-expression))
+
+ ;; 6.5.16
+ (assignment-expression
+ (conditional-expression)
+ (unary-expression assignment-operator assignment-expression))
+
+ (assignment-operator
+ (=)
+ (*=)
+ (/=)
+ (%=)
+ (+=)
+ (-=)
+ (<<=)
+ (>>=)
+ (&=)
+ (^=)
+ (pipe=))
+
+ ;; 6.5.17
+ (expression
+ (assignment-expression)
+ (expression comma assignment-expression))
+
+ ;; 6.6 constant expression
+ (constant-expression
+ (expression))
+
+ ;; 6.7
+ (declaration
+ (declaration-specifiers semicolon)
+ (declaration-specifiers init-declarator-list semicolon)
+ (static_assert-declaration))
+
+ (declaration-specifiers
+ (storage-class-specifier)
+ (storage-class-specifier declaration-specifiers)
+
+ (type-specifier)
+ (type-specifier declaration-specifiers)
+
+ (type-qualifier)
+ (type-qualifier declaration-specifiers)
+
+ (function-specifier)
+ (function-specifier declaration-specifiers)
+
+ (alignment-specifier)
+ (alignment-specifier declaration-specifiers))
+
+ (init-declarator-list
+ (init-declarator)
+ (init-declarator-list comma init-declarator))
+
+ (init-declarator
+ (declarator)
+ (declarator = initializer))
+
+
+ ;; 6.7.1
+ (storage-class-specifier
+ (typedef)
+ (extern)
+ (static)
+ (_Thread_local)
+ (auto)
+ (register))
+
+
+ ;; 6.7.2
+ (type-specifier
+ (void)
+ (char)
+ (short)
+ (int)
+ (long)
+ (float)
+ (double)
+ (signed)
+ (unsigned)
+ (_Bool)
+ (_Complex)
+ (atomic-type-specifier)
+ (struct-or-union-specifier)
+ (enum-specifier)
+ (typedef-name))
+
+ ;; 6.7.2.1
+ (struct-or-union-specifier
+ (struct-or-union lbrace struct-declaration-list rbrace)
+ (struct-or-union identifier lbrace struct-declaration-list rbrace)
+ (struct-or-union identifier))
+
+ (struct-or-union
+ (struct)
+ (union))
+
+ (struct-declaration-list
+ (struct-declaration)
+ (struct-declaration-list struct-declaration))
+
+ (struct-declaration
+ (specifier-qualifier-list semicolon)
+ (specifier-qualifier-list struct-declarator-list semicolon)
+ (static_assert-declaration))
+
+ (specifier-qualifier-list
+ (type-specifier)
+ (type-specifier specifier-qualifier-list)
+
+ (type-qualifier)
+ (type-qualifier specifier-qualifier-list))
+
+ (struct-declarator-list
+ (struct-declarator)
+ (struct-declarator-list comma struct-declarator))
+
+ (struct-declarator
+ (declarator)
+ (: constant-expression)
+ (declarator : constant-expression))
+
+ ;; 6.7.2.2
+ (enum-specifier
+ (enum identifier lbrace enumerator-list rbrace)
+ (enum lbrace enumerator-list rbrace)
+
+ (enum identifier lbrace enumerator-list comma rbrace)
+ (enum lbrace enumerator-list comma rbrace)
+
+ (enum identifier))
+
+ (enumerator-list
+ (enumerator)
+ (enumerator-list comma enumerator))
+
+ (enumerator
+ (enumeration-constant)
+ (enumeration-constant = constant-expression))
+
+ ;; 6.7.2.4
+ (atomic-type-specifier
+ (_Atomic lparen type-name rparen))
+
+ ;; 6.7.3
+ (type-qualifier
+ (const)
+ (restrict)
+ (volatile)
+ (_Atomic))
+
+ ;; 6.7.4
+ (function-specifier
+ (inline)
+ (_Noreturn))
+
+ ;; 6.7.5
+ (alignment-specifier
+ (_Alignas lparen type-name rparen)
+ (_Alignas lparen constant-expression rparen))
+
+ ;; 6.7.6
+
+ (declarator
+ (pointer direct-declarator)
+ (direct-declarator))
+
+ (direct-declarator
+ (identifier)
+ ( lparen declarator rparen )
+
+ (direct-declarator lbrack type-qualifier-list assignment-expression rbrack )
+ (direct-declarator lbrack assignment-expression rbrack )
+ (direct-declarator lbrack type-qualifier-list rbrack )
+ (direct-declarator lbrack rbrack )
+
+ (direct-declarator lbrack static type-qualifier-list assignment-expression rbrack)
+ (direct-declarator lbrack static assignment-expression rbrack)
+
+ (direct-declarator lbrack type-qualifier-list static assignment-expression rbrack)
+
+ (direct-declarator lbrack type-qualifier-list * rbrack)
+ (direct-declarator lbrack * rbrack)
+
+ (direct-declarator lparen parameter-type-list rparen )
+ (direct-declarator lparen identifier-list rparen )
+ (direct-declarator lparen rparen ))
+
+ (pointer
+ (* type-qualifier-list)
+ (*)
+ (* type-qualifier-list pointer)
+ (* pointer))
+
+ (type-qualifier-list
+ (type-qualifier)
+ (type-qualifier-list type-qualifier))
+
+ (parameter-type-list
+ (parameter-list)
+ (parameter-list comma ...))
+
+ (parameter-list
+ (parameter-declaration)
+ (parameter-list comma parameter-declaration))
+
+ (parameter-declaration
+ (declaration-specifiers declarator)
+ (declaration-specifiers abstract-declarator)
+ (declaration-specifiers))
+
+ (identifier-list
+ (identifier)
+ (identifier-list comma identifier))
+
+ ;; 6.7.7
+ (type-name
+ (specifier-qualifier-list)
+ (specifier-qualifier-list abstract-declarator))
+
+ (abstract-declarator
+ (pointer)
+ (pointer direct-abstract-declarator)
+ ( direct-abstract-declarator))
+
+ (direct-abstract-declarator
+ ( lparen abstract-declarator rparen )
+ (direct-abstract-declarator lbrack type-qualifier-list assignment-expression rbrack )
+ (direct-abstract-declarator lbrack type-qualifier-list rbrack )
+ (direct-abstract-declarator lbrack assignment-expression rbrack )
+ ( lbrack rbrack )
+ ( lbrack type-qualifier-list assignment-expression rbrack )
+ ( lbrack type-qualifier-list rbrack )
+ (direct-abstract-declarator lbrack * rbrack)
+ ( lbrack * rbrack)
+ (direct-abstract-declarator lparen parameter-type-list rparen )
+ (direct-abstract-declarator lparen rparen )
+ ( lparen parameter-type-list rparen )
+ ( lparen rparen ))
+
+ ;; 6.7.8
+ (typedef-name
+ (identifier))
+
+ ;; 6.7.9
+ (initializer
+ (assignment-expression)
+ (lbrace initializer-list rbrace)
+ (lbrace initializer-list comma rbrace))
+
+ (initializer-list
+ (designation initializer)
+ (initializer)
+ (initializer-list comma designation initializer)
+ (initializer-list comma initializer))
+
+ (designation
+ (designator-list =))
+
+ (designator-list
+ (designator)
+ (designator-list designator))
+
+ (designator
+ (lbrack constant-expression rbrack)
+ (dot identifier))
+
+ ;; 6.7.10
+ (static_assert-declaration
+ (_Static_assert lparen constant-expression comma string-literal rparen semicolon))
+
+ ;; 6.8
+ (statement
+ (labeled-statement)
+ (compound-statement)
+ (expression-statement)
+ (selection-statement)
+ (iteration-statement)
+ (jump-statement))
+
+ ;; 6.8.1
+ (labeled-statement
+ (identifier : statement)
+ (case constant-expression : statement)
+ (default : statement))
+
+ ;; 6.8.2
+ (compound-statement
+ (lbrace block-item-list rbrace)
+ (lbrace rbrace))
+
+ (block-item-list
+ (block-item)
+ (block-item-list block-item))
+
+ (block-item
+ (declaration)
+ (statement))
+
+ ;; 6.8.3
+ (expression-statement
+ (expression semicolon)
+ (semicolon))
+
+ (selection-statement
+ (if lparen expression rparen statement)
+ (if lparen expression rparen statement else statement)
+ (switch lparen expression rparen statement))
+
+ ;; 6.8.5
+ (iteration-statement
+ (while lparen expression rparen statement)
+ (do statement while lparen expression rparen semicolon)
+ (for lparen expression semicolon expression semicolon expression rparen statement)
+ (for lparen expression semicolon expression semicolon rparen statement)
+ (for lparen expression semicolon semicolon expression rparen statement)
+ (for lparen semicolon semicolon rparen statement)
+ (for lparen semicolon expression semicolon expression rparen statement)
+ (for lparen semicolon expression semicolon rparen statement)
+ (for lparen declaration expression semicolon expression rparen statement)
+ (for lparen declaration expression semicolon rparen statement)
+ (for lparen declaration semicolon expression rparen statement)
+ (for lparen declaration semicolon rparen statement))
+
+ ;; 6.8.6
+ (jump-statement
+ (goto identifier semicolon)
+ (continue semicolon)
+ (break semicolon)
+ (return expression semicolon)
+ (return semicolon))
+
+
+ ;; 6.9
+ (translation-unit
+ (external-declaration)
+ (translation-unit external-declaration))
+
+ (external-declaration
+ (function-definition)
+ (declaration))
+
+ ;; 6.9.1
+ (function-definition
+ (declaration-specifiers declarator declaration-list compound-statement)
+ (declaration-specifiers declarator compound-statement))
+
+ (declaration-list
+ (declaration)
+ (declaration-list declaration))))
+
+
+(define (build-lexical-analyzer tokens)
+ (let ((tokens tokens))
+ (lambda ()
+ (if (null? tokens)
+ '*eoi*
+ (begin1 (car tokens)
+ (set! tokens (cdr tokens)))))))
+
+
+;; (build-lexical-analyzer (list (cons 'string "hello")))
+
+(define (error-procedure a b)
+ (throw 'parse-error a b))
+
+;; (parser lexical-analyzer error-procedure)
diff --git a/module/c/preprocessor.scm b/module/c/preprocessor.scm
new file mode 100644
index 00000000..71712b17
--- /dev/null
+++ b/module/c/preprocessor.scm
@@ -0,0 +1,370 @@
+(define-module (c preprocessor)
+ :use-module (srfi srfi-1)
+ :use-module (srfi srfi-9 gnu)
+ :use-module (ice-9 rdelim)
+ :use-module (ice-9 regex)
+ :use-module (hnh util object)
+
+ :use-module (hnh util)
+ :use-module (hnh util object)
+ )
+
+(define (read-lines port)
+ (let loop ((done '()))
+ (let ((line (read-line port)))
+ (if (eof-object? line)
+ (reverse done)
+ (loop (cons line done))))))
+
+;; The source line of a give readen line
+(define line-number (make-object-property))
+;; The source file of a given readen line
+(define line-file (make-object-property))
+
+
+(define (mark-with-property! items property property-value)
+ (for-each (lambda (item) (set! (property item) property-value))
+ items))
+
+(define trigraph-rx (make-regexp "??([=()/'<>!-])"))
+(define (expand-trigraphs line)
+ (regexp-substitute/global
+ #f trigraph-rx
+ line
+ 'pre (lambda (m) (case (string-ref (match:substring m 1) 1)
+ ((#\=) "#")
+ ((#\() "[")
+ ((#\)) "]")
+ ((#\/) "\\")
+ ((#\') "^")
+ ((#\<) "{")
+ ((#\>) "}")
+ ((#\!) "|")
+ ((#\-) "~")))
+ 'post))
+
+(define (number-lines lines)
+ (for-each (lambda (line number)
+ (set! (line-number line) number))
+ lines
+ (iota (length lines) 1))
+ lines)
+
+;; Should this line be merged with the next
+(define (line-continued? line)
+ (case (string-length line)
+ ((0) #f)
+ ((1) (string=? "\\" line))
+ (else
+ (let ((len (string-length line)))
+ ;; TODO can extra backslashes change this?
+ (and (char=? #\\ (string-ref line (- len 1)))
+ (not (char=? #\\ (string-ref line (- len 2)))))))))
+
+(define (transfer-line-number to from)
+ (set! (line-number to) (line-number from))
+ to)
+
+;; Merge two lines, assuming that upper ends with a backslash
+(define (merge-lines upper lower)
+ (let ((new-string (string-append (string-drop-right upper 1) lower)))
+ (transfer-line-number new-string upper)
+ new-string))
+
+(define (fold-lines lines)
+ (fold-right (lambda (line done)
+ (if (line-continued? line)
+ (cons (merge-lines line (car done)) (cdr done))
+ (cons line done)))
+ '()
+ lines))
+
+
+(define comment-rx (make-regexp "(//|/[*]|[*]/)"))
+
+(define (strip-comments lines)
+ (let loop ((in-comment #f) (lines lines) (done '()))
+ (if (null? lines)
+ (reverse done)
+ (let ((line (car lines)))
+ (cond ((regexp-exec comment-rx line)
+ => (lambda (m)
+ (format (current-output-port) "~s ~s substr = ~s~%" in-comment (line-number line) (match:substring m))
+ (cond ((and in-comment (string=? "*/" (match:substring m)))
+ (loop #f (cons (transfer-line-number (match:suffix m) line)
+ (cdr lines))
+ done))
+ (in-comment (loop #t (cdr lines) done))
+ ((string=? "//" (match:substring m))
+ (loop #f (cdr lines) (cons (transfer-line-number (match:prefix m) line)
+ done)))
+ ((string=? "/*" (match:substring m))
+ (loop #t (cons (transfer-line-number (match:suffix m) line) (cdr lines)) done))
+ (else
+ (scm-error 'cpp-error "strip-comments"
+ "Unexpected */ in file ~a on line ~a"
+ (list (line-file line) (line-number line))
+ #f)))))
+ (else (loop in-comment (cdr lines) (cons line done))))))))
+
+
+(define-immutable-record-type <preprocessor-directive>
+ (make-preprocessor-directive type body)
+ proprocessor-directive?
+ (type directive-type)
+ (body directive-body))
+
+(define cpp-directive-rx (make-regexp "\\s*#\\s*((\\w+)(.*))?"))
+(define (preprocessor-directive? line)
+ (cond ((regexp-exec cpp-directive-rx line)
+ => (lambda (m)
+ (if (match:substring m 2)
+ (make-preprocessor-directive
+ (string->symbol (match:substring m 2))
+ (string-trim-both (match:substring m 3) char-set:whitespace))
+ 'sort-of)))
+ (else #f)))
+
+;; defined
+
+;; TODO _Pragma
+
+
+(define (expand-function-line-macro environment macro . params)
+ (expand-macro environment (apply-macro macro (map (lambda (param) (expand-macro environment param)) params))))
+
+;; (define (environment-ref ))
+
+(define (list-of? lst predicate)
+ (every predicate lst))
+
+
+;; Parantheses when defining macro
+(define (parse-parameter-string string)
+ (map string-trim-both
+ (string-split (string-trim-both string (char-set #\( #\)))
+ #\,)))
+
+
+(define-type (object-macro)
+ (body type: string?))
+
+(define-type (function-macro)
+ (formals type: (list-of? string?))
+ (body type: string?))
+
+
+;; The interesting part
+;; environment, (list string) -> (values (list string) (list strings))
+;; multiple lines since since a function-like macro can extend over multiple lines
+;; (define (expand-macros environment strings)
+;; )
+
+
+(define (crash-if-not-if body guilty)
+ (scm-error 'cpp-error guilty
+ "else, elif, and endif invalid outside if scope: ~s~%file: ~s line: ~s"
+ (list body (line-file body) (line-number body))))
+
+;; (environment, lines) -> environment x lines
+(define (parse-directives environment lines)
+ (let loop ((environment environment) (lines lines) (done '()))
+ (let* ((line (car line))
+ (directive? (preprocessor-directive? line)))
+ (case directive?
+ ((#f) ; regular line
+ (loop environment (cdr lines)
+ ;; TODO this doesn't work, since parse-macros works on multiple lines
+ (cons (parse-macros environment (car lines)) done)))
+ ((sort-of) ; a no-op directive
+ (loop environment (cdr lines) done))
+ (else ; an actual directive
+ (case (car (cpp-if-status environment))
+ ((outside)
+ (case (directive-type m)
+ ((ifndef endif else)
+ (scm-error 'cpp-error "parse-directives"
+ "Unexpected directive: ~s"
+ (list line) #f))
+ (else ; inside if, ifelse or else
+ ;; outside active-if inactive-if
+ (case (directive-type m)
+ ;; stack ending directives
+ ((endif)
+ (case (car (cpp-if-status environment))
+ ((outside) (crash-if-not-if (directive-body m) "endif"))
+ (else (loop (modify environment cpp-if-status cdr)
+ (cdr lines)
+ done))))
+
+ ;; stack nudging directives
+ ((else)
+ (case (car (cpp-if-status environment))
+ ((outside) (crash-if-not-if (directive-body m) "else"))
+ (else (loop (modify environment (lens-compose cpp-if-status car*)
+ (lambda (old)
+ (case old
+ ((active-if) 'inactive-if)
+ ((inactive-if) 'active-if))))
+ (cdr lines)
+ done))))
+ ((elif)
+ (case (car (cpp-if-status environment))
+ ((outside) (crash-if-not-if (directive-body m) "elif"))
+ (else 'TODO ;; TODO
+ )
+ ))
+
+ ;; stack creating directives
+ ;; on inactive-if each creates a new frame, which also is inactive
+ ((ifndef)
+ (case (car (cpp-if-status environment))
+ ((inactive-if) (loop (modify environment cpp-if-status
+ xcons 'inactive-if)
+ (cdr lines)
+ done))
+ (else (loop (modify environment cpp-if-status
+ xcons (if (in-environment? environment (directive-body directive?))
+ 'inactive-if 'active-if))
+ (cdr lines)
+ done))))
+
+ ((ifdef)
+ (case (car (cpp-if-status environment))
+ ((inactive-if) (loop (modify environment cpp-if-status
+ xcons 'inactive-if)
+ (cdr lines)
+ done))
+ (else
+ (loop (modify environment cpp-if-status
+ xcons (if (in-environment? environment (directive-body directive?))
+ 'active-if 'inactive-if))
+ (cdr lines)
+ done))))
+
+ ((if)
+ (case (car (cpp-if-status environment))
+ ((inactive-if) (loop (modify environment cpp-if-status
+ xcons 'inactive-if)
+ (cdr lines)
+ done))
+ (else 'TODO ;; TODO
+ )))
+
+
+ ;; other directives
+ ((include) (cond ((string-match "[<\"](.*)"
+ => (lambda (m)
+ (let ((fileneme (string-drop-right (directive-body m) 1)))
+ (case (string-ref (match:substring m 1) 0)
+ ;; TODO include-path
+ ((#\<) (handle-file environment filename))
+ ((#\") (handle-file environment filename))))))
+ (else (scm-error 'cpp-error "parse-directives"
+ "Invalid include"
+ '() #f)))))
+ ((define)
+ ;; TODO what are valid names?
+ (cond ((string-match "^(\\w+)([(][^)]*[)])?\\s+(.*)" (directive-body directive?))
+ => (lambda (m)
+ (loop (let ((macro-body (string-trim-both (match:substring m 3))))
+ (add-identifier!
+ environment
+ (match:substring m 1)
+ (cond ((match:substring m 2)
+ => (lambda (parameter-string)
+ (function-macro
+ formals: (parse-parameter-string parameter-string)
+ body: macro-body)))
+ (else (object-macro body: macro-body)))))
+ (cdr lines)
+ done)))
+ (else (scm-error 'cpp-error "parse-directives"
+ "Invalid #define line, ~s"
+ (list (directive-body directive?))
+ #f))))
+
+ ((undef)
+ (case (car (cpp-if-status environment))
+ ((inactive-if) (loop environment (cdr lines) done))
+ (else (loop (remove-identifier environment (directive-body directive?))
+ (cdr lines)
+ done))))
+
+ ((line)
+ (case (car (cpp-if-status environment))
+ ((inactive-if) (loop environment (cdr lines) done))
+ ;; TODO add first-run parameter to loop, in case expand-macros still return something invalid
+ (else (let parse-line-directive ((tokens (string-tokenize (directive-body directive?))))
+ (cond ((= 1 (length tokens))
+ ;; TODO parse token
+ (if (integer? (car tokens))
+ ;; TODO update current line
+ (loop environment (cdr lines) done)
+ (parse-line-directive (expand-macros environment (directive-body directive?)))))
+ ((= 2 (length tokens))
+ ;; TODO parse tokens
+ (if (and (integer? (car tokens))
+ (string-literal? (cadr tokens)))
+ ;; TODO update current line and file
+ (loop environment (cdr lines) done)
+ (parse-line-directive (expand-macros environment (directive-body directive?)))))
+ (else (parse-line-directive (expand-macros environment (directive-body directive?)))))))))
+
+ ((error)
+ (throw 'cpp-error-directive
+ (directive-body directive?)))
+
+ ((warning)
+ (format (current-error-port) "#warning ~a~%"
+ (directive-body directive?))
+ (loop environment (cdr lines) done))
+
+ ((pragma)
+ (format (current-error-port)
+ "PRAGMA: ~s~%" (directive-body directive?))
+ (loop environment (cdr lines) done))
+
+ ((ident sccs)
+ (format (current-error-port)
+ "IDENT: ~s~%" (directive-body directive?))
+ (loop environment (cdr lines) done))
+
+ (else
+ (scm-error 'cpp-error "parse-directives"
+ "Unknown pre-processor directive: ~s"
+ (list line) #f)
+ )))))))))
+ ))
+
+
+(define* (writeln expr optional: (port (current-output-port)))
+ (write expr port)
+ (newline port))
+
+(define (handle-lines environment lines)
+ (parse-directive environment
+ (compose strip-comments fold-lines number-lines)))
+
+ ;; parse-directives environment
+
+;; Return a number of lines
+(define (read-file file-path)
+ (define raw-lines (call-with-input-file file-path read-lines))
+ (mark-with-property! line line-file file-path)
+ (handle-lines raw-lines))
+
+
+;; pre defined macros
+;; see info manual for cpp 3.7.1 Standard Predefined Macros
+;; __FILE__
+;; __LINE__
+;; __DATE__ "Feb 12 1996"
+;; __TIME__ "23:59:01"
+;; __STDC__ 1
+;; __STDC_VERSION__ 201112L
+;; __STDC_HOSTED__ 1
+
+;; __cplusplus
+;; __OBJC__
+;; __ASSEMBLER__
diff --git a/module/c/preprocessor2.scm b/module/c/preprocessor2.scm
new file mode 100644
index 00000000..3f9552c5
--- /dev/null
+++ b/module/c/preprocessor2.scm
@@ -0,0 +1,752 @@
+(define-module (c preprocessor2)
+ :use-module (srfi srfi-1)
+ :use-module (srfi srfi-71)
+ :use-module (srfi srfi-88)
+
+ :use-module (c cpp-environment)
+ :use-module (c eval2)
+ :use-module ((c cpp-environment function-like-macro)
+ :select (function-like-macro variadic? identifier-list))
+ :use-module ((c cpp-environment object-like-macro) :select (object-like-macro object-like-macro?))
+ :use-module ((c cpp-environment internal-macro) :select (internal-macro))
+ :use-module ((hnh util) :select (-> ->> intersperse aif swap unless unval break/all))
+ :use-module ((hnh util lens) :select (set modify cdr*))
+ :use-module (hnh util path)
+ :use-module (hnh util type)
+ :use-module (hnh util object)
+ :use-module ((hnh util values) :select (abort* on-fst on-snd apply/values))
+ :use-module ((c lex2)
+ :select (lex
+ placemaker
+ lexeme?
+ lexeme-body
+ lexeme-noexpand
+
+ tokenize
+ ))
+ :use-module (c unlex)
+ :use-module (c cpp-types)
+ :use-module (c cpp-util)
+ :export (_Pragma-macro
+ defined-macro
+ c-search-path
+ handle-preprocessing-tokens))
+
+
+(define (read-file path)
+ (call-with-input-file path (@ (ice-9 rdelim) read-string)))
+
+
+
+(define-syntax-rule (alist-of variable key-type value-type)
+ (build-validator-body variable (list-of (pair-of key-type value-type))))
+
+(define (list-of-length lst n)
+ (= n (length lst)))
+
+(define parameter-map? (of-type? (alist-of string? (list-of lexeme?))))
+
+(define (concat-token? token) (and (equal? "##" (punctuator-token? token))
+ (not (member "##" (lexeme-noexpand token)))))
+(define (stringify-token? token) (equal? "#" (punctuator-token? token)))
+(define (left-parenthesis-token? token) (equal? "(" (punctuator-token? token)))
+(define (right-parenthesis-token? token) (equal? ")" (punctuator-token? token)))
+(define (comma-token? token) (equal? "," (punctuator-token? token)))
+(define (ellipsis-token? token) (equal? "..." (punctuator-token? token)))
+
+
+;; TODO
+;; > #if defined X
+;; is equivalent to
+;; > #if defined(X)
+
+
+;; parameters is a lexeme list, as returned by parse-parameter-list
+(define (build-parameter-map macro parameters)
+ (typecheck macro cpp-macro?)
+ (typecheck parameters (list-of (list-of lexeme?)))
+ (map (lambda (pair) (modify pair cdr* drop-whitespace-both))
+ (if (macro-variadic? macro)
+ (let ((head rest (split-at parameters (length (macro-identifier-list macro)))))
+ (cons (cons "__VA_ARGS__" (concatenate (intersperse
+ (lex ",")
+ rest)))
+ (map cons (macro-identifier-list macro) head)))
+ (map cons
+ (macro-identifier-list macro)
+ parameters))))
+
+(define (expand# macro parameter-map)
+ (typecheck macro cpp-macro?)
+ (typecheck parameter-map parameter-map?)
+ (let loop ((tokens (macro-body macro)))
+ (cond ((null? tokens) '())
+ ((stringify-token? (car tokens))
+ (let* ((head rest (car+cdr (drop-whitespace (cdr tokens))))
+ (x (identifier-token? head)))
+ (cond ((assoc-ref parameter-map x)
+ => (lambda (tokens)
+ (cons (stringify-tokens tokens)
+ (loop rest))))
+ (else
+ (scm-error 'macro-expand-error "expand#"
+ "'#' is not followed by a macro parameter: ~s"
+ (list x) #f)))))
+ (else (cons (car tokens)
+ (loop (cdr tokens)))))))
+
+
+;; 6.10.3.3
+(define (expand## tokens)
+ ;; (typecheck tokens (list-of lexeme?))
+
+ (let loop ((left '())
+ (right tokens))
+ (cond ((null? right)
+ (reverse left))
+ ((concat-token? (car right))
+ (let ((l (drop-whitespace left))
+ (r (drop-whitespace (cdr right))))
+ (cond ((or (null? l) (null? r))
+ (scm-error 'cpp-error "expand##"
+ "## can't be first or last token: ~s"
+ (list (unlex tokens)) #f))
+ ((and (placemaker-token? (car l))
+ (placemaker-token? (car r)))
+ (loop (cdr l) (cons (placemaker) (cdr r))))
+ ((placemaker-token? (car l))
+ (loop (cdr l) r))
+ ((placemaker-token? (car r))
+ (loop (cdr l) (cons (car l) (cdr r))))
+ (else
+ ;; 6.10.3.3 p. 3
+ ;; I believe that ## is the only special case where the
+ ;; result of concatenation is differente from the token directly.
+ (let ((token (concatenate-tokens (car l) (car r))))
+ (let ((token (if (concat-token? token)
+ (modify token lexeme-noexpand xcons "##")
+ token)))
+ (loop (cdr l) (cons token (cdr r)))))))))
+ (else
+ (let ((pre post (break concat-token? right)))
+ (loop (append left (reverse pre)) post))))))
+
+
+(define (check-arity macro parameters)
+ (if (variadic? macro)
+ (unless (>= (length parameters)
+ (length (macro-identifier-list macro)))
+ (scm-error 'cpp-arity-error "apply-macro"
+ "Too few arguments to variadic macro ~s, expected at least ~s, got ~s"
+ (list (macro-identifier macro)
+ (length (macro-identifier-list macro))
+ (length parameters))
+ (list macro)))
+ (unless (or (and (= 0 (length (macro-identifier-list macro)))
+ (= 1 (length parameters))
+ (null? (car parameters)))
+ (= (length (macro-identifier-list macro))
+ (length parameters)))
+ (scm-error 'cpp-arity-error "apply-macro"
+ "Wrong number of arguments to macro ~s, expected ~s, got ~s"
+ (list (macro-identifier macro)
+ (length (macro-identifier-list macro))
+ (length parameters))
+ (list macro)))))
+
+;; expand function like macro
+;; parameter is a list of lexeme-lists, each "top level" element matching one
+;; argument to the macro
+(define (apply-macro environment macro parameters)
+ (typecheck environment cpp-environment?)
+ ;; Each element should be the lexeme list for that argument
+ (typecheck parameters (list-of (list-of lexeme?)))
+ (typecheck macro cpp-macro?)
+ (check-arity macro parameters)
+
+ (let ()
+
+ (define (resolve-cpp-variables tokens parameter-map)
+ (define (bound-identifier? id)
+ (assoc-ref parameter-map id))
+
+ ;; expand parameters, and place placemaker tokens
+ (let loop ((tokens tokens) (last #f))
+ (cond ((null? tokens) '())
+ ((identifier-token? (car tokens))
+ bound-identifier?
+ => (lambda (id)
+ (let ((replacement (assoc-ref parameter-map id)))
+ (if (null? replacement)
+ (cons (placemaker) (loop (cdr tokens) #f))
+ ;; macroexpand replacement here! But only if the token isn't used with ## (or #)
+ (append
+ (if (or (concat-token? last)
+ (next-token-matches? concat-token? tokens))
+ replacement
+ ;; resolve-token-stream only modifies environment by updating current line
+ ;; that can't happen in a macro body
+ ((unval resolve-token-stream 1) environment replacement once?: #t))
+ (loop (cdr tokens) #f))))))
+ ((whitespace-token? (car tokens))
+ (cons (car tokens) (loop (cdr tokens) last)))
+ (else (cons (car tokens) (loop (cdr tokens) (car tokens)))))))
+
+
+ (define parameter-map (build-parameter-map macro parameters))
+ (remove placemaker-token?
+ (-> macro
+ (expand# parameter-map)
+ (resolve-cpp-variables parameter-map)
+ expand##))))
+
+
+
+;; Expand object-like macro
+
+;; #define VALUE 10
+;; #define str(x) #x
+;; #define OTHER str(VALUE)
+;; OTHER
+;; ⇒ "VALUE"
+
+;; remaining-tokens should be the token stream just after the name of the macro
+(define (expand-macro environment macro noexpand-list remaining-tokens)
+ (typecheck environment cpp-environment?)
+ (typecheck macro cpp-macro?)
+ ;; (typecheck remaining-tokens (list-of lexeme?))
+ (typecheck noexpand-list (list-of string?))
+
+ (let ((name (macro-identifier macro)))
+ (cond ((object-macro? macro)
+ (values environment (append (fold (swap mark-noexpand)
+ (expand## (macro-body macro))
+ (cons name noexpand-list))
+ remaining-tokens)))
+
+ ((function-macro? macro)
+ (if (next-token-matches? left-parenthesis-token? remaining-tokens)
+ (let ((containing remaining newlines (parse-parameter-list remaining-tokens)))
+ (values (bump-line environment newlines)
+ (append (fold (swap mark-noexpand)
+ (apply-macro environment macro containing)
+ (cons name noexpand-list))
+ remaining)))
+ (values environment
+ ;; TODO#1 the token shouldn't be expanded here, but it should neither be marked no-expand?
+ ;; Consider the case
+ ;; #define m(a) a(0,1)
+ ;; #define f(a) f(2 * (a))
+ ;; m(f)
+ (append (mark-noexpand (lex (macro-identifier macro)) (macro-identifier macro))
+ remaining-tokens))))
+
+ ((internal-macro? macro)
+ (if (next-token-matches? left-parenthesis-token? remaining-tokens)
+ (let ((containing remaining newlines (parse-parameter-list remaining-tokens)))
+ (let ((env* tokens* ((macro-body macro) environment containing)))
+ (values (bump-line env* newlines)
+ (append (fold (swap mark-noexpand)
+ tokens*
+ (cons name noexpand-list))
+ remaining))))
+ (values environment
+ (append (mark-noexpand (lex (macro-identifier macro)) (macro-identifier macro))
+ remaining-tokens))))
+
+ (else
+ (scm-error 'wrong-type-arg "expand-macro"
+ "Macro isn't a macro: ~s"
+ (list macro) #f)))))
+
+
+
+(define-type (parenthesis-group)
+ (parenthesis-group-tokens
+ type: (list-of (or lexeme? parenthesis-group?))))
+
+(define (make-parenthesis-group tokens)
+ (parenthesis-group parenthesis-group-tokens: tokens))
+
+
+(define (flatten-group tokens)
+ (cond ((null? tokens) '())
+ ((lexeme? (car tokens))
+ (cons (car tokens) (flatten-group (cdr tokens))))
+ ((parenthesis-group? (car tokens))
+ (append (lex "(")
+ (flatten-group (parenthesis-group-tokens (car tokens)))
+ (lex ")")
+ (flatten-group (cdr tokens))))))
+
+
+;; Takes a list of preprocessing tokens, and returns three values
+;; - if the last token was '...'
+;; - a list of strings of all token names
+;; - the remaining tokens
+;; Note that this is ONLY #define f(x) forms
+;; not usage forms
+(define (parse-identifier-list tokens)
+ ;; (typecheck tokens (list-of lexeme?))
+ (let* ((group remaining (parse-group (drop-whitespace tokens)))
+ (groups (reverse (map drop-whitespace-both
+ (break/all comma-token? (parenthesis-group-tokens group))))))
+ ;; Checks that there where no nested parenthesis
+ (cond ((equal? '(()) groups)
+ (values #f '() remaining))
+ (else
+ (typecheck groups (list-of (and (list-of-length 1)
+ (list-of lexeme?))))
+
+ (let ((variadic? groups (if (ellipsis-token? (caar groups))
+ (values #t (cdr groups))
+ (values #f groups))))
+ (values
+ variadic?
+ (map (lambda (x) (or (identifier-token? x)
+ (scm-error 'cpp-error "parse-identifier-list"
+ "Unexpected preprocessing-token in identifier list: ~s"
+ (list x) #f)))
+ (map car (reverse groups)))
+ remaining))))))
+
+
+
+(define (newline-count group)
+ (let loop ((tokens (parenthesis-group-tokens group)))
+ (fold (lambda (item nls)
+ (+ nls
+ (cond ((newline-token? item) 1)
+ ((parenthesis-group? item) (newline-count item))
+ (else 0))))
+ 0 tokens)))
+
+;; tokens ⇒ parenthesis-group, remaining-tokens
+(define (parse-group tokens)
+ (typecheck tokens (not null?))
+ (typecheck (car tokens) left-parenthesis-token?)
+
+ (let loop ((stack '()) (remaining tokens))
+ (cond ((and (not (null? stack))
+ (null? (cdr stack))
+ (car stack))
+ parenthesis-group?
+ => (lambda (item) (values item remaining)))
+ ((null? remaining)
+ (scm-error 'misc-error "parse-group"
+ "Ran out of tokens while parsing: ~s (stack: ~s)"
+ (list (unlex tokens) stack) #f))
+ (else
+ (let ((token remaining (car+cdr remaining)))
+ (loop (cond ((right-parenthesis-token? token)
+ (let ((group rest (break left-parenthesis-token? stack)))
+ (cons (make-parenthesis-group (reverse group))
+ ;; Remove left-parenthesis
+ (cdr rest))))
+ (else (cons token stack)))
+ remaining))))))
+
+
+;; returns three values:
+;; - a list of tokens where each is a parameter to the function like macro
+;; - the remaining tokenstream
+;; - how many newlines were encountered
+;; The standard might call these "replacement lists"
+;; Note that each returned token-list might have padding whitespace which should be trimmed.
+;; It's kept to allow __VA_ARGS__ to "remember" its whitespace
+(define (parse-parameter-list tokens)
+ (let ((group remaining (parse-group (drop-whitespace tokens))))
+ ;; Checks that no inner groups where here
+ ;; (typecheck tokens (list-of lexeme?))
+ (values (map flatten-group
+ (break/all comma-token? (parenthesis-group-tokens group)))
+ remaining
+ (newline-count group))))
+
+
+;; Add __FILE__ and __LINE__ object macros to the environment
+(define (join-file-line environment)
+ (extend-environment
+ environment
+ ;; 6.10.8
+ (list
+ (object-like-macro
+ identifier: "__FILE__"
+ body: (lex (format #f "~s" (current-file environment))))
+ (object-like-macro
+ identifier: "__LINE__"
+ body: (lex (number->string (current-line environment)))))))
+
+
+(define defined-macro
+ (internal-macro
+ identifier: "defined"
+ body: (lambda (environment arguments)
+ (typecheck arguments (and (list-of (list-of lexeme?))
+ (not null?)))
+ (aif (identifier-token? (car (list-ref arguments 0)))
+ (let ((in-env (boolean->c-boolean (in-environment? environment it))))
+ (values environment (lex (number->string in-env))))
+ (scm-error 'cpp-error "defined"
+ "Invalid parameter list to `defined': ~s"
+ (list arguments) #f)))))
+
+(define _Pragma-macro
+ (internal-macro
+ identifier: "_Pragma"
+ body: (lambda (environment arguments)
+ (typecheck arguments (and (list-of (list-of lexeme?))
+ (not null?)))
+ (cond ((string-token? (caar arguments))
+ (lambda (a . _) a)
+ ;; TODO handle rest
+ => (lambda (encoding it . rest)
+ (values (handle-pragma environment (lex it))
+ '())))
+ (else (scm-error 'cpp-pragma-error "_Pragma"
+ "Invalid argument to _Pragma: ~s"
+ (list (car arguments)) #f))))))
+
+
+
+;; environment, tokens → environment
+(define (handle-pragma environment tokens)
+ (typecheck environment cpp-environment?)
+ ;; (typecheck tokens (list-of lexeme?))
+
+ (let ((err (lambda ()
+ (scm-error 'cpp-pragma-error "handle-pragma"
+ "Invalid pragma directive: ~a"
+ (list (unlex tokens)) #f))))
+
+ (cond ((null? tokens) (err))
+ ((equal? "STDC" (identifier-token? (car tokens)))
+ (call-with-values (lambda () (apply values (filter identifier-token? (cdr tokens))))
+ (case-lambda ((identifier on-off-switch)
+ (format (current-output-port)
+ "#Pragma STDC ~a ~a"
+ (unlex (list identifier))
+ (unlex (list on-off-switch)))
+ environment)
+ (_ (err)))))
+ (else
+ (format (current-output-port)
+ "Non-standard #Pragma: ~a"
+ (unlex tokens))
+ environment))))
+
+
+;; 6.10.1 p. 4
+(define (resolve-constant-expression cpp-tokens)
+ ;; (typecheck tokens (list-of lexeme?))
+ (define zero (car (lex "0")))
+ #;
+ (define tokens
+ (map preprocessing-token->token
+ (map (lambda (token)
+ (cond ((identifier-token? token) zero)
+ (else token)))
+ (remove whitespace-token? tokens))))
+
+ 'TODO
+ ;; eval as per 6.6
+ )
+
+
+
+(define (mark-noexpand1 token name)
+ (modify token lexeme-noexpand xcons name))
+
+(define (mark-noexpand tokens name)
+ ;; (typecheck tokens (list-of lexeme?))
+ ;; (typecheck name string?)
+ (map (lambda (token) (mark-noexpand1 token name)) tokens))
+
+(define (marked-noexpand? token)
+ (cond ((identifier-token? token)
+ => (lambda (id) (member id (lexeme-noexpand token))))
+ (else #f)))
+
+;; Expands a token-stream which contains no pre-processing directives (#if:s, ...)
+;; If @var{once?} is true then the resulting body won't be scanned again for tokens to expand
+;; environment, tokens, [boolean] → environment, tokens
+(define* (resolve-token-stream environment tokens key: once?)
+ (typecheck environment cpp-environment?)
+ ;; (typecheck tokens (list-of lexeme?))
+ ;; (pprint-environment environment)
+ ;; (format (current-error-port) "~a~%~%" (unlex tokens))
+ (let loop ((environment environment) (tokens tokens))
+ (cond ((null? tokens) (values environment '()))
+ ((newline-token? (car tokens))
+ (on-snd (cons (car tokens) (abort* (loop (bump-line environment) (cdr tokens))))))
+ ((and (identifier-token? (car tokens))
+ (not (marked-noexpand? (car tokens))))
+ ;; Here is the loop after expansion
+ (apply/values (if once? values loop)
+ (maybe-extend-identifier environment
+ (identifier-token? (car tokens))
+ (lexeme-noexpand (car tokens))
+ (cdr tokens))))
+ (else (on-snd (cons (car tokens) (abort* (loop environment (cdr tokens)))))))))
+
+
+
+;; returns a new environment
+;; handle body of #if
+;; environment, (list token) → environment
+(define (resolve-for-if environment tokens)
+ (typecheck environment cpp-environment?)
+ ;; (typecheck tokens (list-of lexeme?))
+
+ (-> (extend-environment environment (list defined-macro))
+ ;; no newlines in #if line
+ ((unval resolve-token-stream 1) tokens)
+ resolve-constant-expression
+ c-boolean->boolean
+ (if (enter-active-if environment)
+ (enter-inactive-if environment))))
+
+;; environment, string, (list token) → environment, (list token)
+(define (maybe-extend-identifier environment identifier noexpand-list remaining-tokens)
+ (typecheck environment cpp-environment?)
+ (typecheck identifier string?)
+ ;; (typecheck remaining-tokens (list-of lexeme?))
+ (typecheck noexpand-list (list-of string?))
+ (cond ((get-identifier (join-file-line environment) identifier)
+ => (lambda (value)
+ (expand-macro (join-file-line environment)
+ value
+ noexpand-list
+ remaining-tokens)))
+ (else ; It wasn't an identifier, leave it as is
+ (values environment
+ (append (mark-noexpand (lex identifier)
+ identifier)
+ remaining-tokens)))))
+
+;; 'gcc -xc -E -v /dev/null' prints GCC:s search path
+(define c-search-path
+ (make-parameter (list "/usr/include"
+ "/usr/local/include")))
+
+;; #include <stdio.h>
+(define (resolve-h-file string)
+ (typecheck string string?)
+ (cond
+ ;; NOTE do I want this case?
+ ;; GCC has it
+ ((path-absolute? string) string)
+ (else
+ (or
+ (find file-exists?
+ (map (lambda (path-prefix)
+ (path-append path-prefix string))
+ (c-search-path)))
+ (scm-error 'cpp-error "resolve-h-file"
+ "Can't resolve file: ~s"
+ (list string) #f)))))
+
+;; #include "myheader.h"
+(define (resolve-q-file string)
+ (typecheck string string?)
+ (cond ((file-exists? string) string)
+ ;; This should always be a fallback (6.10.2, p. 3)
+ (else (resolve-h-file string))))
+
+
+(define (resolve-header environment tokens)
+ (typecheck environment cpp-environment?)
+ ;; (typecheck tokens (list-of lexeme?))
+
+ (let ((err (lambda (msg . args)
+ (scm-error 'cpp-error "resolve-and-include-header"
+ (string-append msg ", tokens: ~s")
+ (append args (list (unlex tokens))) #f))))
+ (let loop ((%first-time #t) (tokens tokens))
+ (cond ((null? tokens) (err "Invalid #include line"))
+ ((h-string-token? (car tokens))
+ => (lambda (str)
+ (unless (null? (drop-whitespace (cdr tokens)))
+ (err "Unexpected tokens after #include <>"))
+ (resolve-h-file str)))
+ ((q-string-token? (car tokens))
+ => (lambda (str)
+ (unless (null? (drop-whitespace (cdr tokens)))
+ (err "Unexpected tokens after #include \"\""))
+ (resolve-q-file str)))
+ (else
+ (unless %first-time (err "Failed parsing tokens"))
+ ;; No newlines in #include
+ (loop #f ((unval resolve-token-stream 1) environment tokens)))))))
+
+;; environment, tokens → environment
+(define (handle-line-directive environment tokens*)
+ (typecheck environment cpp-environment?)
+ ;; (typecheck tokens* (list-of lexeme?))
+
+ (let ((err (lambda () (scm-error 'cpp-error "handle-line-directive"
+ "Invalid line directive: ~s"
+ (list tokens*) #f))))
+ (let loop ((%first-time #t) (tokens tokens*))
+ (cond ((null? tokens))
+ ((pp-number? (car tokens))
+ => (lambda (line)
+ (let ((line (string->number line))
+ (remaining (drop-whitespace (cdr tokens))))
+ (cond ((null? remaining) (set environment current-line (1- line)))
+ ((string-token? (car remaining))
+ (lambda (a . _) a)
+ => (lambda (encoding . fragments)
+ (-> environment
+ (set current-line (1- line))
+ ;; TODO properly join string
+ (set current-file (car fragments)))))
+ ;; no newlines in #line
+ (%first-time (loop #f ((unval resolve-token-stream 1) environment tokens)))
+ (else (err))))))
+ ;; no newlines in #line
+ (%first-time (loop #f ((unval resolve-token-stream 1) environment tokens)))
+ (else (err))))))
+
+;; environment, tokens → environment
+(define (resolve-define environment tokens)
+ (typecheck environment cpp-environment?)
+ ;; (typecheck tokens (list-of lexeme?))
+
+ (let ((identifier (identifier-token? (car tokens)))
+ (tail (cdr tokens)))
+ (-> environment
+ bump-line
+ (add-identifier
+ identifier
+ (cond ((and (not (null? tail))
+ (left-parenthesis-token? (car tail)))
+ ;; function like macro
+ (let ((variadic? identifiers replacement-list
+ (parse-identifier-list tail)))
+ (function-like-macro
+ identifier: identifier
+ variadic?: variadic?
+ identifier-list: identifiers
+ ;; surrounding whitespace is not part of the replacement list
+ ;; (6.10.3 p.7)
+ body: (drop-whitespace-both replacement-list))))
+ (else (object-like-macro
+ identifier: identifier
+ body: (drop-whitespace-both tail))))))))
+
+
+
+
+;; environment, tokens -> environment, tokens
+(define (handle-preprocessing-tokens environment tokens)
+ ;; Prepend a newline to ensure that the token stream always starts with a
+ ;; newline (otherwise guaranteed by how we loop). Decrement line-counter
+ ;; by one to compensate.
+ (let loop ((environment (bump-line environment -1))
+ (tokens (append (lex "\n") tokens)))
+
+ (define (err fmt . args)
+ (scm-error 'cpp-error "handle-preprocessing-tokens"
+ (string-append "~a:~a " fmt)
+ (cons* (current-file environment)
+ (current-line environment)
+ args)
+ #f))
+
+ (cond ((null? tokens) (values environment '()))
+ ((newline-token? (car tokens))
+ (let ((environment (bump-line environment))
+ (tokens* (drop-whitespace (cdr tokens))))
+ (cond ((null? tokens*) (values environment '()))
+ ((equal? "#" (punctuator-token? (car tokens*)))
+ (let ((line-tokens remaining-tokens (tokens-until-eol (cdr tokens*))))
+ ;; drop whitespace after to not "eat" the next newline token
+ (let ((line-tokens (drop-whitespace line-tokens)))
+ (cond ((null? line-tokens)
+ ;; null directive
+ (loop environment remaining-tokens))
+
+ ((in-conditional/inactive? environment)
+ (case (string->symbol (identifier-token? (car line-tokens)))
+ ((ifdef if) (loop (enter-inactive-if environment) remaining-tokens))
+ ((else) (loop (flip-flop-if environment) remaining-tokens))
+ ((endif) (loop (leave-if environment) remaining-tokens))
+ ((elif) (loop (resolve-for-if
+ (leave-if environment)
+ (drop-whitespace (cdr line-tokens)))
+ remaining-tokens))
+ (else (loop environment remaining-tokens))))
+
+ ;; From here on we are not in a comment block
+ (else
+ (let ((directive (string->symbol (identifier-token? (car line-tokens))))
+ (body (drop-whitespace (cdr line-tokens))))
+ (if (eq? 'include directive)
+ ;; include is special since it returns a token stream
+ (let ((path (resolve-header environment body)))
+ ;; TODO change to store source location in lexemes
+ ;; and rewrite the following to
+ ;; (loop environment
+ ;; (append (-> path read-file tokenize) remaining-tokens))
+ ;; TODO and then transfer these source locations when we move
+ ;; to "real" tokens (c to-token)
+ (let ((env* tokens*
+ (loop
+ ;; same hack as at start of loop
+ (-> environment
+ (enter-file path)
+ (bump-line -1))
+ (append (lex "\n")
+ (-> path read-file tokenize)))))
+ (on-snd (append tokens* (abort* (loop (leave-file env*) remaining-tokens))))))
+
+ (let ((operation ; (environment, list token) → environment
+ (case directive
+ ((if) resolve-for-if)
+ ((ifdef)
+ (lambda (env body)
+ ((if (in-environment? env (identifier-token? (car body)))
+ enter-active-if enter-inactive-if)
+ env)))
+ ((ifndef)
+ (lambda (env body)
+ ((if (in-environment? env (identifier-token? (car body)))
+ enter-inactive-if enter-active-if)
+ env)))
+ ;; NOTE possibly validate that body is empty for endif and else
+ ((endif) (lambda (env _)
+ (unless (in-conditional? env)
+ (err "#endif outside conditional"))
+ (leave-if env)))
+ ((else) (lambda (env _)
+ (unless (in-conditional? env)
+ (err "#else outside conditional"))
+ (flip-flop-if env)))
+ ;; ((elif) (lambda ))
+ ((define) resolve-define)
+ ((undef) (lambda (env body) (remove-identifier env (identifier-token? (car body)))))
+ ((line) handle-line-directive)
+ ((error) (lambda (_ tokens)
+ (throw 'cpp-error-directive (unlex tokens))))
+ ((pragma) handle-pragma)
+ (else (err "Unknown preprocessing directive: ~s"
+ (list line-tokens))))))
+ (loop (operation environment body)
+ remaining-tokens)))))))))
+
+ ;; Line is not a pre-processing directive
+ (else (let ((preceding-tokens remaining-tokens (tokens-until-cpp-directive (cdr tokens))))
+ (let* ((env* resolved-tokens (if (in-conditional/inactive? environment)
+ (values environment '())
+ (resolve-token-stream environment preceding-tokens))))
+ (on-snd (append resolved-tokens
+ ;; The initial newline is presreved here, for better output,
+ ;; and to keep at least one whitespace token when there was one previously.
+ ;; possibly also keep a newline for line-directives.
+ (unless (null? remaining-tokens) (lex "\n"))
+ (abort* (loop env* remaining-tokens))))))))))
+
+ (else (err "Unexpected middle of line, (near ~s)"
+ (unlex tokens))))))
+
diff --git a/module/c/to-token.scm b/module/c/to-token.scm
new file mode 100644
index 00000000..f5e459de
--- /dev/null
+++ b/module/c/to-token.scm
@@ -0,0 +1,161 @@
+(define-module (c to-token)
+ :use-module ((srfi srfi-1) :select (fold append-map))
+ :use-module (ice-9 match)
+ :use-module ((hnh util) :select (->))
+ :use-module ((system base lalr)
+ :select (make-lexical-token))
+ :use-module (c cpp-types)
+ :use-module ((c lex2) :select (parse-c-number))
+ ;; :use-module (hnh util type)
+ :use-module ((rnrs bytevectors)
+ :select (make-bytevector
+ bytevector-length
+ bytevector-copy!
+ u8-list->bytevector
+ bytevector-u8-ref))
+ :use-module ((rnrs io ports)
+ :select (string->bytevector
+ make-transcoder
+ utf-8-codec))
+ :export (preprocessing-token->token))
+
+(define (pp-number->c-number token)
+ (parse-c-number (pp-number? token)))
+
+(define keywords
+ '(auto break case char const continue default
+ do double else enum extern float for goto
+ if inline int long register restrict return
+ short signed sizeof static struct switch
+ typedef union unsigned void volatile while
+ _Alignas _Alignof _Atomic _Bool _Complex
+ _Generic _Imaginary _Noreturn _Static_assert
+ _Thread_local))
+
+(define (cpp-char->bytevector c)
+ (match c
+ (`(escape-sequence (simple-escape-sequence ,x))
+ (case (string-ref x 0)
+ ((#\a) #vu8(#x07)) ; #\alarm
+ ((#\b) #vu8(#x08)) ; #\backspace
+ ((#\f) #vu8(#x0C)) ; #\page
+ ((#\n) #vu8(#x0A)) ; #\newline
+ ((#\r) #vu8(#x0D)) ; #\return
+ ((#\t) #vu8(#x09)) ; #\tab
+ ((#\v) #vu8(#x0B)) ; #\vtab
+ ;; ' " ? \
+ (else (char->integer (string-ref x 0)))))
+
+ ;; TODO these u8-list->bytevector should depend on the
+ ;; encoding prefix of the string/char
+ (`(escape-sequence (octal-escape-sequence ,x))
+ (-> x (string->number 8) list u8-list->bytevector))
+
+ (`(escape-sequence (hexadecimal-escape-sequence ,x))
+ (-> x (string->number 16) list u8-list->bytevector))
+
+ (`(escape-sequence (universal-character-name ,x))
+ (let ((n (string->number x 16)))
+ (when (<= #xD800 x #xDFFF)
+ (error))
+ (when (and (< x #xA0)
+ (or (not (= x #x24))
+ (not (= x #x40))
+ (not (= x #x60))))
+ (error))
+ (-> n
+ integer->char
+ string
+ (string->bytevector (make-transcoder (utf-8-codec))))))
+ (_ (error))))
+
+(define (concat-bytevectors bvs)
+ (define target (make-bytevector (apply + (map bytevector-length bvs))))
+ (fold (lambda (bv offset)
+ (let ((len (bytevector-length bv)))
+ (bytevector-copy! bv 0 target offset len)
+ (+ offset len)))
+ 0
+ bvs)
+ target)
+
+(define (handle-string-fragments content)
+ (map
+ (lambda (x) (if (string? x)
+ (string->bytevector x (make-transcoder (utf-8-codec)))
+ (cpp-char->bytevector x)))
+ content))
+
+;; 6.4 paragraph 2
+;; Each preprocessing toket thas is converted to a token shall have the
+;; lexcal form of a keyword, an identifier, a constant, a string literal,
+;; or a puncturtor
+(define (preprocessing-token->token cpp-token)
+ ;; Guile's cond handles multiple from expr, if written on the form
+ ;; (cond (expr check => proc) ...)
+ (cond ((string-token? cpp-token)
+ (lambda (a . _) a)
+ => (lambda (encoding . content)
+ (make-lexical-token
+ 'string-literal #f
+ (concat-bytevectors
+ (append
+ ;; TODO this should depend on encoding
+ (handle-string-fragments content)
+ (list #vu8(0)))))))
+
+ ((identifier-token? cpp-token)
+ => (lambda (name)
+ (let ((name (string->symbol name)))
+ (if (memv name keywords)
+ name
+ (make-lexical-token 'identifier #f name)))))
+
+ ((pp-number? cpp-token)
+ => (lambda (content)
+ ;; TOOD should return an integer-constant or a floating-constant
+ (make-lexical-token 'constant #f (parse-c-number content))))
+
+ ((character-constant? cpp-token)
+ (lambda (a . _) a)
+ => (lambda (encoding . content)
+ (make-lexical-token
+ 'constant #f
+ ;; TODO that to do with multi-byte characters?
+ ;; > 'ab' == 'a' << 8 | 'b' == 0x6162
+ ;; > '\x1234' == 0x1234
+ ;; GCC prints 34 for the following expression
+ ;; > printf("%x\n", '\x1234');
+ ;; but 6162 for this
+ ;; > printf("%x\n", 'ab');
+
+ ;; What about
+ ;; > 'a\x1234' == a << 16 | 'b' == 0x611234
+ (let ((bv (concat-bytevectors
+ ;; TODO this should depend on encoding
+ (handle-string-fragments content))))
+ ;; TODO maybe actually store multiple bytes from multi byte literals
+ (bytevector-u8-ref bv (1- (bytevector-length bv)))))))
+
+ ((punctuator-token? cpp-token)
+ => (lambda (s)
+ (cond ((string=? s "{") 'lbrace)
+ ((string=? s "}") 'rbrace)
+ ((string=? s "[") 'lbrack)
+ ((string=? s "]") 'rbrack)
+ ((string=? s "(") 'lparen)
+ ((string=? s ")") 'rparen)
+ ((string=? s ".") 'dot)
+ ((string=? s "|") 'pipe)
+ ((string=? s "||") 'pipe2)
+ ((string=? s ";") 'semicolon)
+ ((string=? s "|=") 'pipe=)
+ ((string=? s ",") 'comma)
+ ((string=? s "#") 'hash)
+ ((string=? s "##") 'hash2)
+ (else (string->symbol s)))))
+
+ (else
+ (scm-error 'cpp-error "preprocessing-token->token"
+ "Can't convert ~s into a \"regular\" token."
+ (list cpp-token) #f))))
diff --git a/module/c/trigraph.scm b/module/c/trigraph.scm
new file mode 100644
index 00000000..197e01a4
--- /dev/null
+++ b/module/c/trigraph.scm
@@ -0,0 +1,24 @@
+(define-module (c trigraph)
+ :use-module (ice-9 regex)
+ :export (replace-trigraphs))
+
+(define rx (make-regexp "\\?\\?([=\\(\\)'!<>/-])"))
+
+(define (proc m)
+ (case (string-ref (match:substring m 2) 0)
+ ((#\=) "#")
+ ((#\() "[")
+ ((#\)) "]")
+ ((#\') "^")
+ ((#\<) "{")
+ ((#\>) "}")
+ ((#\!) "|")
+ ((#\-) "~")
+ ((#\/) "\\")))
+
+(define (replace-trigraphs string)
+ (call-with-output-string
+ (lambda (port)
+ (regexp-substitute/global
+ port rx string
+ 'pre proc 'post))))
diff --git a/module/c/unlex.scm b/module/c/unlex.scm
new file mode 100644
index 00000000..e467a50f
--- /dev/null
+++ b/module/c/unlex.scm
@@ -0,0 +1,84 @@
+(define-module (c unlex)
+ :use-module (hnh util type)
+ :use-module (ice-9 match)
+ :use-module (c lex2)
+ :use-module (c cpp-types)
+ :use-module (c cpp-util)
+ :use-module ((texinfo string-utils) :select (escape-special-chars))
+ :export (unlex
+ unlex-aggressive
+ stringify-token
+ stringify-tokens))
+
+(define (unlex tokens)
+ (typecheck tokens (list-of lexeme?))
+ (string-concatenate
+ (map (lambda (x) (cond (x preprocessing-token? => stringify-token)
+ ((whitespace-token? x) (lexeme-body x))
+ ((other-token? x) (lexeme-body x))))
+ tokens)))
+
+;; takes a list of preprocessing-token's, and return a "source" string
+(define (unlex-aggressive tokens)
+ (typecheck tokens (list-of lexeme?))
+ (string-concatenate
+ (map (lambda (x)
+ (cond ((preprocessing-token? x) (stringify-token x))
+ ((whitespace-token? x) " ")
+ ((other-token? x) (lexeme-body x))))
+ (squeeze-whitespace tokens))))
+
+(define (stringify-escape-sequence sub-token)
+ (match sub-token
+ (`(simple-escape-sequence ,x)
+ (format #f "\\~a" x))
+ (`(octal-escape-sequence ,x)
+ (format #f "\\~a" x))
+ (`(hexadecimal-escape-sequence ,x)
+ (format #f "\\x~a" x))
+ (`(universal-character-name ,x)
+ (case (string-length x)
+ ((4) (format #f "\\u~a" x))
+ ((8) (format #f "\\U~a" x))))))
+
+(define (stringify-string-tokens fragments)
+ (with-output-to-string
+ (lambda ()
+ (display #\")
+ (for-each (match-lambda
+ (`(escape-sequence ,x)
+ (display (stringify-escape-sequence x)))
+ ;; Backslash in source strings is usually encoded by an
+ ;; 'escape-sequence, but literal backslashes can be in
+ ;; "regular" string fragments as result of the stringification
+ ;; operator (#).
+ (s (display (escape-special-chars s "\"\\" #\\))))
+ fragments)
+ (display #\"))))
+
+;; Returns the "source" of the token, as a preprocessing string literal token
+(define (stringify-token preprocessing-token)
+ (match (lexeme-body preprocessing-token)
+ (('string-literal `(encoding-prefix . ,prefix) parts ...)
+ (stringify-string-tokens parts))
+
+ (`(header-name (q-string ,s))
+ (format #f "~s" s))
+
+ (`(header-name (h-string ,s))
+ (format #f "<~a>" s))
+
+ (`(identifier ,id) id)
+
+ (`(pp-number ,n) n)
+
+ ;; TODO remaining parts
+ (('character-constant `(character-encoding . ,x) c parts ...)
+ (format #f "'~a'" c))
+
+ (`(punctuator ,p) p)))
+
+;; takes a token list, and return a single string literal token
+(define (stringify-tokens tokens)
+ (lexeme type: 'preprocessing-token
+ body: `(string-literal (encoding-prefix) ,(unlex-aggressive tokens))))
diff --git a/module/c/zipper.scm b/module/c/zipper.scm
new file mode 100644
index 00000000..65cea211
--- /dev/null
+++ b/module/c/zipper.scm
@@ -0,0 +1,60 @@
+;;; Commentary:
+;; Zipper data structure. Could be moved to (hnh util), but would then need to
+;; be at least slightly more thorough.
+;;; Code:
+
+(define-module (c zipper)
+ :use-module (srfi srfi-88)
+ :use-module (hnh util object)
+ :export (list-zipper
+ list-zipper?
+ left focused right
+ zip-left
+ zip-right
+ zip-find-right
+ list->zipper
+ zipper->list
+ rezip))
+
+(define-type (list-zipper)
+ (left type: list?)
+ focused
+ (right type: list?))
+
+;; Move zipper one step to the left
+(define (zip-left zipper)
+ (if (null? (left zipper))
+ zipper
+ (list-zipper left: (cdr (left zipper))
+ right: (cons (focused zipper) (right zipper))
+ focused: (car (left zipper)))))
+
+;; Move zipper one step to the right
+(define (zip-right zipper)
+ (if (null? (right zipper))
+ zipper
+ (list-zipper left: (cons (focused zipper) (left zipper))
+ right: (cdr (right zipper))
+ focused: (car (right zipper)))))
+
+;; find first element matching predicate, going right
+(define (zip-find-right predicate zipper)
+ (cond ((null? (right zipper)) zipper)
+ ((predicate (focused zipper)) zipper)
+ (else (zip-find-right predicate (zip-right zipper)))))
+
+(define (list->zipper list)
+ (list-zipper left: '()
+ focused: (car list)
+ right: (cdr list)))
+
+
+(define (rezip zipper)
+ (if (null? (left zipper))
+ zipper
+ (rezip (zip-left zipper))))
+
+(define (zipper->list zipper)
+ (let ((z (rezip zipper)))
+ (cons (focused z)
+ (right z))))
diff --git a/module/calp/html/view/calendar.scm b/module/calp/html/view/calendar.scm
index 9378737f..3d70fb1b 100644
--- a/module/calp/html/view/calendar.scm
+++ b/module/calp/html/view/calendar.scm
@@ -171,7 +171,7 @@ window.default_calendar='~a';"
;; Button to view week
(_ "Week"))
- ,(btn href: (date->string (set (day start-date) 1) "/month/~1.html")
+ ,(btn href: (date->string (day start-date 1) "/month/~1.html")
;; button to view month
(_ "Month"))
diff --git a/module/datetime.scm b/module/datetime.scm
index 8bba6e89..d54ba403 100644
--- a/module/datetime.scm
+++ b/module/datetime.scm
@@ -3,8 +3,6 @@
:replace (second)
:use-module (srfi srfi-1)
- :use-module (srfi srfi-9)
- :use-module (srfi srfi-9 gnu)
:use-module (srfi srfi-41)
:use-module (srfi srfi-71)
:use-module (srfi srfi-88)
@@ -15,12 +13,13 @@
->
->>
swap
- set
label
span-upto
- set->
))
+ :use-module (hnh util object)
+ :use-module (hnh util lens)
+
:use-module (ice-9 i18n)
:use-module (ice-9 format)
:use-module (ice-9 regex)
@@ -37,8 +36,11 @@
datetime
datetime?
- get-date
- get-timezone
+ ;; get-date
+ ;; get-timezone
+ datetime-date
+ datetime-time
+ tz
date-zero?
time-zero?
@@ -171,45 +173,40 @@
pre: (ensure (lambda (x) (<= sun x sat))))
-;;; RECORD TYPES
-
-;;; DATE
-
-(define-immutable-record-type <date>
- (make-date year month day)
- date?
- (year year) (month month) (day day))
-
-(define* (date key: (year 0) (month 0) (day 0))
- (unless (and (integer? year) (integer? month) (integer? day))
- (scm-error 'wrong-type-arg "date"
- "Year, month, and day must all be integers. ~s, ~s, ~s"
- (list year month day)
- #f))
- (make-date year month day))
-(set-record-type-printer!
- <date> (lambda (r p) (display (date->string r "#~1") p)))
-
-
-;;; TIME
-
-(define-immutable-record-type <time>
- (make-time hour minute second)
- time?
- (hour hour) (minute minute) (second second))
-
-(define* (time key: (hour 0) (minute 0) (second 0))
- (unless (and (integer? hour) (integer? minute) (integer? second))
- (scm-error 'wrong-type-arg "time"
- "Hour, minute, and second must all be integers. ~s, ~s, ~s"
- (list hour minute second)
- #f))
- (make-time hour minute second))
+;;; RECORD TYPES
-(set-record-type-printer!
- <time>
- (lambda (r p) (display (time->string r "#~3") p)))
+(define-type (date printer: (lambda (r p) (display (date->string r "#~1") p)))
+ (year default: 0 type: integer?)
+ (month default: 0 type: integer?)
+ (day default: 0 type: integer?))
+
+(define-type (time printer: (lambda (r p) (display (time->string r "#~3") p)))
+ (hour default: 0 type: integer?)
+ (minute default: 0 type: integer?)
+ (second default: 0 type: integer?))
+
+(define (datetime-constructor-constructor constructor validator)
+ (let ((date% date)
+ (time% time))
+ (lambda* (key: date time tz
+ (year 0) (month 0) (day 0)
+ (hour 0) (minute 0) (second 0))
+ (let ((date (or date (date% year: year month: month day: day)))
+ (time (or time (time% hour: hour minute: minute second: second))))
+ (validator date time tz)
+ (constructor date time tz)))))
+
+(define-type (datetime
+ constructor: datetime-constructor-constructor
+ printer: (lambda (r p)
+ (if (and (tz r) (not (string=? "UTC" (tz r))))
+ (write (datetime->sexp r) p)
+ (display (datetime->string r "#~1T~3~Z") p))))
+
+ (datetime-date type: date?)
+ (datetime-time type: time?)
+ tz)
(define (date-zero? date)
@@ -218,53 +215,14 @@
(define (time-zero? time)
(= 0 (hour time) (minute time) (second time)))
-;;; DATETIME
-
-(define-immutable-record-type <datetime>
- (make-datetime date time tz)
- datetime?
- (date get-date)
- (time get-time%)
- (tz tz) ; #f for localtime, "UTC", "Europe/Stockholm", ...
- )
-
-(define (get-timezone datetime)
- (tz datetime))
-
-
-(define* (datetime
- key: date time
- (year 0) (month 0) (day 0)
- (hour 0) (minute 0) (second 0)
- tz)
- (let ((date (or date (make-date year month day)))
- (time (or time (make-time hour minute second))))
- (unless (date? date)
- (scm-error 'wrong-type-arg "datetime"
- "Date must be a date object, got ~s"
- (list date) (list date)))
- (unless (time? time)
- (scm-error 'wrong-type-arg "datetime"
- "Time must be a time object, got ~s"
- (list time) (list time)))
- (make-datetime date time tz)))
-
-(set-record-type-printer!
- <datetime>
- (lambda (r p)
- (if (and (tz r) (not (string=? "UTC" (tz r))))
- (write (datetime->sexp r) p)
- (display (datetime->string r "#~1T~3~Z") p))))
-
-
;; NOTE there isn't any stable way to craft the tm objects.
;; I could call mktime on some date, and replace the fields
;; with the set-tm:*, but that is worse that breaking the API.
(define (datetime->tm datetime)
- (let ((t (get-time% datetime))
- (d (get-date datetime)))
+ (let ((t (datetime-time datetime))
+ (d (datetime-date datetime)))
(vector (second t)
(minute t)
(hour t)
@@ -296,8 +254,8 @@
(define (unix-time->datetime n)
;; tm->datetime returns GMT here (as hinted by the
;; name @var{gmtime}). Blindly change it to UTC.
- (set (tz (tm->datetime (gmtime n)))
- "UTC"))
+ (-> (tm->datetime (gmtime n))
+ (tz "UTC")))
;; this returns UTC time, with a TZ component set to "UTC"
@@ -305,7 +263,7 @@
(unix-time->datetime ((@ (guile) current-time))))
(define (current-date)
- (get-date (current-datetime)))
+ (datetime-date (current-datetime)))
@@ -324,10 +282,11 @@
[(string=? "local" (tz dt)) (mktime v)]
[else (mktime v (tz dt))])))))
;; strip tz-name, to conform with my local time.
- (set (tz (tm->datetime tm)) #f))))
+ (-> (tm->datetime tm)
+ (tz #f)))))
(define (as-date date/-time)
- (cond [(datetime? date/-time) (get-date date/-time)]
+ (cond [date/-time datetime? => datetime-date]
[(date? date/-time) date/-time]
[(time? date/-time) (date)]
[else (scm-error 'wrong-type-arg
@@ -337,7 +296,7 @@
#f)]))
(define (as-time date/-time)
- (cond [(datetime? date/-time) (get-time% date/-time)]
+ (cond [date/-time datetime? => datetime-time]
[(date? date/-time) (time)]
[(time? date/-time) date/-time]
[else (scm-error 'wrong-type-arg "as-time"
@@ -379,15 +338,15 @@
366 365))
(define (start-of-month date)
- (set (day date) 1))
+ (-> date (day 1)))
(define (end-of-month date)
- (set (day date) (days-in-month date)))
+ (-> date (day (days-in-month date))))
(define (start-of-year date)
- (set-> date
- (day 1)
- (month 1)))
+ (-> date
+ (day 1)
+ (month 1)))
(define (date-stream date-increment start-day)
(stream-iterate (lambda (d) (date+ d date-increment))
@@ -624,10 +583,10 @@
(prev-month-len (days-in-month (date- date* (date month: 1))))
(month-start (modulo (- (week-day date*) week-start) 7)))
(values
- (map (lambda (d) (set (day (date- date* (date month: 1))) d))
+ (map (lambda (d) (-> date* (date- (date month: 1)) (day d)))
(iota month-start (1+ (- prev-month-len month-start))))
- (map (lambda (d) (set (day date*) d)) (iota month-len 1))
- (map (lambda (d) (set (day (date+ date* (date month: 1))) d))
+ (map (lambda (d) (day date* d)) (iota month-len 1))
+ (map (lambda (d) (-> date* (date+ (date month: 1)) (day d)))
(iota (modulo (- (* 7 5) month-len month-start) 7) 1)))))
@@ -664,17 +623,17 @@
(let ((date-diff
(cond [start-date
- (let ((end-date (date+ start-date (get-date dt))))
- (1- (days-in-interval start-date end-date))) ]
- [(or (not (zero? (month (get-date dt))))
- (not (zero? (year (get-date dt)))))
+ (let ((end-date (date+ start-date (datetime-date dt))))
+ (1- (days-in-interval start-date end-date)))]
+ [(or (not (zero? (month (datetime-date dt))))
+ (not (zero? (year (datetime-date dt)))))
(scm-error 'misc-error "datetime->decimal-hour"
"Multi-month intervals only supported when start-date is given (~a)"
(list dt)
#f)]
- [else (day (get-date dt))])))
- (+ (time->decimal-hour (get-time% dt))
- (* date-diff 24))))
+ [else (-> dt datetime-date day)])))
+ (-> dt datetime-time time->decimal-hour
+ (+ (* date-diff 24)))))
;; Returns a list of all dates from start to end.
;; both inclusive
@@ -693,8 +652,8 @@
(fmt "~1T~3")
(locale %global-locale)
key: allow-unknown?)
- (define date (get-date datetime))
- (define time (get-time% datetime))
+ (define date (datetime-date datetime))
+ (define time (datetime-time datetime))
(with-output-to-string
(lambda ()
(fold (lambda (token state)
@@ -718,7 +677,7 @@
((#\a) (display (week-day-name (week-day date) 3 locale: locale)))
((#\B) (display (locale-month (month date) locale)))
((#\b) (display (locale-month-short (month date) locale)))
- ((#\Z) (when (equal? "UTC" (get-timezone datetime)) (display "Z")))
+ ((#\Z) (when (equal? "UTC" (tz datetime)) (display "Z")))
(else (unless allow-unknown?
(scm-error 'misc-error "datetime->string"
"Invalid format token ~a"
@@ -777,14 +736,6 @@ Returns -1 on failure"
(define* (loop str fmt dt optional: (ampm ampm))
(loop* str fmt dt ampm))
- (define time (get-time% dt))
- (define date (get-date dt))
- (define zone (get-timezone dt))
- (define (as-dt dt)
- (cond [(date? dt) (datetime date: dt time: time tz: zone)]
- [(time? dt) (datetime date: date time: dt tz: zone)]
- [else dt]))
-
(cond [(and (null? str) (null? fmt))
(ampm dt)]
[(null? str)
@@ -811,7 +762,7 @@ Returns -1 on failure"
(if (eq? #\Z (car str))
(loop (cdr str)
(cddr fmt)
- (set (tz dt) "UTC"))
+ (tz dt "UTC"))
(loop str
(cddr fmt)
dt))]
@@ -825,17 +776,13 @@ Returns -1 on failure"
(case (string-ref (match:substring m 1) 0)
((#\a #\A)
(lambda (dt)
- (datetime date: (get-date dt)
- time: (if (= 12 (hour (get-time% dt)))
- (set (hour (get-time% dt)) 0)
- (get-time% dt)))))
+ (modify* dt datetime-time hour
+ (lambda (x) (if (= x 12) 0 x)))))
((#\p #\P)
(lambda (dt)
- (datetime date: (get-date dt)
- time: (if (= 12 (hour (get-time% dt)))
- (get-time% dt)
- (set (hour (get-time% dt))
- (+ 12 (hour (get-time% dt))))))))))
+ (modify* dt datetime-time hour
+ (lambda (x) (if (= x 12)
+ x (+ x 12))))))))
))
;; fail here?
(else (loop str (cddr fmt) dt)))
@@ -853,8 +800,8 @@ Returns -1 on failure"
((next-char rest ...) (span (lambda (c) (not (eqv? c next-char))) str)))))
(loop post
(cddr fmt)
- (as-dt (set (month date)
- (parse-month (list->string head) locale)))))]
+ (set dt datetime-date month
+ (parse-month (list->string head) locale))))]
[(#\H #\M #\S #\m #\d)
;; This captures both the possibility of a date with a single digit,
;; e.g. 7 may, but also compact, digits only, form without delimiters,
@@ -864,13 +811,14 @@ Returns -1 on failure"
(loop
post
(cddr fmt)
- (as-dt
- (case (cadr fmt)
- [(#\H) (set (hour time) num)]
- [(#\M) (set (minute time) num)]
- [(#\S) (set (second time) num)]
- [(#\m) (set (month date) num)]
- [(#\d) (set (day date) num)]))))]
+ (let ((lens
+ (case (cadr fmt)
+ [(#\H) (lens-compose datetime-time hour)]
+ [(#\M) (lens-compose datetime-time minute)]
+ [(#\S) (lens-compose datetime-time second)]
+ [(#\m) (lens-compose datetime-date month)]
+ [(#\d) (lens-compose datetime-date day)])))
+ (set dt lens num))))]
[(#\Y)
(let* ((pre post (span-upto 4 char-numeric? str))
@@ -878,7 +826,7 @@ Returns -1 on failure"
(loop
post
(cddr fmt)
- (as-dt (set (year date) num))))]
+ (set dt datetime-date year num)))]
[else (err "Unimplemented or incorrect parse token ~S" str)])]
[else
@@ -894,11 +842,11 @@ Returns -1 on failure"
(define* (string->time str optional: (fmt "~H:~M:~S") (locale %global-locale)
key: return-trailing)
- (get-time% (string->datetime str fmt locale return-trailing: return-trailing)))
+ (datetime-time (string->datetime str fmt locale return-trailing: return-trailing)))
(define* (string->date str optional: (fmt "~Y-~m-~d") (locale %global-locale)
key: return-trailing)
- (get-date (string->datetime str fmt locale return-trailing: return-trailing)))
+ (datetime-date (string->datetime str fmt locale return-trailing: return-trailing)))
;; Parse @var{string} as either a date, time, or date-time.
;; String MUST be on iso-8601 format.
@@ -924,7 +872,7 @@ Returns -1 on failure"
(let ((dt (string->datetime str "~Y~m~dT~H~M~S~Z")))
(if (tz dt)
dt
- (set (tz dt) zone))))
+ (tz dt zone))))
(define (parse-iso-date str)
(string->date str))
@@ -949,8 +897,8 @@ Returns -1 on failure"
second: ,(second t)))
(define* (datetime->sexp dt optional: verbose)
- `(datetime date: ,(if verbose (date->sexp (get-date dt)) (get-date dt))
- time: ,(if verbose (time->sexp (get-time% dt)) (get-time% dt))
+ `(datetime date: ,(if verbose (date->sexp (datetime-date dt)) (datetime-date dt))
+ time: ,(if verbose (time->sexp (datetime-time dt)) (datetime-time dt))
tz: ,(tz dt)))
@@ -992,8 +940,8 @@ Returns -1 on failure"
(define (datetime= . args)
(reduce (lambda (a b)
- (and (date= (get-date a) (get-date b))
- (time= (get-time% a) (get-time% b))
+ (and (date= (datetime-date a) (datetime-date b))
+ (time= (datetime-time a) (datetime-time b))
a))
#t args))
@@ -1053,16 +1001,16 @@ Returns -1 on failure"
(define datetime<
(fold-comparator
(lambda (a b)
- (if (date= (get-date a) (get-date b))
- (time< (get-time% a) (get-time% b))
- (date< (get-date a) (get-date b))))))
+ (if (date= (datetime-date a) (datetime-date b))
+ (time< (datetime-time a) (datetime-time b))
+ (date< (datetime-date a) (datetime-date b))))))
(define datetime<=
(fold-comparator
(lambda (a b)
- (if (date= (get-date a) (get-date b))
- (time<= (get-time% a) (get-time% b))
- (date<= (get-date a) (get-date b))))))
+ (if (date= (datetime-date a) (datetime-date b))
+ (time<= (datetime-time a) (datetime-time b))
+ (date<= (datetime-date a) (datetime-date b))))))
(define date/-time<
(fold-comparator
@@ -1126,19 +1074,20 @@ Returns -1 on failure"
(let loop ((target base) (change change))
(if (>= (days-in-month target) (+ (day change) (day target)))
;; No date overflow, just add the change
- (values (set-> target (day = (+ (day change))))
- (set-> change (day 0)))
+ (values (-> target (day (+ (day target)
+ (day change))))
+ (day change 0))
;; Date (and possibly year) overflow
(loop (if (= 12 (month target))
- (set-> target
- (year = (+ 1))
- (month 1)
- (day 1))
- (set-> target
- (month = (+ 1))
- (day 1)))
- (set-> change
- (day = (- (1+ (- (days-in-month target) (day target))))))))))
+ (-> (modify target year 1+)
+ (month 1)
+ (day 1))
+ (-> (modify target month 1+)
+ (day 1)))
+ (modify change day -
+ (- (day target))
+ (days-in-month target)
+ 1)))))
(define-values (month-fixed change**)
(if (date-zero? change*)
@@ -1146,20 +1095,19 @@ Returns -1 on failure"
(let loop ((target days-fixed) (change change*))
(if (< 12 (+ (month change) (month target)))
;; if we overflow into the next year
- (loop (set-> target
- (year = (+ 1))
- (month 1))
- (set (month change) = (- (- 13 (month target)))))
-
+ (loop (-> (modify target year 1+)
+ (month 1))
+ (modify change month
+ + (month target) -13))
;; if we don't overflow our date
- (values (set (month target) = (+ (month change)))
- (set (month change) 0))
+ (values (modify target month + (month change))
+ (month change 0))
))))
;; change** should here should have both month and date = 0
- (set (year month-fixed) = (+ (year change**))))
+ (year month-fixed (+ (year month-fixed) (year change**))))
(define (date+% change base)
@@ -1188,33 +1136,30 @@ Returns -1 on failure"
(define-values (days-fixed change*)
(let loop ((target base) (change change))
(if (>= (day change) (day target))
- (let ((new-change (set (day change) = (- (day target)))))
+ (let ((new-change (modify change day - (day target))))
(loop (if (= 1 (month target))
- (set-> target
- (year = (- 1))
- (month 12)
- (day 31) ; days in december
- )
- (set-> target
- (month = (- 1))
- (day (days-in-month (set (month target) = (- 1))))))
+ (-> (modify target year 1-)
+ (month 12)
+ (day 31) ; days in december
+ )
+ (let ((nm (modify target month 1-)))
+ (day nm (days-in-month nm))))
new-change))
- (values (set (day target) = (- (day change)))
- (set (day change) 0)))))
+ (values (modify target day - (day change))
+ (day change 0)))))
(define-values (month-fixed change**)
(let loop ((target days-fixed) (change change*))
(if (>= (month change) (month target))
- (loop (set-> target
- (year = (- 1))
- (month 12))
- (set (month change) = (- (month target))))
- (values (set (month target) = (- (month change)))
- (set (month change) 0)))))
+ (loop (-> (modify target year 1-)
+ (month 12))
+ (modify change month - (month target)))
+ (values (modify target month - (month change))
+ (month change 0)))))
;; change** should here should have both month and date = 0
- (set (year month-fixed) = (- (year change**))))
+ (modify month-fixed year - (year change**)))
(define (date-% change base)
@@ -1248,28 +1193,28 @@ Returns -1 on failure"
;; while (day base) > (days-in-month base)
;; month++; days -= (days-in-month base)
(define second-fixed
- (let loop ((target (set (second base) = (+ (second change)))))
+ (let loop ((target (modify base second + (second change))))
(if (>= (second target) 60)
- (loop (set-> target
- (minute = (+ 1))
- (second = (- 60))))
+ (loop (-> target
+ (modify minute 1+)
+ (modify second - 60)))
target)))
;; while (month base) > 12
;; year++; month -= 12
(define minute-fixed
- (let loop ((target (set (minute second-fixed) = (+ (minute change)))))
+ (let loop ((target (modify second-fixed minute + (minute change))))
(if (>= (minute target) 60)
- (loop (set-> target
- (hour = (+ 1))
- (minute = (- 60))))
+ (loop (-> target
+ (modify hour 1+)
+ (modify minute - 60)))
target)))
- (define hour-almost-fixed (set (hour minute-fixed) = (+ (hour change))))
+ (define hour-almost-fixed (modify minute-fixed hour + (hour change)))
(if (<= 24 (hour hour-almost-fixed))
(let ((div remainder (floor/ (hour hour-almost-fixed) 24)))
- (values (set (hour hour-almost-fixed) remainder) div))
+ (values (hour hour-almost-fixed remainder) div))
(values hour-almost-fixed 0)))
;;; PLUS
@@ -1289,28 +1234,26 @@ Returns -1 on failure"
(define-values (second-fixed change*)
(let loop ((target base) (change change))
(if (> (second change) (second target))
- (loop (set-> target
- (minute = (- 1))
- (second 60))
- (set (second change) = (- (second target))))
- (values (set (second target) = (- (second change)))
- (set (second change) 0)))))
+ (loop (-> (modify target minute 1-)
+ (second 60))
+ (modify change second - (second target)))
+ (values (modify target second - (second change))
+ (second change 0)))))
(define-values (minute-fixed change**)
(let loop ((target second-fixed) (change change*))
(if (> (minute change) (minute target))
- (loop (set-> target
- (hour = (- 1))
- (minute 60))
- (set (minute change) = (- (minute target))))
- (values (set (minute target) = (- (minute change)))
- (set (minute change) 0)))))
+ (loop (-> (modify target hour 1-)
+ (minute 60))
+ (modify change minute - (minute target)))
+ (values (modify target minute - (minute change))
+ (minute change 0)))))
(if (>= (hour minute-fixed) (hour change**))
- (values (set (hour minute-fixed) = (- (hour change**))) 0)
+ (values (modify minute-fixed hour - (hour change**)) 0)
(let ((diff (- (hour minute-fixed)
(hour change**))))
- (values (set (hour minute-fixed) (modulo diff 24))
+ (values (hour minute-fixed (modulo diff 24))
(abs (floor (/ diff 24)))))))
;; Goes backwards from base, returning the two values:
@@ -1331,21 +1274,20 @@ Returns -1 on failure"
(define (datetime+ base change)
- (let ((time overflow (time+ (get-time% base) (get-time% change))))
- (datetime date: (date+ (get-date base)
- (get-date change)
- (date day: overflow))
- time: time
- tz: (get-timezone base)
- )))
+ (let ((time* overflow (time+ (datetime-time base) (datetime-time change))))
+ (-> base
+ (modify datetime-date date+
+ (datetime-date change)
+ (date day: overflow))
+ (datetime-time time*))))
(define (datetime- base change)
- (let ((time underflow (time- (get-time% base) (get-time% change))))
- (datetime date: (date- (get-date base)
- (get-date change)
- (date day: underflow))
- time: time
- tz: (tz base))))
+ (let ((time* underflow (time- (datetime-time base) (datetime-time change))))
+ (-> base
+ (modify datetime-date date-
+ (datetime-date change)
+ (date day: underflow))
+ (datetime-time time*))))
;;; the *-difference procedures takes two actual datetimes.
;;; date- instead takes a date and a delta (but NOT an actual date).
@@ -1357,20 +1299,18 @@ Returns -1 on failure"
(define-values (b* a*)
(let loop ((b b) (a a))
(if (> (day a) (day b))
- (let ((new-a (set (day a) = (- (1+ (day b))))))
+ (let ((new-a (day a (- (day a) (day b) 1))))
(loop (if (= 0 (month b))
- (set-> b
- (year = (- 1))
- (month 11)
- (day 30) ; Last day in december
- )
- (set-> b
- (month = (- 1))
- (day (1- (days-in-month b))))) ; last in prev month
+ (-> (modify b year 1-)
+ (month 11)
+ (day 30) ; Last day in december
+ )
+ (-> (modify b month 1-)
+ (day (1- (days-in-month b))))) ; last in prev month
new-a))
;; elif (> (day b) (day a))
- (values (set (day b) = (- (day a)))
- (set (day a) 0)))))
+ (values (day b (- (day b) (day a)))
+ (day a 0)))))
;; (day a*) should be 0 here.
@@ -1378,17 +1318,16 @@ Returns -1 on failure"
(define-values (b** a**)
(let loop ((b b*) (a a*))
(if (> (month a) (month b))
- (loop (set-> b
- (year = (- 1))
- (month 11))
- (set (month a) = (- (1+ (month b)))))
+ (loop (-> (modify b year 1-)
+ (month 11))
+ (modify a month - 1 (month b)))
;; elif (> (month b) (month a))
- (values (set (month b) = (- (month a)))
- (set (month a) 0)))))
+ (values (modify b month - (month a))
+ (month a 0)))))
;; a** should here should have both month and date = 0
- (set (year b**) = (- (year a**))))
+ (year b** (- (year b**) (year a**))))
@@ -1407,20 +1346,21 @@ Returns -1 on failure"
(list earlier-date later-date)
#f))
- (date-difference% (set-> later-date
- (month = (- 1))
- (day = (- 1)))
- (set-> earlier-date
- (month = (- 1))
- (day = (- 1)))))
+ (let ((proc (lambda (d) (-> d
+ (modify month 1-)
+ (modify day 1-)))))
+ (date-difference% (proc later-date)
+ (proc earlier-date))))
;; NOTE, this is only properly defined when end is greater than start.
(define (datetime-difference end start)
;; NOTE Makes both start and end datetimes in the current local time.
- (let ((fixed-time overflow (time- (get-time% end)
- (get-time% start))))
- (datetime date: (date-difference (date- (get-date end)
+ (let ((fixed-time overflow (time- (datetime-time end)
+ (datetime-time start))))
+ (datetime date: (date-difference (date- (datetime-date end)
(date day: overflow))
- (get-date start))
- time: fixed-time)))
+ (datetime-date start))
+ time: fixed-time
+ ;; TODO TZ
+ )))
diff --git a/module/datetime/zic.scm b/module/datetime/zic.scm
index 66c0ba06..1c9b34ee 100644
--- a/module/datetime/zic.scm
+++ b/module/datetime/zic.scm
@@ -342,17 +342,17 @@
(datetime
date:
(match (rule-on rule)
- ((? number? on) (set (day d) on))
+ ((? number? on) (day d on))
(('last n)
(iterate (lambda (d) (date- d (date day: 1)))
(lambda (d) (eqv? n (week-day d)))
- (set (day d) (days-in-month d))))
+ (day d (days-in-month d))))
(((? (lambda (x) (memv x '(< >))) <>) wday base-day)
(iterate (lambda (d) ((if (eq? '< <>)
date- date+)
d (date day: 1)))
(lambda (d) (eqv? wday (week-day d)))
- (set (day d) base-day))))
+ (day d base-day))))
tz: (case (timespec-type (rule-at rule))
((#\w) #f)
((#\s) (warning (_ "what even is \"Standard time\"‽")) #f)
diff --git a/module/hnh/util.scm b/module/hnh/util.scm
index d2c0dd5f..096e38c5 100644
--- a/module/hnh/util.scm
+++ b/module/hnh/util.scm
@@ -34,6 +34,9 @@
group-by
split-by
+ split-by-one-of
+
+ break/all
span-upto
cross-product
@@ -341,6 +344,38 @@
(cdr rem))])))
+(define (split-by-one-of lst items)
+ (cond ((null? items)
+ (scm-error 'wrong-type-arg "split-by-one-of"
+ "Must have at least one item to split by, when splitting ~s"
+ (cons items '()) #f))
+ ((not (list? items))
+ (scm-error 'wrong-type-arg "split-by-one-of"
+ "Items must be list of list of symbols, got ~s"
+ (list items) #f))
+ (else
+ (call-with-values
+ (lambda ()
+ (car+cdr
+ (let loop ((token 'sentinel-token) (lst lst))
+ (let ((head tail (break (lambda (item) (memv item items))
+ lst)))
+ (let ((group (cons token head)))
+ (if (null? tail)
+ (list group)
+ (cons group (loop (car tail) (cdr tail)))))))))
+ ;; Remove the sentinel token
+ (lambda (first rest) (cons (cdr first) rest))))))
+
+
+(define (break/all predicate lst)
+ (let loop ((lst lst))
+ (if (null? lst)
+ '(())
+ (let ((fst rest (break predicate lst)))
+ (if (null? rest)
+ (list fst)
+ (cons fst (loop (cdr rest))))))))
;; Simar to span from srfi-1, but never takes more than
;; @var{count} items. Can however still take less.
diff --git a/module/hnh/util/io.scm b/module/hnh/util/io.scm
index d73a1de8..2fbad39f 100644
--- a/module/hnh/util/io.scm
+++ b/module/hnh/util/io.scm
@@ -1,11 +1,12 @@
(define-module (hnh util io)
:use-module ((hnh util) :select (begin1))
- :use-module ((ice-9 rdelim) :select (read-line))
+ :use-module ((ice-9 rdelim) :select (read-line read-string))
:export (open-input-port
open-output-port
read-lines
with-atomic-output-to-file
- call-with-tmpfile))
+ call-with-tmpfile
+ read-file))
(define (open-input-port str)
(if (string=? "-" str)
@@ -72,3 +73,6 @@
(begin1
(proc port filename)
(close-port port))))))
+
+(define (read-file path)
+ (call-with-input-file path read-string))
diff --git a/module/hnh/util/lens.scm b/module/hnh/util/lens.scm
new file mode 100644
index 00000000..7a8fbd19
--- /dev/null
+++ b/module/hnh/util/lens.scm
@@ -0,0 +1,99 @@
+(define-module (hnh util lens)
+ :use-module (srfi srfi-1)
+ :export (modify
+ modify*
+ set
+ get
+
+ identity-lens
+ compose-lenses
+ lens-compose
+
+ ref car* cdr*))
+
+
+(define (modify object lens f . args)
+ (lens object (apply f (lens object) args)))
+
+(define-syntax modify*
+ (syntax-rules ()
+ ((_ object f) (f object))
+ ((_ object lens rest ...)
+ (modify object lens
+ (lambda (object*) (modify* object* rest ...))))))
+
+;; The simple case of getting and setting when you already have the lens is trivial
+;; (lens object)
+;; (lens object new-value)
+
+(define-syntax set
+ (syntax-rules ()
+ ((_ object lenses ... value)
+ (modify* object lenses ... (const value)))))
+
+(define-syntax get
+ (syntax-rules ()
+ ((_ object) object)
+ ((_ object f lenses ...)
+ (get (f object) lenses ...))))
+
+
+
+
+(define (make-lens getter setter)
+ (case-lambda ((datum) (getter datum))
+ ((datum new-value) (setter datum new-value))))
+
+(define-syntax build-lens
+ (syntax-rules ()
+ ((_ (getter gargs ...)
+ (setter sargs ...))
+ ;; (make-lens (lambda (datum) (getter datum gargs ...))
+ ;; (lambda (datum new-value) (setter datum sargs ... new-value)))
+ (case-lambda ((datum)
+ (getter datum gargs ...))
+ ((datum new-value)
+ (setter datum sargs ... new-value))))
+ ((_ (getter args ...) setter)
+ (build-accesor (getter args ...) (setter)))
+ ((_ getter (setter args ...))
+ (build-lens (getter) (setter args ...)))
+ ((_ getter setter)
+ (build-lens (getter) (setter)))))
+
+
+
+
+(define identity-lens
+ (case-lambda ((a) a)
+ ((_ a) a)))
+
+(define (compose-lenses% f g)
+ (build-lens (get f g) (set f g)))
+
+(define (compose-lenses . fs)
+ (reduce-right compose-lenses% identity-lens fs))
+
+(define lens-compose compose-lenses)
+
+
+
+(define (list-change list index value)
+ (cond ((zero? index)
+ (cons value (cdr list)))
+ ((null? list)
+ (scm-error 'out-of-range "list-change" "" #f #f))
+ (else
+ (cons (car list)
+ (list-change (cdr list)
+ (1- index)
+ value)))))
+
+
+
+(define (ref idx)
+ (build-lens (list-ref idx) (list-change idx)))
+
+
+(define car* (make-lens car (lambda (pair value) (cons value (cdr pair)))))
+(define cdr* (make-lens cdr (lambda (pair value) (cons (car pair) value))))
diff --git a/module/hnh/util/object.scm b/module/hnh/util/object.scm
new file mode 100644
index 00000000..4477b462
--- /dev/null
+++ b/module/hnh/util/object.scm
@@ -0,0 +1,169 @@
+(define-module (hnh util object)
+ :use-module (srfi srfi-9 gnu)
+ :use-module (ice-9 curried-definitions)
+ :use-module (hnh util)
+ :use-module (hnh util type)
+ :export (define-type))
+
+
+
+;; If given a syntax list extract the first lexeme, if given a "symbol", return that.
+(define (syntax-first stx)
+ (syntax-case stx ()
+ ((a rest ...) #'a)
+ (a #'a)))
+
+(define (construct-syntax stx base transform)
+ (->> base
+ syntax->datum
+ (format #f transform)
+ string->symbol
+ (datum->syntax stx)))
+
+;; stx should be a syntax object of a key-value list on the form
+;; (key: value key2: value2)
+;; and target-key the datum which the target key unwraps to.
+;; returns the corresponding values syntax
+;; or #f if none is found
+(define (kv-ref stx target-key)
+ (syntax-case stx ()
+ ((key value rest ...)
+ (if (eqv? target-key (syntax->datum #'key))
+ #'value
+ (kv-ref #'(rest ...) target-key)))
+ (_ #f)))
+
+
+
+;; Given (x type: predicate?), expand to a single `unless' form (otherwise #f)
+(define-syntax (validator stx)
+ (syntax-case stx ()
+ ((_ (name kvs ...))
+ (cond ((kv-ref #'(kvs ...) type:)
+ => (lambda (type-stx)
+ (with-syntax ((type type-stx))
+ #'(unless (build-validator-body name type)
+ (scm-error 'wrong-type-arg "validator"
+ "Invalid value for `~s'. Expected ~s, got ~s"
+ (list (quote name) (quote type) name) #f)))))
+ (else #f)))
+ ((_ name) #f)))
+
+
+
+
+;; Get syntax for getter-procedure's symbol
+(define-syntax (field-get stx)
+ (syntax-case stx ()
+ ;; ((_ (name kv ...)) #'(field-get name))
+ ((_ type-name name)
+ (->>
+ (format #f "~a-~a-get"
+ (syntax->datum #'type-name)
+ (syntax->datum #'name))
+ string->symbol
+ (datum->syntax stx)))))
+
+;; get syntax for setter-procedure's symbol
+(define-syntax (field-set stx)
+ (syntax-case stx ()
+ ;; ((_ (name kv ...)) #'(field-set name))
+ ((_ type-name name)
+ (->>
+ (format #f "~a-~a-set"
+ (syntax->datum #'type-name)
+ (syntax->datum #'name))
+ string->symbol
+ (datum->syntax stx)))))
+
+;; Construct a field line for define-immutable-record-type
+(define ((field-declaration type) stx)
+ (syntax-case stx ()
+ (name
+ (with-syntax ((name-get (->> (format #f "~a-~a-get"
+ (syntax->datum type)
+ (syntax->datum #'name))
+ string->symbol
+ (datum->syntax stx)))
+ (name-set (->> (format #f "~a-~a-set"
+ (syntax->datum type)
+ (syntax->datum #'name))
+ string->symbol
+ (datum->syntax stx))))
+ #'(name name-get name-set)))))
+
+;; Accessors are procedures for getting and setting fields in records
+(define-syntax (build-accessor stx)
+ (syntax-case stx ()
+ ((_ type-name (name kvs ...))
+ #'(define name
+ (case-lambda ((datum)
+ ((field-get type-name name) datum))
+ ((datum new-value)
+ ;; validator uses the first field (in the list) as both
+ ;; the name of the field, and a reference to the value of
+ ;; the field. This ensures those two are the same for validator,
+ ;; while keeping name bound to the accessor in the outer scope.
+ (let ((name new-value))
+ (validator (name kvs ...)))
+ ((field-set type-name name) datum new-value)))))
+ ((_ type-name name) #'(build-accessor type-name (name)))))
+
+
+;; Go from my concept of field deffinitions, to what lambda* wants as arguments
+(define (lambda*-stx field)
+ (syntax-case field ()
+ ((name kvs ...)
+ (cond ((kv-ref #'(kvs ...) default:)
+ => (lambda (dflt) #`(name #,dflt)))
+ (else #'name)))
+ (name #'name)))
+
+
+
+(define-syntax (define-type stx)
+ (syntax-case stx ()
+ ((_ (name attribute ...) field ...)
+ ;; These names SHOULD leak
+ (with-syntax ((<type>? (construct-syntax stx #'name "~a?")))
+ ;; These names are manually constructed, since generated identifiers are
+ ;; only dependant on the source from which they orginate, which leads to
+ ;; multiple instances of <type> being equal for similar types...
+ ;; See the manual 6.10.10 Hygiene and the Top-Level
+ (with-syntax ((<type> (construct-syntax stx #'name "<~a>"))
+ (make-<type> (construct-syntax stx #'name "make-~a%")))
+ #`(begin
+ (define-immutable-record-type <type>
+ (make-<type> #,@(map syntax-first #'(field ...)))
+ <type>?
+ #,@(map (field-declaration #'name)
+ (map syntax-first #'(field ...))))
+
+ ;; User-facing constructor
+ (define name
+ #,(cond ((kv-ref #'(attribute ...) constructor:)
+ => (lambda (constructor-builder)
+ #`(#,constructor-builder
+ ;; primitive constructor
+ make-<type>
+ ;; Type validator
+ (lambda #,(map syntax-first #'(field ...))
+ (validator field) ...))))
+ (else #`(lambda* (key: #,@(map lambda*-stx #'(field ...)))
+ ;; Type validators
+ (validator field) ...
+ (make-<type> #,@(map syntax-first #'(field ...)))))))
+
+ ;; Field accessors
+ (build-accessor name field) ...
+
+ ;; if printer in attribute
+ #,@(cond ((kv-ref #'(attribute ...) printer:)
+ => (lambda (printer)
+ (list #`(set-record-type-printer! <type> #,printer))))
+ (else '()))))))
+
+ ;; else, type name without extra attributes
+ #;
+ ((_ name field ...)
+ #'(define-type (name) field ...))))
diff --git a/module/hnh/util/type.scm b/module/hnh/util/type.scm
new file mode 100644
index 00000000..50008a3a
--- /dev/null
+++ b/module/hnh/util/type.scm
@@ -0,0 +1,54 @@
+(define-module (hnh util type)
+ :use-module ((srfi srfi-1) :select (every))
+ :export (build-validator-body
+ list-of pair-of
+ of-type?
+ typecheck
+ current-procedure-name))
+
+(define-syntax list-of
+ (syntax-rules ()
+ ((_ variable (rule ...))
+ (and (list? variable)
+ (every (lambda (x) (build-validator-body x (rule ...))) variable)))
+ ((_ variable rule)
+ (and (list? variable)
+ (every rule variable)))))
+
+(define-syntax-rule (pair-of variable a b)
+ (and (pair? variable)
+ (build-validator-body (car variable) a)
+ (build-validator-body (cdr variable) b)))
+
+;; DSL for specifying type predicates
+;; Basically a procedure body, but the variable to test is implicit.
+(define-syntax build-validator-body
+ (syntax-rules (and or not)
+ ((_ variable (and clauses ...)) (and (build-validator-body variable clauses) ...))
+ ((_ variable (or clauses ...)) (or (build-validator-body variable clauses) ...))
+ ((_ variable (not clause)) (not (build-validator-body variable clause)))
+ ((_ variable (proc args ...)) (proc variable args ...))
+ ((_ variable proc) (proc variable))))
+
+(define-syntax-rule (current-procedure-name)
+ ;; 1 since make-stack is at top of stack
+ (frame-procedure-name (stack-ref (make-stack #t) 1)))
+
+(define-syntax of-type?
+ (syntax-rules ()
+ ((_ variable type-spec)
+ (build-validator-body variable type-spec))
+ ((_ type-spec)
+ (lambda (x) (build-validator-body x type-spec)))))
+
+(define-syntax typecheck
+ (syntax-rules ()
+ ((_ variable type-clause)
+ (let ((procedure-name (current-procedure-name)))
+ (typecheck variable type-clause procedure-name)))
+ ((_ variable type-clause procedure-name)
+ (unless (build-validator-body variable type-clause)
+ (scm-error 'wrong-type-arg procedure-name
+ "Invalid value for ~s. Expected ~s, got ~s"
+ (list (quote variable) (quote type-clause) variable)
+ #f)))))
diff --git a/module/hnh/util/values.scm b/module/hnh/util/values.scm
new file mode 100644
index 00000000..79f06cff
--- /dev/null
+++ b/module/hnh/util/values.scm
@@ -0,0 +1,27 @@
+(define-module (hnh util values)
+ :use-module (ice-9 control)
+ :export (abort* on-fst on-snd apply/values)
+ )
+
+
+(define-syntax-rule (abort* form)
+ (call-with-values (lambda () form) abort))
+
+
+;; (on-fst (+ 2 (abort* (values 3 4))))
+;; ⇒ 5 ⇒ 4
+
+(define-syntax-rule (on-fst form)
+ (% form
+ (lambda (prompt fst . rest)
+ (apply values (prompt fst) rest))))
+
+(define-syntax-rule (on-snd form)
+ (% form
+ (lambda (prompt fst snd . rest)
+ (apply values fst (prompt snd) rest))))
+
+
+(define-syntax-rule (apply/values proc form)
+ (call-with-values (lambda () form)
+ proc))
diff --git a/module/vcomponent/datetime/output.scm b/module/vcomponent/datetime/output.scm
index 614438da..fb3d0478 100644
--- a/module/vcomponent/datetime/output.scm
+++ b/module/vcomponent/datetime/output.scm
@@ -73,7 +73,7 @@
(let ((s (prop ev 'DTSTART))
(e (prop ev 'DTEND)))
(if e
- (let ((fmt-str (if (date= (get-date s) (get-date e))
+ (let ((fmt-str (if (date= (datetime-date s) (datetime-date e))
(_ "~H:~M")
;; Note the non-breaking space
(_ "~Y-~m-~d ~H:~M"))))
diff --git a/module/vcomponent/recurrence/generate.scm b/module/vcomponent/recurrence/generate.scm
index 07305647..cc725b09 100644
--- a/module/vcomponent/recurrence/generate.scm
+++ b/module/vcomponent/recurrence/generate.scm
@@ -119,7 +119,7 @@
(branching-fold
(lambda (rule dt)
(let* ((key value (car+cdr rule))
- (d (if (date? dt) dt (get-date dt)))
+ (d (if (date? dt) dt (datetime-date dt)))
;; NOTE It's proably an error to give BYHOUR, BYMINUTE, and BYSECOND
;; rules for a date object. This doesn't warn if those are given, but
;; instead silently discards them.
@@ -128,8 +128,8 @@
(if (date? dt)
(if (date? o) o d)
(if (date? o)
- (datetime date: o time: t tz: (get-timezone dt))
- (datetime date: d time: o tz: (get-timezone dt)))))))
+ (datetime date: o time: t tz: (tz dt))
+ (datetime date: d time: o tz: (tz dt)))))))
(case key
[(BYMONTH)
(if (and (eq? 'YEARLY (freq rrule))
@@ -141,11 +141,11 @@
(concatenate
(map (lambda (wday)
(all-wday-in-month
- wday (start-of-month (set (month d) value))))
+ wday (start-of-month (month d value))))
(map cdr (byday rrule)))))
;; else
- (to-dt (set (month d) value)))]
+ (to-dt (month d value)))]
[(BYDAY)
(let* ((offset value (car+cdr value)))
@@ -201,12 +201,12 @@
[(BYYEARDAY) (to-dt (date+ (start-of-year d)
(date day: (1- value))))]
[(BYMONTHDAY)
- (to-dt (set (day d)
+ (to-dt (day d
(if (positive? value)
value (+ 1 value (days-in-month d)))))]
- [(BYHOUR) (to-dt (set (hour t) value))]
- [(BYMINUTE) (to-dt (set (minute t) value))]
- [(BYSECOND) (to-dt (set (second t) value))]
+ [(BYHOUR) (to-dt (hour t value))]
+ [(BYMINUTE) (to-dt (minute t value))]
+ [(BYSECOND) (to-dt (second t value))]
[else (scm-error 'wrong-type-arg "update"
"Unrecognized by-extender ~s"
key #f)])))
@@ -254,7 +254,7 @@
(extend-recurrence-set
rrule
(if (date? base-date)
- (date+ base-date (get-date (make-date-increment rrule)))
+ (date+ base-date (datetime-date (make-date-increment rrule)))
(datetime+ base-date (make-date-increment rrule))))))
(define ((month-mod d) value)
@@ -273,7 +273,7 @@
#t
(let ((key values (car+cdr (car remaining)))
(t (as-time dt))
- (d (if (date? dt) dt (get-date dt))))
+ (d (if (date? dt) dt (datetime-date dt))))
(and (case key
[(BYMONTH) (memv (month d) values)]
[(BYMONTHDAY) (memv (day d) (map (month-mod d) values))]
diff --git a/tests/run-tests.scm b/tests/run-tests.scm
index 986d1ac4..1f4a7bbe 100755
--- a/tests/run-tests.scm
+++ b/tests/run-tests.scm
@@ -95,6 +95,8 @@ fi
;; end of individual test case
(test-runner-on-test-begin! runner
(lambda (runner)
+ #;
+ (set-current-error-port (open-output-string))
(test-runner-aux-value! runner (transform-time-of-day (gettimeofday)))))
(test-runner-on-test-end! runner
(lambda (runner)
@@ -113,7 +115,14 @@ fi
=> (lambda (p) (with-output-to-string
(lambda ()
(display (bold "[SOURCE]: "))
- (truncated-print p width: 60))))))))
+ (truncated-print p width: 60)))))
+ (else (bold "[UNNAMED ASSERTION]")))))
+ #;
+ (when verbose?
+ (display
+ (map (lambda (line) (string-append (make-indent (1+ depth)) "> " line "\n"))
+ (string-split (get-output-string (current-error-port)) #\n)))
+ (newline))
(when (eq? 'fail (test-result-kind))
(cond ((test-result-ref runner 'actual-error)
=> (lambda (err)
diff --git a/tests/test/c-parse.scm b/tests/test/c-parse.scm
new file mode 100644
index 00000000..c16958de
--- /dev/null
+++ b/tests/test/c-parse.scm
@@ -0,0 +1,69 @@
+;;; Commentary
+;; Test implementation details of C parser
+;; TODO Should be ran before (test cpp)
+;;; Code
+
+(define-module (test cpp)
+ :use-module (srfi srfi-64)
+ :use-module (srfi srfi-88)
+ :use-module ((c lex) :select (lex))
+ :use-module (c parse))
+
+(define flatten-infix (@@ (c parse) flatten-infix))
+(define resolve-order-of-operations (@@ (c parse) resolve-order-of-operations))
+
+(test-group "Flatten infix"
+ (test-equal "Simple binary operator"
+ '(fixed-infix (integer (base-10 "1"))
+ +
+ (integer (base-10 "2")))
+ (flatten-infix (lex "1 + 2")))
+
+ (test-equal "Simple binary operator, with compound structure in on branch"
+ '(fixed-infix (integer (base-10 "1"))
+ +
+ (funcall (variable "f")
+ (group (integer (base-10 "2")))))
+ (flatten-infix (lex "1 + f(2)"))))
+
+(test-group "Order of operations"
+ (test-equal "Basic binary operator"
+ '((resolved-operator +)
+ (integer (base-10 "1"))
+ (integer (base-10 "2")))
+ (resolve-order-of-operations (flatten-infix (lex "1 + 2"))))
+
+ (test-equal "Multiple operators, with non-left-associative application"
+ '((resolved-operator +)
+ (integer (base-10 "1"))
+ ((resolved-operator *)
+ (integer (base-10 "2"))
+ (integer (base-10 "3"))))
+ (resolve-order-of-operations (flatten-infix (lex "1 + 2 * 3"))))
+
+ (test-equal "Multiple of the same operation gets clumed together"
+ '((resolved-operator +)
+ (integer (base-10 "1"))
+ (integer (base-10 "2"))
+ (integer (base-10 "3")))
+ (resolve-order-of-operations (flatten-infix (lex "1 + 2 + 3"))))
+
+ (test-equal "Simple Ternary"
+ '(ternary
+ (integer (base-10 "1"))
+ (integer (base-10 "2"))
+ (integer (base-10 "3")))
+ (resolve-order-of-operations (flatten-infix (lex "1 ? 2 : 3"))))
+
+ (test-equal "ternary with further infix operators"
+ '(ternary ((resolved-operator +)
+ (integer (base-10 "1"))
+ (integer (base-10 "2")))
+ ((resolved-operator %)
+ (integer (base-10 "3"))
+ (integer (base-10 "4")))
+ ((resolved-operator *)
+ (integer (base-10 "5"))
+ (integer (base-10 "6"))))
+ (resolve-order-of-operations (flatten-infix (lex "1 + 2? 3 % 4 : 5 * 6")))))
+
diff --git a/tests/test/cpp.scm b/tests/test/cpp.scm
index 9c720fde..1294bc96 100644
--- a/tests/test/cpp.scm
+++ b/tests/test/cpp.scm
@@ -3,37 +3,602 @@
;;; Code:
(define-module (test cpp)
+ :use-module (srfi srfi-1)
:use-module (srfi srfi-64)
:use-module (srfi srfi-88)
:use-module ((c lex) :select (lex))
- :use-module ((c parse) :select (parse-lexeme-tree)))
+ :use-module ((c parse) :select (parse-lexeme-tree))
+ :use-module ((c eval) :select (c-eval))
+ :use-module ((c eval environment) :select (make-environment env-set!))
+ :use-module ((rnrs arithmetic bitwise)
+ :select (bitwise-xor)))
+
+;; Note that the lexer's output isn't stable.
+;; The tests here are more to see where the lexer succeeds but the parser fails.
+;; So changing the lexer test cases isn't a problem
+;; but don't change the parser test cases
+
+;; __asm__ always has strings as arguments
+(test-skip "__asm__")
+
+;; Lexer produces garbage when attempted. Fixing this would also fix cast
+;; operations.
+(test-skip "Float in infix expression")
+;; order of operation is wrong, leading to an incorrect result
+(test-skip "Cast with operation")
+
+;; not implemented
+(test-skip "Token concatenation")
+
+;; A string follewed by a macro (which expands to a string)
+;; should be concatenated. This is however not yet implemented
+(test-skip "Implicit concatenation of string and macro")
(define run (compose parse-lexeme-tree lex))
-(test-equal
- '(+ (post-increment (dereference C)) 3)
- (run "(*C)++ + 3"))
+(define (alist->environment alist)
+ (fold (lambda (pair env)
+ (apply env-set! env pair))
+ (make-environment)
+ alist))
+
+(define (exec form . base-bindings)
+ (call-with-values
+ (lambda () (c-eval (alist->environment base-bindings)
+ (run form)))
+ (lambda (env value) value)))
+
+(define-syntax let-group
+ (syntax-rules ()
+ ((let ((form name) rest ...) body ...)
+ (test-group name
+ (let ((form name)
+ rest ...)
+ body ...)))))
+
+(let-group
+ ((form "(*C)++ + 3"))
+ (test-equal '(infix (postfix (group (prefix (prefix-operator "*")
+ (variable "C")))
+ (postfix-operator "++"))
+ (operator "+")
+ (integer (base-10 "3")))
+ (lex form))
+ (test-equal '(+ (post-increment (dereference C)) 3)
+ (run form)))
+
+(let-group
+ ((form "*C++ + 3"))
+ (test-equal '(infix (postfix (prefix (prefix-operator "*")
+ (variable "C"))
+ (postfix-operator "++"))
+ (operator "+")
+ (integer (base-10 "3")))
+ (lex form))
+ (test-equal '(+ (post-increment (dereference C)) 3)
+ (run form)))
+
+(let-group
+ ((form "*C++"))
+ (test-equal '(postfix (prefix (prefix-operator "*")
+ (variable "C"))
+ (postfix-operator "++"))
+ (lex form))
+ (test-equal '(post-increment (dereference C))
+ (run form)))
+
+(let-group
+ ((form "C++ + C++"))
+ (test-equal '(infix (postfix (variable "C")
+ (postfix-operator "++"))
+ (operator "+")
+ (postfix (variable "C")
+ (postfix-operator "++")))
+ (lex form))
+ (test-equal '(+ (post-increment C) (post-increment C))
+ (run form)))
+
+(let-group
+ ((form "++C + ++C"))
+ (test-equal '(infix (prefix (prefix-operator "++")
+ (variable "C"))
+ (operator "+")
+ (prefix (prefix-operator "++")
+ (variable "C")))
+ (lex form))
+ (test-equal '(+ (pre-increment C) (pre-increment C))
+ (run form)))
+
+(let-group
+ ((form "2 + 2 * 2"))
+ (test-equal '(infix (integer (base-10 "2"))
+ (operator "+")
+ (infix (integer (base-10 "2"))
+ (operator "*")
+ (integer (base-10 "2"))))
+ (lex form))
+ (test-equal '(+ 2 (* 2 2)) (run form))
+ (test-equal 6 (exec form)))
+
+(let-group
+ ((form "2 * 2 + 2"))
+ (test-equal '(infix (integer (base-10 "2"))
+ (operator "*")
+ (infix (integer (base-10 "2"))
+ (operator "+")
+ (integer (base-10 "2"))))
+ (lex form))
+ (test-equal '(+ (* 2 2) 2) (run form))
+ (test-equal 6 (exec form)))
+
+(let-group
+ ((form "2+2+2"))
+ (test-equal '(infix (integer (base-10 "2"))
+ (operator "+")
+ (infix (integer (base-10 "2"))
+ (operator "+")
+ (integer (base-10 "2")))) (lex form))
+ (test-equal '(+ 2 2 2) (run form))
+ (test-equal 6 (exec form)))
+
+(test-group "Unary minus"
+ (test-group "Without space"
+ (let ((form "-1"))
+ (test-equal '(prefix (prefix-operator "-")
+ (integer (base-10 "1")))
+ (lex form))
+ (test-equal '(- 1) (run form))
+ (test-equal -1 (exec form))))
+
+ (test-group "With space"
+ (let ((form "- 1"))
+ (test-equal '(prefix (prefix-operator "-")
+ (integer (base-10 "1")))
+ (lex form))
+ (test-equal '(- 1) (run form))
+ (test-equal -1 (exec form))))
+
+ (test-group "Before variable"
+ (let ((form "-x"))
+ (test-equal '(prefix (prefix-operator "-")
+ (variable "x"))
+ (lex form))
+ (test-equal '(- x) (run form))
+ (test-equal -5 (exec form '(x 5)))))
+
+ (test-group "Before infix"
+ (let ((form "-x+3"))
+ (test-equal '(infix (prefix (prefix-operator "-")
+ (variable "x"))
+ (operator "+")
+ (integer (base-10 "3")))
+ (lex form))
+ (test-equal '(+ (- x) 3) (run form))
+ (test-equal -2 (exec form '(x 5)))))
+
+ (test-group "Inside infix expression"
+ (let ((form "x+-3"))
+ (test-equal '(infix (variable "x")
+ (operator "+")
+ (prefix (prefix-operator "-")
+ (integer (base-10 "3"))))
+ (lex form))
+ (test-equal '(+ x (- 3)) (run form))
+ (test-equal 2 (exec form '(x 5)))))
+ )
+
+
+
+
+;; Hand picked forms from output of `cpp -dM /usr/include/termios.h` on
+;; FreeBSD 13.1-RELEASE releng/13.1-n250148-fc952ac2212 GENERIC amd64
+;; 2022-06-28
+
+(let ((form "00000200"))
+ (test-equal '(integer (base-8 "0000200")) (lex form))
+ (test-equal 128 (run form)))
+
+(let ((form "0"))
+ (test-equal '(integer (base-10 "0")) (lex form))
+ (test-equal 0 (run form)))
+
+(let ((form "1000000U"))
+ (test-equal '(integer (base-10 "1000000") (integer-suffix "U")) (lex form))
+ (test-equal '(as-type (unsigned) 1000000) (run form))
+ (test-equal 1000000 (exec form)))
+
+
+(let ((form "0x10c"))
+ (test-equal '(integer (base-16 "10c")) (lex form))
+ (test-equal 268 (run form)))
+
+;; Lexer keeps original case, handled later by parser
+(let ((form "0X10C"))
+ (test-equal '(integer (base-16 "10C")) (lex form))
+ (test-equal 268 (run form)))
+
+(let ((form "a != b"))
+ (test-equal '(infix (variable "a")
+ (operator "!=")
+ (variable "b"))
+ (lex form))
+ (test-equal '(not_eq a b) (run form))
+ (test-equal 1 (exec form '(a 1) '(b 2)))
+ (test-equal 0 (exec form '(a 1) '(b 1)))
+ )
+
+(let ((form "((c) == (val) && (val) != _POSIX_VDISABLE)"))
+ ;; (test-equal '() (lex form))
+ (test-equal '(and (== c val)
+ (not_eq val _POSIX_VDISABLE))
+ (run form))
+ (test-equal 0 (exec form '(c 1) '(val 2) '(_POSIX_VDISABLE 3)))
+ )
+
+(let ((form "CTRL('O')"))
+ (test-equal '(funcall (variable "CTRL") (group (char "O"))) (lex form))
+ (test-equal '(funcall CTRL 79) (run form))
+ (test-equal (bitwise-xor #x40 (char->integer #\O))
+ (exec form
+ ;; Definition copied from our parsers output of
+ ;; preprocessing output as defined above
+ '(CTRL (lambda (x)
+ (ternary (and (>= x 97) (<= x 122))
+ (+ (- x 97) 1)
+ (bitand (+ (- x 65) 1) 127)))))))
+
+(let ((form "CREPRINT"))
+ (test-equal '(variable "CREPRINT") (lex form))
+ (test-equal 'CREPRINT (run form)))
+
+(let ((form "(CCTS_OFLOW | CRTS_IFLOW)"))
+ (test-equal '(group (infix (variable "CCTS_OFLOW")
+ (operator "|")
+ (variable "CRTS_IFLOW")))
+ (lex form))
+ (test-equal '(bitor CCTS_OFLOW CRTS_IFLOW) (run form)))
+
+;; ((x) >= 'a' && (x) <= 'z'
+;; ? ((x) - 'a' + 1)
+;; : (((x) - 'a' + 1) & 0x7f))
+(let ((form "((x) >= 'a' && (x) <= 'z' ? ((x) - 'a' + 1) : (((x) - 'a' + 1) & 0x7f))"))
+ ;; (test-equal '() (lex form))
+ (test-equal '(ternary
+ (and (>= x #x61)
+ (<= x #x7A))
+ (+ (- x #x61) 1)
+ (bitand (+ (- x #x61) 1) 127))
+ (run form)))
+
+(let ((form "((x) & ~(IOCPARM_MASK << 16))"))
+ ;; (test-equal '() (lex form))
+ (test-equal '(bitand x (compl (<< IOCPARM_MASK 16))) (run form)))
+
+(let ((form "(((x) >> 8) & 0xff)"))
+ ;; (test-equal '() (lex form))
+ (test-equal '(bitand (>> x 8) 255) (run form)))
+
+(let ((form "(((x) >> 16) & IOCPARM_MASK)"))
+ ;; (test-equal '() (lex form))
+ (test-equal '(bitand (>> x 16) IOCPARM_MASK) (run form)))
+
+(let ((form "((1 << IOCPARM_SHIFT) - 1)"))
+ ;; (test-equal '() (lex form))
+ (test-equal '(- (<< 1 IOCPARM_SHIFT) 1) (run form)))
+
+(let ((form "_IO('t', 120)"))
+ (test-equal '(funcall
+ (variable "_IO")
+ (group (infix (char "t")
+ (operator ",")
+ (integer (base-10 "120")))))
+ (lex form))
+ (test-equal '(funcall _IO (#{,}# 116 120)) (run form)))
+
+;; note the lone type
+(let ((form "_IOW('t', 98, int)"))
+ ;; (test-equal '() (lex form))
+ (test-equal '(funcall _IOW (#{,}# 116 98 int))
+ (run form)))
+
+;; note the multi-word type
+(let ((form "_IOR('t', 19, struct termios)"))
+ ;; (test-equal '() (lex form))
+ (test-equal '(funcall _IOR (#{,}# 116 19 (struct-type termios))) (run form)))
+
+
+;; TODO concatenation rules
+;; #define __CONCAT(x,y) __CONCAT1(x,y)
+;; #define __CONCAT1(x,y) x ## y
+;; #define __CONSTANT_CFSTRINGS__ 1
+;; #define __COPYRIGHT(s) __IDSTRING(__CONCAT(__copyright_,__LINE__),s)
+
+(test-group "Token concatenation"
+ (let ((form "x ## y"))
+ (test-equal '() (lex form))
+ (test-equal '0 (run form))))
+
+(test-group "Floating point numbers"
+
+ (test-group "Diffent forms"
+ (test-group "No decimal point, exponent, no suffix"
+ (let ((form "10e10"))
+ (test-equal '(float (float-integer (base-10 "10"))
+ (exponent (base-10 "10")))
+ (lex form))
+ (test-equal 10e10 (run form))))
+
+ (test-group "No decimal point, negative exponent"
+ (let ((form "10e-10"))
+ (test-equal '(float (float-integer (base-10 "10"))
+ (exponent "-" (base-10 "10")))
+ (lex form))
+ (test-equal 10e-10 (run form))))
+
+ (test-group "No decimal point, exponent and suffix"
+ (let ((form "10e10L"))
+ (test-equal '(float (float-integer (base-10 "10"))
+ (exponent (base-10 "10"))
+ (float-suffix "L"))
+ (lex form))
+ (test-equal '(as-type (long double) 10e10)
+ (run form))))
+
+ (test-group "Leading period, no exponent or suffix"
+ (let ((form ".1"))
+ (test-equal '(float (float-decimal (base-10 "1"))) (lex form))
+ (test-equal 0.1 (run form))))
+
+ (test-group "Trailing period, no exponent or suffix"
+ (let ((form "1."))
+ (test-equal '(float (float-integer (base-10 "1"))) (lex form))
+ (test-equal 1.0 (run form)))))
+
+
+ (test-group "Negative float"
+ (let ((form "-1.0"))
+ (test-equal '(prefix (prefix-operator "-")
+ (float (float-integer (base-10 "1"))
+ (float-decimal (base-10 "0"))))
+ (lex form))
+ (test-equal '(- 1.0) (run form))))
+
+
+
+ (test-group "Real world examples"
+ (let ((form "4.9406564584124654e-324"))
+ (test-equal '(float (float-integer (base-10 "4"))
+ (float-decimal (base-10 "9406564584124654"))
+ (exponent "-" (base-10 "324")))
+ (lex form))
+ (test-equal 4.9406564584124654e-324 (run form)))
+
+ (let ((form "1.7976931348623157e+308"))
+ (test-equal '(float (float-integer (base-10 "1"))
+ (float-decimal (base-10 "7976931348623157"))
+ (exponent "+" (base-10 "308")))
+ (lex form))
+ (test-equal 1.7976931348623157e+308 (run form))))
+
+ (test-group "Float in infix expression"
+ (test-group "Simple case"
+ (let ((form "1. + .1"))
+ (test-equal '(infix (float (float-integer (base-10 "1")))
+ (operator "+")
+ (float (float-decimal (base-10 "1"))))
+ (lex form))
+ (test-equal '(+ 1.0 0.1) (run form))))
+ ;; (test-group "Complicated case")
+ ))
+
+(test-group "Typecasts"
+
+ (let ((form "(unsigned) 5"))
+ (test-equal '((group (variable "unsigned"))
+ (integer (base-10 "5")))
+ (lex form))
+ (test-equal '(as-type (unsigned) 5)
+ (run form)))
+
+ (let ((form "(unsigned integer) 5"))
+ (test-equal '((group (variable "unsigned")
+ (variable "integer"))
+ (integer (base-10 "5")))
+ (lex form))
+ (test-equal '(as-type (unsigned integer) 5) (run form)))
+
+ (test-group "Pointer with space"
+ (let ((form "(int *) 5"))
+ (test-equal '((group (postfix (variable "int")
+ (postfix-operator "*")))
+ (integer (base-10 "5")))
+ (lex form))
+ (test-equal '(as-type (int *) 5)
+ (run form))))
+
+ (test-group "Pointer without space"
+ (let ((form "(int*) 5"))
+ (test-equal '((group (postfix (variable "int")
+ (postfix-operator "*")))
+ (integer (base-10 "5")))
+ (lex form))
+ (test-equal '(as-type (int *) 5)
+ (run form))))
+
+ (test-group "Multi word type pointer"
+ (let ((form "(unsigned int*) 5"))
+ (test-equal '((group (variable "unsigned")
+ (postfix (variable "int")
+ (postfix-operator "*")))
+ (integer (base-10 "5")))
+ (lex form))
+ (test-equal '(as-type (unsigned int *) 5)
+ (run form))))
+
+ (test-group "Double cast"
+ (let ((form "(int)(unsigned) 5"))
+ (test-equal '((group (variable "int"))
+ (group (variable "unsigned"))
+ (integer (base-10 "5"))) (lex form))
+ (test-equal '(as-type (int) (as-type (unsigned) 5))
+ (run form))))
+
+ (test-group "Cast with operation"
+ (let ((form "(int) 5 + 7"))
+ (test-equal '((group (variable "int"))
+ (infix (integer (base-10 "5"))
+ (operator "+")
+ (integer (base-10 "7"))))
+ (lex form))
+
+ (test-equal '(+ (as-type (int) 5) 7)
+ (run form))))
+
+
+
+ (test-group "Tripple cast, with value inside paranthesis"
+ (let ((form "(type)(__uintptr_t)(const void *)(var)"))
+ (test-equal '((group (variable "type"))
+ (group (variable "__uintptr_t"))
+ (group (variable "const")
+ (postfix (variable "void")
+ (postfix-operator "*")))
+ (group (variable "var")))
+ (lex form))
+ (test-equal '(as-type (type)
+ (as-type (__uintptr_t)
+ (as-type (const void *)
+ var)))
+ (run form))))
+
+ (test-group "Same as above, but whole thing inside parenthesis"
+ (let ((form "((type)(__uintptr_t)(const void *)(var))"))
+ (test-equal '(group (group (variable "type"))
+ (group (variable "__uintptr_t"))
+ (group (variable "const")
+ (postfix (variable "void")
+ (postfix-operator "*")))
+ (group (variable "var")))
+ (lex form))
+ (test-equal '(as-type (type)
+ (as-type (__uintptr_t)
+ (as-type (const void *)
+ var)))
+ (run form))))
+
+ (let ((form "((type)(__uintptr_t)(const volatile void *)(var))"))
+ ;; (test-equal '() (lex form))
+ (test-equal '(as-type (type)
+ (as-type (__uintptr_t)
+ (as-type (const volatile void *)
+ var)))
+ (run form)))
+
+ (let ((form "((unsigned long) ((inout) | (((len) & IOCPARM_MASK) << 16) | ((group) << 8) | (num)))"))
+ (test-equal '(group (group (variable "unsigned") (variable "long"))
+ (group (infix (group (variable "inout"))
+ (operator "|")
+ (infix (group (infix (group (infix (group (variable "len"))
+ (operator "&")
+ (variable "IOCPARM_MASK")))
+ (operator "<<")
+ (integer (base-10 "16"))))
+ (operator "|")
+ (infix (group (infix (group (variable "group"))
+ (operator "<<")
+ (integer (base-10 "8"))))
+ (operator "|")
+ (group (variable "num")))))))
+ (lex form))
+ (test-equal '(as-type (unsigned long)
+ (bitor inout
+ (<< (bitand len IOCPARM_MASK) 16)
+ (<< group 8)
+ num))
+ (run form))))
+
+(test-group "Characters"
+ (let ((form "'c'"))
+ (test-equal '(char "c") (lex form))
+ (test-equal #x63 (run form)))
+
+ (let ((form "'\\n'"))
+ (test-equal '(char (escaped-char "n")) (lex form))
+ (test-equal (char->integer #\newline) (run form))))
+
+(test-group "Strings"
+ (test-group "Empty string"
+ (let ((form "\"\""))
+ (test-equal 'string (lex form))
+ (test-equal #vu8(0) (run form))))
+
+ (test-group "Simple string"
+ (let ((form "\"li\""))
+ (test-equal '(string "li") (lex form))
+ (test-equal #vu8(#x6C #x69 0) (run form))))
+
+ (test-group "Implicit concatenation of strings"
+ (let ((form "\"a\" \"b\""))
+ (test-equal '((string "a")
+ (string "b"))
+ (lex form))
+ (test-equal #vu8(#x61 #x62 0)
+ (run form))))
-(test-equal
- '(+ (post-increment (dereference C)) 3)
- (run "*C++ + 3"))
+ (test-group "Implicit concatenation of string and macro"
+ (let ((form "\"a\" MACRO"))
+ (test-equal '((string "a") (variable "MACRO")) (lex form))
+ (test-equal '() (run form))))
-(test-equal
- '(post-increment (dereference C))
- (run "*C++"))
+ (test-group "String with only escape"
+ (let ((form (string #\" #\\ #\" #\")))
+ (test-equal `(string (escaped-char "\"")) (lex form))
+ (test-equal #vu8(34 0) (run form))))
-(test-equal
- '(+ (post-increment C) (post-increment C))
- (run "C++ + C++"))
+ (test-group "String with escape at start"
+ (let ((form (string #\" #\\ #\" #\a #\")))
+ (test-equal `(string (escaped-char "\"") "a") (lex form))
+ (test-equal #vu8(34 #x61 0) (run form))))
-(test-equal
- '(+ (pre-increment C) (pre-increment C))
- (run "++C + ++C"))
+ (test-group "String with escape at end"
+ (let ((form (string #\" #\a #\\ #\" #\")))
+ (test-equal `(string "a" (escaped-char "\"")) (lex form))
+ (test-equal #vu8(#x61 34 0) (run form))))
-(test-equal '(+ 2 (* 2 2)) (run "2 + 2 * 2"))
+ (test-group "String with escape in middle"
+ (let ((form (string #\" #\a #\\ #\" #\b #\")))
+ (test-equal `(string "a" (escaped-char "\"") "b") (lex form))
+ (test-equal #vu8(#x61 34 #x62 0) (run form))))
-(test-equal '(+ (* 2 2) 2) (run "2 * 2 + 2"))
+ ;; \e is semi non-standard
+ (test-group "String with bakslash-e esacpe"
+ (let ((form "\"\\e\""))
+ (test-equal '(string (escaped-char "e")) (lex form))
+ (test-equal #vu8(#x1b 0) (run form))))
-(test-equal '(+ 2 2 2) (run "2+2+2"))
+ (test-group "String with byte secquence escape"
+ (let ((form "\"\\xf0\\x9f\\x92\\xa9\""))
+ (test-equal '(string (escaped-char (base-16-char "f0"))
+ (escaped-char (base-16-char "9f"))
+ (escaped-char (base-16-char "92"))
+ (escaped-char (base-16-char "a9")))
+ (lex form))
+ (test-equal #vu8(#xf0 #x9f #x92 #xa9 0) (run form)))))
+(test-group "__asm__"
+ (let ((form "__asm__(\".globl \" __XSTRING(sym))"))
+ (test-equal '() (lex form))
+ ;; TODO implicit string concatenation
+ (test-equal '(funcall __asm__
+ (string ".globl ")
+ (funcall __XSTRING sym)) (run form))))
+(let ((form "__attribute__((__aligned__(x)))"))
+ (test-equal '(funcall (variable "__attribute__")
+ (group (group (funcall (variable "__aligned__")
+ (group (variable "x"))))))
+ (lex form))
+ ;; This drops the extra set of parenthesis. Do we care?
+ (test-equal '(funcall __attribute__
+ (funcall __aligned__ x))
+ (run form)))
diff --git a/tests/test/cpp/cpp-environment.scm b/tests/test/cpp/cpp-environment.scm
new file mode 100644
index 00000000..684c0fb5
--- /dev/null
+++ b/tests/test/cpp/cpp-environment.scm
@@ -0,0 +1,45 @@
+(define-module (test cpp cpp-environmunt)
+ :use-module (srfi srfi-64)
+ :use-module (srfi srfi-88)
+ :use-module (c cpp-environment)
+ :use-module ((c lex2) :select (lex))
+ :use-module (c cpp-environment object-like-macro)
+ )
+
+(let ((e (make-environment)))
+ (test-equal '(outside) (cpp-if-status e))
+ (let ((e* (enter-active-if e)))
+ (test-equal "Enter works" '(active-if outside) (cpp-if-status e*))
+ (test-equal "Original object remainins unmodified"
+ '(outside) (cpp-if-status e))))
+
+(define cpp-file-stack (@@ (c cpp-environment) cpp-file-stack))
+
+(let ((e (make-environment)))
+ (test-equal "Default file stack"
+ '(("*outside*" . 1))
+ (cpp-file-stack e))
+ (let ((e* (enter-file e "test.c")))
+ (test-equal "File stack after entering file"
+ '(("test.c" . 1) ("*outside*" . 1)) (cpp-file-stack e*))
+ (let ((e** (bump-line e*)))
+ (test-equal 2 (current-line e**)))))
+
+
+
+(let ((e (make-environment)))
+ (let ((e* (add-identifier
+ e "key"
+ (object-like-macro
+ identifier: "key"
+ body: (lex "value")))))
+ (let ((result (get-identifier e* "key")))
+ (test-assert (cpp-macro? result))
+ (test-equal (lex "value")
+ (macro-body result)))))
+
+(let ((e (make-environment)))
+ (let ((result (get-identifier e "key")))
+ (test-assert "Missing identifier returns #f"
+ (not result)))
+ )
diff --git a/tests/test/cpp/lex2.scm b/tests/test/cpp/lex2.scm
new file mode 100644
index 00000000..f4f9b857
--- /dev/null
+++ b/tests/test/cpp/lex2.scm
@@ -0,0 +1,177 @@
+(define-module (test cpp lex2)
+ :use-module (srfi srfi-64)
+ :use-module (srfi srfi-88)
+ :use-module (ice-9 peg)
+ :use-module (c lex2))
+
+(define (l s) (lexeme type: 'preprocessing-token body: s))
+
+(define (ls . xs)
+ (map l xs))
+
+(test-equal "Integer literal"
+ (ls '(pp-number "10"))
+ (lex "10"))
+
+(test-equal "String literal"
+ (ls `(string-literal (encoding-prefix) "Hello"))
+ (lex "\"Hello\""))
+
+
+(test-equal "Mulitple tokens, including whitespace"
+ (list (lexeme type: 'whitespace body: " ")
+ (l '(pp-number "10"))
+ (lexeme type: 'whitespace body: " "))
+ (lex " 10 "))
+
+(test-equal "Char literal"
+ (ls `(character-constant (character-prefix) "a"))
+ (lex "'a'"))
+
+
+
+(test-equal "Comment inside string"
+ (ls `(string-literal (encoding-prefix) "Hel/*lo"))
+ (lex "\"Hel/*lo\""))
+
+(test-equal "#define line"
+ (list
+ (l '(punctuator "#"))
+ (l '(identifier "define"))
+ (lexeme type: 'whitespace body: " ")
+ (l '(identifier "f"))
+ (l '(punctuator "("))
+ (l '(identifier "x"))
+ (l '(punctuator ")"))
+ (lexeme type: 'whitespace body: " ")
+ (l '(pp-number "10")))
+ (lex "#define f(x) 10"))
+
+
+
+(test-equal "Nested parenthesis"
+ (list
+ (l '(identifier "f"))
+ (l '(punctuator "("))
+ (l '(pp-number "1"))
+ (l '(punctuator ","))
+ (lexeme type: 'whitespace body: " ")
+ (l '(punctuator "("))
+ (l '(pp-number "2"))
+ (l '(punctuator ","))
+ (lexeme type: 'whitespace body: " ")
+ (l '(pp-number "3"))
+ (l '(punctuator ")"))
+ (l '(punctuator ","))
+ (lexeme type: 'whitespace body: " ")
+ (l '(pp-number "4"))
+ (l '(punctuator ")")))
+ (lex "f(1, (2, 3), 4)"))
+
+
+
+;; Generating a single lexeme
+;; (whitespace " ")
+;; would also be ok
+(test-equal "Grouped whitespace"
+ (list (lexeme type: 'whitespace body: " ")
+ (lexeme type: 'whitespace body: " "))
+ (lex " "))
+
+(test-equal "Newlines get sepparate whitespace tokens"
+ (list (lexeme type: 'whitespace body: " ")
+ (lexeme type: 'whitespace body: " ")
+ (lexeme type: 'whitespace body: "\n")
+ (lexeme type: 'whitespace body: " "))
+ (lex " \n "))
+
+
+;; Refer to 6.4 p.1 for the syntax requirement
+;; 6.10.9 p. 2 for the sample string
+(test-equal "each non-white-space character that cannot be one of the above"
+ (list (l '(punctuator "."))
+ (l '(punctuator "."))
+ (lexeme type: 'other body: "\\") ; <- Interesting part
+ (l '(identifier "listing"))
+ (l '(punctuator "."))
+ (l '(identifier "dir")))
+ (lex "..\\listing.dir"))
+
+
+(test-equal "Propper H-string"
+ (ls '(header-name (h-string "a")))
+ (lex "<a>"))
+
+(test-equal "Unexpected h-string"
+ (list (l '(pp-number "1"))
+ (lexeme type: 'whitespace body: " ")
+ (l '(header-name (h-string " 2 ")))
+ (lexeme type: 'whitespace body: " ")
+ (l '(pp-number "3")))
+ (lex "1 < 2 > 3"))
+
+(test-equal "Quotation mark inside h-string"
+ (ls '(header-name (h-string "a\"b")))
+ (lex "<a\"b>"))
+
+(test-equal "Interaction of h-strings and regular strings"
+ (test-equal "Less than string, not h-string"
+ (ls '(pp-number "1")
+ '(string-literal (encoding-prefix) "<")
+ '(punctuator ">"))
+ (lex "1\"<\">"))
+
+ (test-equal "H-string, not string"
+ (list (lexeme type: 'preprocessing-token body: '(pp-number "1"))
+ (lexeme type: 'preprocessing-token body: '(header-name (h-string "\"")))
+ (lexeme type: 'other body: "\""))
+ (lex "1<\">\"")))
+
+(test-equal "Q-strings are lexed as regular strings"
+ (list (l '(punctuator "#"))
+ (l '(identifier "include"))
+ (lexeme type: 'whitespace body: " ")
+ (l '(string-literal (encoding-prefix) "test")))
+ ;; # include here, since generated tokens could possible depend on that context,
+ ;; and the reason regular strings are returned is since the lexer doesn't check
+ ;; that context
+ (lex "#include \"test\"")
+ )
+
+
+
+(test-group "Unicode"
+ (test-equal "In string literals"
+ (ls '(string-literal (encoding-prefix) "åäö"))
+ (lex "\"åäö\""))
+
+ (test-equal "Outside string literals"
+ (list (lexeme type: 'other body: "å")
+ (lexeme type: 'other body: "ä")
+ (lexeme type: 'other body: "ö"))
+ (lex "åäö")))
+
+
+
+
+(test-group "Characters with prefixes"
+ (test-equal (ls '(character-constant (character-prefix . "u")
+ "a"))
+ (lex "u'a'"))
+ (test-equal (ls '(character-constant (character-prefix . "U")
+ "a"))
+ (lex "U'a'"))
+ (test-equal (ls '(character-constant (character-prefix . "L")
+ "a"))
+ (lex "L'a'")))
+
+;; Note that these strings have 0 "data" components
+(test-group "Strings with prefixes"
+ (test-equal (ls '(string-literal (encoding-prefix . "u8")))
+ (lex "u8\"\""))
+ (test-equal (ls '(string-literal (encoding-prefix . "u")))
+ (lex "u\"\""))
+ (test-equal (ls '(string-literal (encoding-prefix . "U")))
+ (lex "U\"\""))
+ (test-equal (ls '(string-literal (encoding-prefix . "L")))
+ (lex "L\"\"")))
diff --git a/tests/test/cpp/preprocessor2.scm b/tests/test/cpp/preprocessor2.scm
new file mode 100644
index 00000000..1df1a621
--- /dev/null
+++ b/tests/test/cpp/preprocessor2.scm
@@ -0,0 +1,1247 @@
+(define-module (test cpp preprocessor2)
+ :use-module (srfi srfi-64)
+ :use-module (srfi srfi-64 util)
+ :use-module (srfi srfi-64 test-error)
+ :use-module (srfi srfi-71)
+ :use-module (srfi srfi-88)
+ :use-module ((hnh util) :select (-> unval))
+ :use-module ((hnh util lens) :select (set))
+ :use-module (c preprocessor2)
+ :use-module ((c cpp-environment)
+ :select (extend-environment
+ make-environment
+ get-identifier
+ enter-file
+ in-environment?
+ macro-identifier-list
+ macro-body
+ cpp-file-stack))
+ :use-module ((c cpp-environment function-like-macro) :select (function-like-macro))
+ :use-module ((c cpp-environment object-like-macro) :select (object-like-macro))
+ :use-module ((c cpp-util)
+ :select (drop-whitespace-both
+ tokens-until-eol
+ squeeze-whitespace
+ cleanup-whitespace
+ next-token-matches?
+ ))
+ :use-module ((c unlex)
+ :select (
+ unlex
+ unlex-aggressive
+ stringify-token
+ stringify-tokens
+ )
+ )
+ :use-module ((c cpp-types)
+ :select (punctuator-token? identifier-token?))
+ :use-module (c lex2)
+ )
+
+;; TODO Redefinition checking code isn't yet written
+(test-skip "Example 6")
+
+;; See (c preprocessor2) TODO#1
+(test-expect-fail (test-match-group
+ "6.10.3.5 Scope of macro definitions"
+ "Example 3"))
+
+;; TODO # if (and # elif) aren't yet implemented
+(test-skip (test-match-group "Conditionals" "if"))
+
+(define apply-macro (@@ (c preprocessor2) apply-macro))
+(define build-parameter-map (@@ (c preprocessor2) build-parameter-map))
+(define expand# (@@ (c preprocessor2) expand#))
+(define expand## (@@ (c preprocessor2) expand##))
+(define expand-macro (@@ (c preprocessor2) expand-macro))
+(define handle-line-directive (@@ (c preprocessor2) handle-line-directive))
+(define handle-preprocessing-tokens (@@ (c preprocessor2) handle-preprocessing-tokens))
+(define join-file-line (@@ (c preprocessor2) join-file-line))
+(define mark-noexpand (@@ (c preprocessor2) mark-noexpand))
+(define maybe-extend-identifier (@@ (c preprocessor2) maybe-extend-identifier))
+(define parse-identifier-list (@@ (c preprocessor2) parse-identifier-list))
+(define parse-parameter-list (@@ (c preprocessor2) parse-parameter-list))
+(define resolve-define (@@ (c preprocessor2) resolve-define))
+(define resolve-token-stream (@@ (c preprocessor2) resolve-token-stream))
+;; (define tokenize (@@ (c preprocessor2) tokenize))
+(define resolve-h-file (@@ (c preprocessor2) resolve-h-file))
+(define resolve-q-file (@@ (c preprocessor2) resolve-q-file))
+(define resolve-header (@@ (c preprocessor2) resolve-header))
+;; (define include-header (@@ (c preprocessor2) include-header))
+
+;; Remove the noexpand list from each token.
+
+;; Allows equal? with fresh tokens
+(define (remove-noexpand tokens)
+ ;; (typecheck tokens (list-of token?))
+ (map (lambda (token) (set token lexeme-noexpand '()))
+ tokens))
+
+(define* (run str optional: (env (make-environment)))
+ (let ((env tokens (handle-preprocessing-tokens env (tokenize str))))
+ (drop-whitespace-both (remove-noexpand tokens))))
+
+ (define (call-with-tmp-header string proc)
+ (let* ((filename (string-copy "/tmp/headerfile-XXXXXXX"))
+ (port (mkstemp! filename)))
+ (with-output-to-port port
+ (lambda () (display string)
+ ))
+ (close-port port)
+ (proc filename)))
+
+
+
+(test-group "Tokens until End Of Line"
+ (call-with-values
+ (lambda () (tokens-until-eol (lex "before\nafter")))
+ (lambda (bef aft)
+ (test-equal (lex "before") bef)
+ (test-equal (lex "\nafter") aft))))
+
+
+
+(test-equal "Squeeze whitespace"
+ (lex "bef aft")
+ (squeeze-whitespace
+ (append (lex "bef ")
+ (lex " aft"))))
+
+
+(test-group "Stringify"
+ (test-equal "("
+ (stringify-token (car (lex "("))))
+ ;; TODO more cases
+
+ (test-equal (car (lex "\"(a, b)\""))
+ (stringify-tokens (lex "(a, b)"))))
+
+
+(test-group "Parse identifier list"
+ (test-group "Single argument"
+ (let ((rest args remaining (parse-identifier-list (lex "(x)"))))
+ (test-assert (not rest))
+ (test-equal '("x") args)
+ (test-equal '() remaining)))
+
+ (test-group "Multiple parameters"
+ (let ((rest args remaining (parse-identifier-list (lex "(x, y)"))))
+ (test-assert (not rest))
+ (test-equal '("x" "y") args)
+ (test-equal '() remaining)))
+
+ (test-group "Zero parameters"
+ (let ((rest args remaining (parse-identifier-list (lex "()"))))
+ (test-assert (not rest))
+ (test-equal '() args)
+ (test-equal '() remaining)))
+
+ (test-group "Rest args after regular"
+ (let ((rest args remaining (parse-identifier-list (lex "(x, ...)"))))
+ (test-assert rest)
+ (test-equal '("x") args)
+ (test-equal '() remaining)))
+
+ (test-group "Only rest args"
+ (let ((rest args remaining (parse-identifier-list (lex "(...)"))))
+ (test-assert rest)
+ (test-equal '() args)
+ (test-equal '() remaining)))
+
+ (test-group "Errors"
+ (test-error "Compound forms are invalid"
+ 'wrong-type-arg (parse-identifier-list (lex "((y))")))
+
+ (test-error "Non-identifier atoms are invalid"
+ 'cpp-error (parse-identifier-list (lex "(1)")))
+
+ (test-error "Rest args not at end is invalid"
+ 'cpp-error (parse-identifier-list (lex "(..., y)")))))
+
+
+
+
+(test-equal "Clean up whitespace"
+ (lex "( 2 , 4 )")
+ (cleanup-whitespace (lex " \n ( 2 , \n 4 ) \t ")))
+
+
+;; Parameter lists (the callsite arguments to the macro)
+(test-group "Parameter list"
+ (test-group "Empty parameter list"
+ (let ((containing remaining nls (parse-parameter-list (lex "()"))))
+ (test-equal '(()) containing)
+ (test-equal '() remaining)
+ (test-equal 0 nls)))
+
+ (test-group "Single value in parameter list"
+ (let ((containing remaining nls (parse-parameter-list (lex "(x)"))))
+ (test-equal (list (lex "x")) containing)
+ (test-equal '() remaining)
+ (test-equal 0 nls)))
+
+ (test-group "Two values in parameter list"
+ (let ((containing remaining nls (parse-parameter-list (lex "(x, y)"))))
+ (test-equal (list (lex "x")
+ (lex " y"))
+ containing)
+ (test-equal '() remaining)
+ (test-equal 0 nls)))
+
+ (test-group "Three values in parameter list"
+ (let ((containing remaining nls (parse-parameter-list (lex "(x, y, z)"))))
+ (test-equal (list (lex "x")
+ (lex " y")
+ (lex " z"))
+ containing)
+ (test-equal '() remaining)
+ (test-equal 0 nls)))
+
+ (test-group "Two empty parameters"
+ (let ((containing remaining nls (parse-parameter-list (lex "(,)"))))
+ (test-equal (list (lex "") (lex "")) containing)
+ (test-equal '() remaining)
+ (test-equal 0 nls)))
+
+ (test-group "Numeric parameter"
+ (let ((containing remaining nls (parse-parameter-list (lex "(1)"))))
+ (test-equal (list (lex "1")) containing)
+ (test-equal '() remaining)
+ (test-equal 0 nls))
+ )
+
+ (test-group "Two values, one of which is a paretheseed pair"
+ (let ((containing remaining nls
+ (parse-parameter-list (lex "(x, (y, z))"))))
+ (test-equal (list (lex "x") (lex " (y, z)"))
+ containing)
+ (test-equal '() remaining)
+ (test-equal 0 nls)))
+
+ (test-group "Newline in parameters"
+ (let ((containing remaining nls (parse-parameter-list (lex "(\n1\n)"))))
+ (test-equal (list (lex "\n1\n")) containing)
+ (test-equal '() remaining)
+ (test-equal 2 nls))))
+
+(test-group "Build parameter map"
+ (test-equal "Simplest case, zero arguments"
+ '()
+ (let ((m (function-like-macro
+ identifier: "str"
+ identifier-list: '()
+ body: (lex "#x"))))
+ (build-parameter-map
+ m '())))
+
+ (test-equal "Single (simple) argument"
+ `(("x" . ,(lex "x")))
+ (let ((m (function-like-macro
+ identifier: "str"
+ identifier-list: '("x")
+ body: '())))
+ (build-parameter-map
+ m
+ (list (lex "x")))))
+
+ (test-equal "Single advanced argument"
+ `(("x" . ,(lex "(x)")))
+ (let ((m (function-like-macro
+ identifier: "str"
+ identifier-list: '("x")
+ body: '())))
+ (build-parameter-map
+ m (list (lex "(x)")))))
+
+ (test-group "Rest arguments"
+ (test-equal "Single simple"
+ `(("__VA_ARGS__" . ,(lex "x")))
+ (let ((m (function-like-macro
+ identifier: "str"
+ identifier-list: '()
+ variadic?: #t
+ body: '())))
+ (build-parameter-map
+ m (list (lex "x")))))
+
+ (test-equal "Two simple"
+ `(("__VA_ARGS__" . ,(lex "x,y")))
+ (let ((m (function-like-macro
+ identifier: "str"
+ identifier-list: '()
+ variadic?: #t
+ body: '())))
+ (build-parameter-map
+ m (list (lex "x,y")))))))
+
+
+(test-group "Expand stringifiers"
+ (let ((m (function-like-macro
+ identifier: "str"
+ identifier-list: '("x")
+ body: (lex "#x"))))
+ (test-equal "Correct stringification of one param"
+ (lex "\"10\"")
+ (expand#
+ m (build-parameter-map
+ m (list (lex "10"))))))
+
+ (let ((m (function-like-macro
+ identifier: "str"
+ identifier-list: '()
+ body: (lex "#x"))))
+ (test-error "Stringification fails for non-parameters"
+ 'macro-expand-error
+ (expand#
+ m (build-parameter-map
+ m (list (lex "x"))))))
+
+ (let ((m (function-like-macro
+ identifier: "f"
+ identifier-list: '()
+ variadic?: #t
+ body: (lex "# __VA_ARGS__"))))
+ (test-equal "Stringify __VA_ARGS__"
+ (lex "\"10, 20\"")
+ (expand# m (build-parameter-map m (list (lex "10, 20")))))))
+
+
+(let ((e (join-file-line (make-environment))))
+ (test-equal "__FILE__ default value"
+ (object-like-macro identifier: "__FILE__"
+ body: (lex "\"*outside*\""))
+ (get-identifier e "__FILE__"))
+ (test-equal "__LINE__ default value"
+ (object-like-macro identifier: "__LINE__"
+ body: (lex "1"))
+ (get-identifier e "__LINE__")))
+
+
+(test-group "Token streams"
+ (test-group "Non-expanding"
+ (test-equal "Null stream"
+ '() ((unval resolve-token-stream 1) (make-environment) '()))
+ (test-equal "Constant resolve to themselves"
+ (lex "1") ((unval resolve-token-stream 1) (make-environment) (lex "1")))
+ (test-equal "Identifier-likes not in environment stay put"
+ (lex "x") (remove-noexpand ((unval resolve-token-stream 1) (make-environment) (lex "x"))))
+ (test-equal "Identifier-likes with stuff after keep stuff after"
+ (lex "x 1") (remove-noexpand ((unval resolve-token-stream 1) (make-environment) (lex "x 1")))))
+
+ (test-group "Object likes"
+ (test-equal "Expansion of single token"
+ (lex "10")
+ (remove-noexpand
+ ((unval resolve-token-stream 1)
+ (extend-environment (make-environment)
+ (list (object-like-macro
+ identifier: "x"
+ body: (lex "10"))))
+ (lex "x"))))
+
+ (test-equal "Expansion keeps stuff after"
+ (lex "10 1")
+ (remove-noexpand
+ ((unval resolve-token-stream 1)
+ (extend-environment (make-environment)
+ (list (object-like-macro
+ identifier: "x"
+ body: (lex "10"))))
+ (lex "x 1"))))
+
+ (test-equal "Multiple object like macros in one stream"
+ (lex "10 20")
+ (remove-noexpand
+ ((unval resolve-token-stream 1)
+ (extend-environment (make-environment)
+ (list (object-like-macro
+ identifier: "x"
+ body: (lex "10"))
+ (object-like-macro
+ identifier: "y"
+ body: (lex "20"))))
+ (lex "x y"))))))
+
+
+(test-group "Macro expansion"
+ (test-group "Expand macro part 1"
+ ;; Expand object like macros
+ ;; apply-macro depends on this, but expand macro with function like macros
+ ;; depend on apply-macro, thereby the two parter
+ (test-group "Object like macros"
+ (call-with-values
+ (lambda () (expand-macro (make-environment)
+ (object-like-macro
+ identifier: "x" body: (lex "1 + 2"))
+ '()
+ '()))
+ (lambda (_ tokens) (test-equal "Simplest case" (lex "1 + 2") (remove-noexpand tokens))))
+
+ (call-with-values
+ (lambda () (expand-macro (make-environment)
+ (object-like-macro
+ identifier: "x" body: (lex "1+2"))
+ '()
+ (cdr (lex "x something else"))))
+ (lambda (_ tokens) (test-equal "Expansion with stuff after"
+ (lex "1+2 something else") (remove-noexpand tokens))))
+
+ ;; (call-with-values (expand-macro (make-environment)))
+
+ ))
+
+
+ (test-group "Maybe extend identifier"
+ (test-equal "Non-identifier returns remaining"
+ (lex "x")
+ (remove-noexpand ((unval maybe-extend-identifier 1)
+ (make-environment) "x" '()'())))
+
+ (test-equal "Non-identifiers remaining tokens are returned verbatim"
+ (append (lex "x") (lex "after"))
+ (remove-noexpand ((unval maybe-extend-identifier 1)
+ (make-environment) "x" '() (lex "after"))))
+
+ (test-equal "Object like identifier expands"
+ (lex "1 + 2")
+ (remove-noexpand ((unval maybe-extend-identifier 1)
+ (extend-environment (make-environment)
+ (list
+ (object-like-macro
+ identifier: "x"
+ body: (lex "1 + 2"))))
+ "x"
+ '()
+ '())))
+
+ (test-equal "Object like macro still returns remaining verbatim"
+ (append (lex "1 + 2") (lex "after"))
+ (remove-noexpand ((unval maybe-extend-identifier 1)
+ (extend-environment (make-environment)
+ (list
+ (object-like-macro
+ identifier: "x"
+ body: (lex "1 + 2"))))
+ "x"
+ '()
+ (lex "after"))))
+
+ )
+
+ (test-group "Apply macro"
+ (test-equal "zero arg macro on nothing"
+ (lex "1")
+ (remove-noexpand (apply-macro
+ (make-environment)
+ (function-like-macro identifier: "f"
+ identifier-list: '()
+ body: (lex "1"))
+ '())))
+
+ (test-equal "Single arg macro"
+ (lex "10")
+ (remove-noexpand (apply-macro
+ (make-environment)
+ (function-like-macro identifier: "f"
+ identifier-list: '("x")
+ body: (lex "x"))
+ ((unval parse-parameter-list) (lex "(10)")))))
+
+ (test-equal "Two arg macro"
+ (lex "10 + 20")
+ (remove-noexpand (apply-macro
+ (make-environment)
+ (function-like-macro identifier: "f"
+ identifier-list: '("x" "y")
+ body: (lex "x + y"))
+ ((unval parse-parameter-list) (lex "(10, 20)"))))))
+
+ (test-group "Expand macro part 2"
+ (test-group "Function like macros"
+ (let ((e (make-environment)))
+ (let ((m (function-like-macro
+ identifier: "f"
+ identifier-list: '()
+ body: (lex "1"))))
+ (call-with-values (lambda () (expand-macro e m '() (lex "()")))
+ (lambda (_ tokens*) (test-equal (lex "1") (remove-noexpand tokens*))))
+ (test-error "Arity error for to many args"
+ 'cpp-arity-error (expand-macro e m '() (lex "(10)"))))
+
+ (let ((m (function-like-macro
+ identifier: "f"
+ identifier-list: '("x")
+ variadic?: #t
+ body: (lex "__VA_ARGS__ x"))))
+ (call-with-values (lambda () (expand-macro e m '() (lex "(1)")))
+ (lambda (_ tokens*) (test-equal (lex " 1") (remove-noexpand tokens*))))
+ ;; This doesn't fail, since a single required argument is satisfied by the default nothing
+ #;
+ (test-error "Arity error on too few args (with variadic)"
+ 'cpp-arity-error (expand-macro e m '() (lex "()")))
+ (call-with-values (lambda () (expand-macro e m '() (lex "(1,2,3)")))
+ (lambda (_ tokens*) (test-equal (lex "2,3 1") (remove-noexpand tokens*))))
+ )
+ ))))
+
+(let ((e (make-environment)))
+ (test-group "Resolve token stream with function likes"
+ (test-equal "Macro expanding to its parameter"
+ (lex "0")
+ (remove-noexpand ((unval resolve-token-stream 1)
+ (extend-environment
+ e (list (function-like-macro identifier: "f"
+ identifier-list: '("x")
+ body: (lex "x"))))
+ (lex "f(0)"))))
+
+ (test-equal "Macro expanding parameter multiple times"
+ (lex "(2) * (2)")
+ (remove-noexpand ((unval resolve-token-stream 1)
+ (extend-environment
+ e (list (function-like-macro identifier: "f"
+ identifier-list: '("x")
+ body: (lex "(x) * (x)"))))
+ (lex "f(2)")))
+ )
+
+ (test-equal "Object like contains another object like"
+ (lex "z")
+ (remove-noexpand ((unval resolve-token-stream 1)
+ (extend-environment
+ e (list (object-like-macro identifier: "x"
+ body: (lex "y"))
+ (object-like-macro identifier: "y"
+ body: (lex "z"))))
+ (lex "x"))))
+
+ (test-equal "function like contains another macro"
+ (lex "10")
+ (remove-noexpand ((unval resolve-token-stream 1)
+ (extend-environment
+ e (list (function-like-macro identifier: "f"
+ identifier-list: '("x")
+ body: (lex "g(x)"))
+ (function-like-macro identifier: "g"
+ identifier-list: '("y")
+ body: (lex "y"))))
+ (lex "f(10)"))))
+
+
+ (test-equal "function like containing another macro using the same parameter name"
+ (lex "10")
+ (remove-noexpand ((unval resolve-token-stream 1)
+ (extend-environment
+ e (list (function-like-macro identifier: "f"
+ identifier-list: '("x")
+ body: (lex "g(x)"))
+ (function-like-macro identifier: "g"
+ identifier-list: '("x")
+ body: (lex "x"))))
+ (lex "f(10)"))))
+
+
+
+ (test-equal "function like contains another macro"
+ (lex "10 * 2 + 20 * 2 + 30")
+ (remove-noexpand ((unval resolve-token-stream 1)
+ (extend-environment
+ e (list (function-like-macro identifier: "f"
+ identifier-list: '("x" "y")
+ body: (lex "g(x) + g(y)"))
+ (function-like-macro identifier: "g"
+ identifier-list: '("x")
+ body: (lex "x * 2"))))
+ (lex "f(10, 20) + 30"))))))
+
+(let ((e (extend-environment
+ (make-environment)
+ (list (@ (c preprocessor2) defined-macro)))))
+ (test-group "defined() macro"
+ (test-equal "defined(NOT_DEFINED)"
+ (lex "0") (remove-noexpand ((unval resolve-token-stream 1) e (lex "defined(X)"))))
+ (test-equal "defined(DEFINED)"
+ (lex "1") (remove-noexpand ((unval resolve-token-stream 1)
+ (extend-environment
+ e (list (object-like-macro identifier: "X"
+ body: (lex "10"))))
+ (lex "defined(X)"))))))
+
+
+(let ((env (resolve-define (make-environment)
+ (lex "f(x) x+1"))))
+ (test-assert "New binding added" (in-environment? env "f"))
+ (let ((m (get-identifier env "f")))
+ (test-equal "Macro parameters" '("x") (macro-identifier-list m))
+ (test-equal "Macro body" (lex "x+1") (macro-body m))))
+
+;; This should issue a warning, since the standard requires a space after the ending parenthe here (6.10.3)
+;; (resolve-define (make-environment)
+;; (lex "f(x)x+1"))
+
+(test-group "Recursive macros"
+ (let ((env (resolve-define (make-environment)
+ (lex "x x"))))
+ (test-equal "Macro expanding to itself leaves the token"
+ (mark-noexpand (lex "x") "x")
+ ((unval resolve-token-stream 1) env (lex "x"))))
+
+ ;; Test from C standard 6.10.3.4 p. 4
+ ;; Both the expansion "2*f(9)" and "2*9*g" are valid.
+ ;; The case chosen here is mostly a consequence of how the code works
+ (let ((env (-> (make-environment)
+ (resolve-define (lex "f(a) a*g"))
+ (resolve-define (lex "g(a) f(a)")))))
+ (test-equal "Mutual recursion with two function like macros"
+ (lex "2*f(9)")
+ (remove-noexpand ((unval resolve-token-stream 1) env (lex "f(2)(9)")))))
+
+ (let ((env (-> (make-environment)
+ (resolve-define (lex "f 2 * g"))
+ (resolve-define (lex "g(x) x + f")))))
+ (test-equal "Mutual recursion with object and function like macro"
+ (lex "2 * 10 + f")
+ (remove-noexpand ((unval resolve-token-stream 1) env (lex "f(10)")))))
+
+ (let ((env (-> (make-environment)
+ (resolve-define (lex "x 2*y"))
+ (resolve-define (lex "y 3*x")))))
+ (test-equal "Mutual recursion with two object likes"
+ (lex "2*3*x")
+ (remove-noexpand ((unval resolve-token-stream 1) env (lex "x"))))))
+
+
+
+
+(test-group "Line directive"
+ (let ((e (make-environment)))
+ (test-equal "#line <number>"
+ '(("*outside*" . 9))
+ (cpp-file-stack (handle-line-directive e (lex "10"))))
+ (test-equal "#line <line> <file>"
+ '(("file" . 9))
+ (cpp-file-stack (handle-line-directive e (lex "10 \"file\""))))
+
+ (test-equal "#line <macro>"
+ '(("*outside*" . 9))
+ (cpp-file-stack
+ (handle-line-directive
+ (resolve-define e (lex "x 10"))
+ (lex "x"))))))
+
+
+;; NOTE these tests assume a "regular" Unix system
+(test-group "#include"
+ (test-group "Resolve header paths"
+ (test-equal "Find in path"
+ "/usr/include/stdio.h"
+ (resolve-h-file "stdio.h"))
+
+ (test-error "Fail if not in path"
+ 'cpp-error
+ (resolve-h-file "This file doesn't exist"))
+
+ (test-equal "Q-string with absolute path"
+ "/dev/null"
+ (resolve-q-file "/dev/null"))
+ (test-error "Q-File fails for missing file"
+ 'cpp-error (resolve-q-file "This file doesn't exists"))
+
+ (test-equal "Q-strings also look in path"
+ "/usr/include/stdio.h"
+ (resolve-q-file "stdio.h")))
+
+ (test-group "resolve-header returns paths from pp tokens (from #include directive)"
+ (test-equal "H-string"
+ "/usr/include/stdio.h"
+ (resolve-header (make-environment)
+ (lex "<stdio.h>")))
+ (test-equal "Q-string"
+ "/usr/include/stdio.h"
+ (resolve-header (make-environment)
+ (lex "\"stdio.h\""))))
+
+ ;; TODO #include is subject to macro expansion
+
+ (test-group "Actually including stuff"
+ (call-with-tmp-header "
+#define X 10
+int x;
+" (lambda (filename)
+ (test-equal "Include through #include"
+ (lex "int x;\n\n10")
+ (run (format #f "
+#include \"~a\"
+X
+" filename))))))
+
+ ;; NOTE should really be below "regular" __LINE__ tests
+ (call-with-tmp-header "__LINE__" (lambda (path)
+ (test-equal "__LINE__ in other file"
+ (lex "1")
+ (run (format #f "#include \"~a\"\n" path))))))
+
+
+
+
+
+(call-with-values (lambda ()
+ (handle-preprocessing-tokens (make-environment)
+ (lex "1")))
+ (lambda (env tokens)
+ (test-equal "Simplest case" (lex "1") tokens)))
+
+
+
+(test-equal "Define"
+ (lex "1")
+ (run "
+#define x 1
+x"))
+
+(test-group "__LINE__ and __FILE__"
+ (test-group "__LINE__"
+ (test-equal "only __LINE__"
+ (lex "1")
+ (run "__LINE__"))
+
+ (test-equal "__LINE__ after linebreak"
+ (lex "2")
+ (run "\n__LINE__"))
+
+
+ (test-equal "__LINE__ through macro"
+ (lex "5")
+ (drop-whitespace-both (run " // 1
+#define x __LINE__ // 2
+// 3
+// 4
+x // 5"))
+ )
+
+ (test-equal "__LINE__ standalone"
+ (lex "5")
+ (drop-whitespace-both
+ (run " // 1
+// 2
+// 3
+// 4
+__LINE__"))))
+
+ (test-equal "__FILE__"
+ (lex "\"sample-file.c\"")
+ (run "__FILE__" (enter-file (make-environment) "sample-file.c")))
+
+ (test-group "#line"
+ (test-equal "Updating line"
+ (lex "10")
+ (run "#line 10\n__LINE__"))
+
+ (test-equal "Updating line and file"
+ (lex "10 \"file.c\"")
+ (run "#line 10 \"file.c\"\n__LINE__ __FILE__"))
+ )
+ )
+
+
+
+(test-group "expand##"
+ (test-error 'cpp-error (expand## (lex "a ##")))
+ (test-error 'cpp-error (expand## (lex "## a")))
+ (test-error 'cpp-error (expand## (lex "##")))
+ (test-equal (lex "ab") (expand## (lex "a ## b")))
+ )
+
+(test-group "Token concatenation"
+
+
+ (test-equal "Token concatenation in function like macro"
+ (lex "ab")
+ (run "
+#define f() a ## b
+f()"))
+
+ (test-equal "token concatentanion in object like macro"
+ (lex "ab")
+ (run "
+#define x a ## b
+x"))
+
+ (test-equal "Token concatenation with parameter"
+ (lex "ab")
+ (run "
+#define f(x) x ## b
+f(a)"))
+
+
+ ;; 6.10.3.3 p. 4
+ (test-equal "x ## y"
+ (lex "char p[] = \"x ## y\"")
+ (run "
+#define hash_hash # ## #
+#define mkstr(a) # a
+#define in_between(a) mkstr(a)
+#define join(c, d) in_between(c hash_hash d)
+
+char p[] = join(x, y)")))
+
+(test-group "__VA_ARGS__"
+ (test-equal "__VA_ARGS__ split its arguments"
+ (lex "1")
+ (run "
+#define fst(x, y) x
+#define f(...) fst(__VA_ARGS__)
+f(1,2)
+"))
+
+ (test-equal
+ "Stringify __VA_ARGS__"
+ (lex "\"1,2\"")
+ (run "
+#define g(...) #__VA_ARGS__
+g(1,2)
+"))
+
+ (test-equal "__VA_ARGS__ keep whitespace"
+ (lex "x, y")
+ (run "
+#define args(...) __VA_ARGS__
+args(x, y)
+"))
+
+ (test-equal "Concat with __VA_ARGS__"
+ (lex "fx,y")
+ (run "
+#define wf(...) f ## __VA_ARGS__
+wf(x,y)
+"))
+
+ (test-equal
+ "Concat with __VA_ARGS__ (keeping whitespace)"
+ (lex "fx, y")
+ (run "
+#define wf(...) f ## __VA_ARGS__
+wf(x, y)
+")))
+
+
+(test-equal "Usage before #define"
+ (lex "X")
+ (run "X
+
+#define X 100"))
+
+(test-equal "#undef"
+ (lex "X\n10\nX")
+ (run "
+X
+#define X 10
+X
+#undef X
+X
+")
+ )
+
+(test-error "#error directive"
+ 'cpp-error-directive
+ (run "#error anything goes here"))
+
+(test-error "#error without body"
+ 'cpp-error-directive
+ (run "#error"))
+
+(test-group "Pragma"
+ (test-group "#pragma"
+ (test-equal "#Pragma STDC FP_CONTRACT ON"
+ (with-output-to-string (lambda () (run "#pragma STDC FP_CONTRACT ON")))))
+
+ (test-group "_Pragma"
+ (let ((e (extend-environment (make-environment)
+ (list
+ (@ (c preprocessor2) _Pragma-macro)))))
+ (test-equal "#Pragma STDC FP_CONTRACT ON"
+ (with-output-to-string
+ (lambda () (run "_Pragma(\"STDC FP_CONTRACT ON\")" e))))
+
+ ;; 6.10.9 example
+ (test-group "Non-standard #Pragma: listing on \"..\\\\listing.dir\""
+ ;; source: LISTING( ..\listing.dir )
+ ;; dest: _Pragma( "listing on \"..\\listing.dir\"")
+
+ (test-equal "Dry-run"
+ "pragma(\"listing on \\\"..\\\\\\\\listing.dir\\\"\")"
+ (unlex (run "
+#define LISTING(x) PRAGMA(listing on #x)
+#define PRAGMA(x) pragma(#x)
+LISTING(..\\listing.dir)"))
+ )
+
+ (test-equal "With _Pragma"
+ "Non-standard #Pragma: listing on \"..\\\\listing.dir\""
+ (with-output-to-string
+ (lambda ()
+ (run "
+#define LISTING(x) PRAGMA(listing on #x)
+#define PRAGMA(x) _Pragma(#x)
+LISTING(..\\listing.dir)
+" e))))))
+ ))
+
+(test-group "Next token matches?"
+ (test-assert "Zero tokens never match" (not (next-token-matches? (const #t) '())))
+
+ (test-assert "Non-matching token"
+ (not (next-token-matches? punctuator-token? (lex "x+y"))))
+
+ (test-assert "Maching token"
+ (next-token-matches? identifier-token? (lex "x+y")))
+
+ (test-assert "Matching token, after whitespace"
+ (next-token-matches? identifier-token? (lex " \n x + y"))))
+
+
+(test-equal "Function likes aren't expanded if not followed by a left parenthese"
+ (lex "f")
+ (run "
+#define f(x)
+f"))
+
+(test-equal "Parameter expansion times"
+ (lex "fx fy") (run "
+#define fw(x) f ## x
+#define ffw(x) fw(x)
+#define x y
+fw(x) ffw(x)
+"))
+
+(test-equal (lex "(5 + 10)") (run "
+#define x 10
+#define f(a) a
+#define g h
+#define h(x) (x + 10)
+f(g)(5)"))
+
+
+;; (expand-macro
+;; (extend-environment
+;; (make-environment)
+;; (list (object-like-macro identifier: "g"
+;; body: (lex "h"))
+;; (function-like-macro identifier: "h"
+;; identifier:-list '("x")
+;; body: (lex "(x + 10)"))))
+;; (function-like-macro identifier: "f"
+;; identifier:-list '("a")
+;; body: (lex "a"))
+;; '()
+;; (lex "(g)(5)"))
+
+;; ;; ⇒ #<<cpp-environment> cpp-if-status: (outside) cpp-variables: #<hash-table 7f6f5974d6a0 2/31> cpp-file-stack: (("*outside*" . 1))>
+;; ⇒ (#<<lexeme> type: preprocessing-token body: (identifier "h") noexpand: ("f" "h")>
+;; #<<lexeme> type: preprocessing-token body: (punctuator "(") noexpand: ()>
+;; #<<lexeme> type: preprocessing-token body: (pp-number "5") noexpand: ()>
+;; #<<lexeme> type: preprocessing-token body: (punctuator ")") noexpand: ()>)
+
+(test-equal "non-adjacent parameter list"
+ (lex "2*10")
+ (run "
+#define f(x) 2*x
+f (10)"))
+
+(test-equal "parameter-list on own line"
+ (lex "2*10")
+ (run "
+#define f(x) 2*x
+f
+
+
+(10)"))
+
+
+(test-group "6.10.3.5 Scope of macro definitions"
+
+ (test-equal "Example 3, except part below"
+ (unlex-aggressive (lex "f(2 * (y+1)) + f(2 * (f(2 * (z[0])))) % f(2 * (0)) + t(1);
+f(2 * (2+(3,4)-0,1)) | f(2 * (~ 5)) & ^m(0,1);
+int i[] = { 1, 23, 4, 5, };
+char c[2][6] = { \"hello\", \"\" };"))
+ (unlex-aggressive (run "
+#define x 3
+#define f(a) f(x * (a))
+#undef x
+#define x 2
+#define g f
+#define z z[0]
+#define h g(~
+#define m(a) a(w)
+#define w 0,1
+#define t(a) a
+#define p() int
+#define q(x) x
+#define r(x,y) x ## y
+#define str(x) # x
+
+f(y+1) + f(f(z)) % t(t(g)(0) + t)(1);
+g(x+(3,4)-w) | h 5) &
+ ^m(m);
+p() i[q()] = { q(1), r(2,3), r(4,), r(,5), r(,) };
+char c[2][6] = { str(hello), str() };"))
+ )
+
+ (test-group "Example 3"
+ (test-equal "Subtest 1, is result of function application further macro expanded?"
+ (unlex-aggressive (lex "f(2 * (0,1))"))
+ ((unval handle-preprocessing-tokens 1) (make-environment) (tokenize "
+#define m(a) a(0,1)
+#define f(a) f(2 * (a))
+m(f)")))
+
+
+ (test-equal "True test"
+ (unlex-aggressive (lex "f(2 * (y+1)) + f(2 * (f(2 * (z[0])))) % f(2 * (0)) + t(1);
+f(2 * (2+(3,4)-0,1)) | f(2 * (~ 5)) & f(2 * (0,1))^m(0,1);
+int i[] = { 1, 23, 4, 5, };
+char c[2][6] = { \"hello\", \"\" };"))
+ (unlex-aggressive (run "
+#define x 3
+#define f(a) f(x * (a))
+#undef x
+#define x 2
+#define g f
+#define z z[0]
+#define h g(~
+#define m(a) a(w)
+#define w 0,1
+#define t(a) a
+#define p() int
+#define q(x) x
+#define r(x,y) x ## y
+#define str(x) # x
+
+f(y+1) + f(f(z)) % t(t(g)(0) + t)(1);
+g(x+(3,4)-w) | h 5) & m
+ (f)^m(m);
+p() i[q()] = { q(1), r(2,3), r(4,), r(,5), r(,) };
+char c[2][6] = { str(hello), str() };"))))
+
+ ;; TODO Example 4 skipped due to #include in output
+
+ (test-equal "Example 5"
+ (unlex-aggressive (lex "int j[] = { 123, 45, 67, 89, 10, 11, 12, };"))
+ (unlex-aggressive (run "
+#define t(x,y,z) x ## y ## z
+int j[] = { t(1,2,3), t(,4,5), t(6,,7), t(8,9,), t(10,,), t(,11,), t(,,12), t(,,) };")))
+
+ (test-group "Example 6"
+ (test-assert "Valid redefinitions"
+ (run "
+#define OBJ_LIKE (1-1)
+#define OBJ_LIKE /* */ (1+1) /* */
+#define FUNC_LIKE(a) ( a )
+#define FUNC_LIKE( a )( /* */ \\
+ a /*
+ */ )"))
+
+ (test-error "Invalid redefinitions"
+ 'misc-error
+ (run "
+#define OBJ_LIKE (0)
+#define OBJ_LIKE (1 - 1)
+#define FUNC_LIKE(b) ( a )
+#define FUNC_LIKE(b) ( b )
+")))
+
+ (test-equal "Example 7"
+ (unlex-aggressive (lex "fprintf(stderr, \"Flag\");
+fprintf(stderr, \"X = %d\\n\", x);
+puts(\"The first, second, and third items.\");
+((x>y)?puts(\"x>y\"):
+ printf(\"x is %d but y is %d\", x, y));"))
+ (unlex-aggressive (run "
+#define debug(...) fprintf(stderr, __VA_ARGS__)
+#define showlist(...) puts(#__VA_ARGS__)
+#define report(test, ...) ((test)?puts(#test):\\
+ printf(__VA_ARGS__))
+debug(\"Flag\");
+debug(\"X = %d\\n\", x);
+showlist(The first, second, and third items.);
+report(x>y, \"x is %d but y is %d\", x, y);
+"))))
+
+
+(test-group "Misc"
+ (test-equal "Null directive"
+ (lex "1\n2")
+ (run "
+1
+#
+2"))
+
+ (test-error "Invalid directive"
+ 'cpp-error
+ (run "# invalid"))
+ )
+
+
+(test-group "Conditionals"
+ (test-group "ifdef"
+ (test-equal "#ifdef on non-defined"
+ (lex "")
+ (run "
+#ifdef X
+x
+#endif"))
+
+ (test-equal "#ifdef on defined"
+ (lex "x")
+ (run "
+#define X
+#ifdef X
+x
+#endif")))
+
+ (test-group "ifndef"
+ (test-equal "#ifndef on non-defined"
+ (lex "x")
+ (run "
+#ifndef X
+x
+#endif"))
+
+ (test-equal "#ifndef on defined"
+ (lex "")
+ (run "
+#define X
+#ifndef X
+x
+#endif
+")))
+
+ (test-group "else"
+ (test-equal "else from active to inactive"
+ (lex "1")
+ (run "
+#ifndef X
+1
+#else
+2
+#endif"))
+
+ (test-equal "else from inactive to active"
+ (lex "2")
+ (run "
+#ifdef X
+1
+#else
+2
+#endif")))
+
+ (test-assert "Pre-processing directives are ignored in non-active paths"
+ (run "
+#ifdef X
+#error
+#endif"))
+
+ ;; Should hold for all tokens, but _Pragma is the only one with observable
+ ;; side effects
+ (test-equal "Tokens aren't expanded in non-active paths"
+ ""
+ (with-output-to-string
+ (lambda ()
+ (run "
+#ifdef X
+_Pragma(\"not-called\")
+#endif"))))
+
+
+ (test-equal "Nested conditions"
+ (lex "a\n\nc")
+ (run "
+#define X
+#ifdef X
+a
+#ifdef Y
+b
+#endif
+c
+#endif
+"))
+
+ (test-equal
+ (lex "")
+ (run "
+#ifdef X
+a
+#ifdef Y
+b
+#endif /* Y */
+c
+#endif /* X */
+"))
+
+ (test-group "Unexpected if ends"
+ (test-error "#else outside if"
+ 'cpp-error (run "#else"))
+ (test-error "#endif outside if"
+ 'cpp-error (run "#endif"))
+ (test-error "#elif outside if"
+ 'cpp-error (run "#elif")))
+
+ (test-group "if"
+ (test-equal "Simple positive if"
+ (lex "x")
+ (run "
+#if 1
+x
+#endif"))
+
+ (test-equal "Simple negative if"
+ (lex "")
+ (run "
+#if 0
+x
+#endif"))
+
+ (test-equal "Elif isn't run when if is true"
+ (lex "a")
+ (run "
+#if 1
+a
+#elif 1
+b
+#endif"))
+
+ (test-equal "elif is run when if is false"
+ (lex "b")
+ (run "
+#if 0
+a
+#elif 1
+b
+#endif"))
+
+ ;; Note that defined is automatically added to the environment when
+ ;; evaluating #if.
+
+ (test-equal "#if with defined"
+ (lex "a")
+ (run "
+#define X
+#if defined(X)
+a
+#else
+b
+#endif")
+ )
+
+ (test-equal "#if with negative defined"
+ (lex "b")
+ (run "
+#if defined(X)
+a
+#else
+b
+#endif"))
+
+ ;; TODO test advanced constant expression
+ ))
diff --git a/tests/test/cpp/to-token.scm b/tests/test/cpp/to-token.scm
new file mode 100644
index 00000000..b633ce12
--- /dev/null
+++ b/tests/test/cpp/to-token.scm
@@ -0,0 +1,65 @@
+(define-module (test cpp to-token)
+ :use-module (srfi srfi-64)
+ :use-module (srfi srfi-64 test-error)
+ :use-module (srfi srfi-88)
+ :use-module (c to-token)
+ :use-module ((system base lalr)
+ :select (lexical-token-category
+ lexical-token-value))
+ :use-module ((c lex2) :select (lex))
+ )
+
+(test-group "string tokens"
+ (let ((v (preprocessing-token->token (car (lex "\"Hello\"")))))
+ (test-equal 'string-literal (lexical-token-category v))
+ (test-equal #vu8(#x48 #x65 #x6C #x6C #x6F 0) (lexical-token-value v))
+ ;; TODO prefixes
+ ))
+
+(test-group "identifier tokens"
+ (let ((v (preprocessing-token->token (car (lex "hello")))))
+ (test-equal 'identifier (lexical-token-category v))
+ (test-equal 'hello (lexical-token-value v))))
+
+(test-group "keywords"
+ (test-equal 'auto (preprocessing-token->token (car (lex "auto")))))
+
+(test-group "numbers"
+ (test-group "Integers"
+ (test-group "Base-10"
+ (let ((v (preprocessing-token->token (car (lex "1")))))
+ (test-equal 'constant (lexical-token-category v))
+ (test-equal 1 (lexical-token-value v))))
+
+ (test-equal "Base-16"
+ 16 (lexical-token-value (preprocessing-token->token (car (lex "0x10")))))
+ (test-equal "Base-8"
+ 8 (lexical-token-value (preprocessing-token->token (car (lex "010")))))
+ (test-group "Suffixes"
+ 'TODO
+ ))
+
+ ;; TODO floats
+ )
+
+(test-group "character constants"
+ (let ((v (preprocessing-token->token (car (lex "'a'")))))
+ (test-equal 'constant (lexical-token-category v))
+ (test-equal (char->integer #\a) (lexical-token-value v)) )
+ (let ((v (preprocessing-token->token (car (lex "'ab'")))))
+ (test-equal 'constant (lexical-token-category v))
+ (test-equal (char->integer #\b) (lexical-token-value v)))
+ (let ((v (preprocessing-token->token (car (lex "'\\x41'")))))
+ (test-equal 'constant (lexical-token-category v))
+ (test-equal #x41 (lexical-token-value v)))
+ ;; (lex "'\\x4142'")
+ ;; (lex "'L\\x4142'")
+ )
+
+(test-group "punctuators"
+ (test-equal '+ (preprocessing-token->token (car (lex "+"))))
+ (test-equal 'lbrace (preprocessing-token->token (car (lex "{")))))
+
+(test-group "other"
+ (test-error 'cpp-error (preprocessing-token->token (car (lex " ")))))
+
diff --git a/tests/test/cpp/util.scm b/tests/test/cpp/util.scm
new file mode 100644
index 00000000..8329294a
--- /dev/null
+++ b/tests/test/cpp/util.scm
@@ -0,0 +1,14 @@
+(define-module (test cpp util)
+ :use-module (srfi srfi-64)
+ :use-module (srfi srfi-88)
+ :use-module (c cpp-util)
+ :use-module ((c lex2) :select (lex lexeme)))
+
+(test-group "Merge string literals"
+ (test-equal "To simple strings"
+ (list (lexeme type: 'preprocessing-token
+ body: '(string-literal (encoding-prefix) "Hello" "World")))
+ (merge-string-literals (lex "\"Hello\"\"World\"")))
+
+ ;; TODO tests with prefixes
+ )
diff --git a/tests/test/datetime.scm b/tests/test/datetime.scm
index 2a5ac141..f73a0ad2 100644
--- a/tests/test/datetime.scm
+++ b/tests/test/datetime.scm
@@ -70,45 +70,44 @@
(test-error "Invalid second" 'wrong-type-arg (time second: #f))))
(test-group "Datetime"
- (let ((get-time% (@@ (datetime) get-time%)))
+ (let ()
(test-group "Empty datetime"
(let ((dt (datetime)))
- ;; TODO figure out propper export of get-time%
- (test-assert "Datetime date is date" (date? (get-date dt)))
- (test-assert "Datetime date is zero" (date-zero? (get-date dt)))
- (test-assert "Datetime time is time" (time? (get-time% dt)))
- (test-assert "Datetime time is zero" (time-zero? (get-time% dt)))
- (test-eqv "Defalut timezone is #f" #f (get-timezone dt))))
+ (test-assert "Datetime date is date" (date? (datetime-date dt)))
+ (test-assert "Datetime date is zero" (date-zero? (datetime-date dt)))
+ (test-assert "Datetime time is time" (time? (datetime-time dt)))
+ (test-assert "Datetime time is zero" (time-zero? (datetime-time dt)))
+ (test-eqv "Defalut timezone is #f" #f (tz dt))))
(test-group "Datetime with keys"
(let ((dt (datetime date: (date day: 10)
time: (time minute: 20))))
(test-equal "Given date is stored"
- 10 (day (get-date dt)))
+ 10 (day (datetime-date dt)))
(test-equal "Given time is stored"
- 20 (minute (get-time% dt))))
+ 20 (minute (datetime-time dt))))
(test-error "Date must be a date" 'wrong-type-arg (datetime date: 1))
(test-error "Date must be a date" 'wrong-type-arg (datetime date: (time)))
- (test-assert "Date: #f gives still constructs a date" (date? (get-date (datetime date: #f))))
+ (test-assert "Date: #f gives still constructs a date" (date? (datetime-date (datetime date: #f))))
(test-error "Time must be a time" 'wrong-type-arg (datetime time: 1))
(test-error "Time must be a time" 'wrong-type-arg (datetime time: (date)))
- (test-assert "Time: #f gives still constructs a time" (time? (get-time% (datetime time: #f))))
+ (test-assert "Time: #f gives still constructs a time" (time? (datetime-time (datetime time: #f))))
(let ((dt (datetime hour: 20 day: 30)))
- (test-equal "Time objects can be implicitly created" 20 (hour (get-time% dt)))
- (test-equal "Date objects can be implicitly created" 30 (day (get-date dt))))
+ (test-equal "Time objects can be implicitly created" 20 (hour (datetime-time dt)))
+ (test-equal "Date objects can be implicitly created" 30 (day (datetime-date dt))))
(let ((dt (datetime day: 30 time: (time hour: 20))))
(test-equal "\"Upper\" and \"lower\" keys can be mixed"
- 20 (hour (get-time% dt)))
+ 20 (hour (datetime-time dt)))
(test-equal "\"Upper\" and \"lower\" keys can be mixed"
- 30 (day (get-date dt))))
+ 30 (day (datetime-date dt))))
(let ((dt (datetime hour: 30 time: (time hour: 20))))
(test-equal "time: has priority over hour: (and the like)"
- 20 (hour (get-time% dt)))))
+ 20 (hour (datetime-time dt)))))
(let ((dt (datetime day: 30 date: (date day: 20))))
(test-equal "date: has priority over day: (and the like)"
- 20 (day (get-date dt)))))))
+ 20 (day (datetime-date dt)))))))
;; Before the general parser, since it's a dependency string->datetime.
(test-group "Parse Month"
@@ -384,7 +383,7 @@
(test-assert "Current datetime returns a datetime"
(datetime? (current-datetime)))
(test-equal "Current datetime returns with tz: UTC"
- "UTC" (get-timezone (current-datetime)))
+ "UTC" (tz (current-datetime)))
(test-assert "Current-date returns a date"
(date? (current-date)))
@@ -707,6 +706,11 @@ date-range
(not (datetime< (datetime day: 1) (datetime day: 2) (datetime day: 1)))))))
;; TODO
+date<=
+time<=
+datetime<=
+
+;; TODO
date/-time< date/-time<? date/-time<= date/-time<=?
date/-time> date/-time>? date/-time>= date/-time>=?
diff --git a/tests/test/lens.scm b/tests/test/lens.scm
new file mode 100644
index 00000000..0797e3aa
--- /dev/null
+++ b/tests/test/lens.scm
@@ -0,0 +1,21 @@
+(define-module (test lens)
+ :use-module (srfi srfi-64)
+ :use-module (srfi srfi-64 test-error)
+ :use-module (srfi srfi-88)
+ :use-module (hnh util lens))
+
+
+(define first (ref 0))
+
+(test-equal '((1)) (first '(((1)))))
+(test-equal '((2)) (set '(((1))) (compose-lenses first first) 2))
+(test-equal '(((2))) (set '(((1))) (compose-lenses first first first) 2))
+
+
+;; (list-change (iota 10) 5 'Hello)
+;; => (0 1 2 3 4 Hello 6 7 8 9)
+
+(test-equal '(1 (10) 3) (set '(1 (2) 3) (compose-lenses (ref 1) (ref 0)) 10))
+(test-equal '(1 (10) 3) (set '(1 (2) 3) (ref 1) (ref 0) 10))
+
+;; (set (list (iota 10)) first first 11)
diff --git a/tests/test/object.scm b/tests/test/object.scm
new file mode 100644
index 00000000..701c45c0
--- /dev/null
+++ b/tests/test/object.scm
@@ -0,0 +1,80 @@
+(define-module (test object)
+ :use-module (srfi srfi-64)
+ :use-module (srfi srfi-64 test-error)
+ :use-module (srfi srfi-88)
+ :use-module (hnh util object)
+ :use-module ((hnh util) :select (->)))
+
+(define-type (f) x)
+
+(test-group "Created procedures"
+ (test-assert "Constructor" (procedure? f))
+ (test-assert "Predicate" (procedure? f?))
+ (test-assert "Field access" (procedure? x)))
+
+;; (f)
+;; (f x: 10)
+;; (f? (f))
+
+(test-equal "Accessors are getters"
+ 10 (x (f x: 10)))
+(test-assert "Accessors update, returning a object of the original type"
+ (f? (x (f x: 10) 20)))
+(test-equal "A get after an update returns the new value"
+ 20 (-> (f x: 10)
+ (x 20)
+ x))
+
+
+(define-type (g) x)
+
+(test-assert "Second type can be created"
+ (g x: 10))
+
+(test-assert "Second type isn't first type"
+ (not (f? (g x: 10))))
+
+(test-assert "First type isn't second type"
+ (not (g? (f x: 10))))
+
+;; Tests that the old x gets shadowed
+;; (test-equal 10 (x (f x: 10)))
+;; (test-equal 10 (x (g x: 10)))
+
+;; field-level arguments
+;; - init:
+(define-type (f2) (f2-x default: 0 type: integer?))
+(test-equal 0 (f2-x (f2)))
+
+;; - type:
+
+(test-error "Giving an invalid type to the constructor throws an error"
+ 'wrong-type-arg (f2 f2-x: 'hello))
+(test-error "Giving an invalid type to a setter throws an error"
+ 'wrong-type-arg (f2-x (f2) 'hello))
+(test-equal "The error includes the name of the field, the expected type, and the given value"
+ '(f2-x integer? hello)
+ (catch 'wrong-type-arg (lambda () (f2-x (f2) 'hello))
+ (lambda (err proc fmt args data) args)))
+
+(test-equal "Typed setter updates the value"
+ (f2 f2-x: 10) (f2-x (f2) 10))
+
+;; type-level arguments
+;; - constructor:
+(define-type (f3 constructor: (lambda (make check)
+ (lambda* (#:key f3-x f3-y)
+ (check f3-x f3-y)
+ (make f3-x f3-y))))
+ (f3-x type: integer?)
+ (f3-y type: string?))
+
+(test-assert "Custom constructors create objcets"
+ (f3? (f3 f3-x: 10 f3-y: "Hello")))
+
+(test-error "Bad arguments to custom constructor"
+ 'wrong-type-arg (f3 f3-x: 'hello f3-y: 'world))
+
+;; - printer:
+(define-type (f4 printer: (lambda (r p) (display "something" p))))
+(test-equal "something" (with-output-to-string (lambda () (write (f4)))))
diff --git a/tests/test/recurrence-advanced.scm b/tests/test/recurrence-advanced.scm
index a291cc17..56f4cda6 100644
--- a/tests/test/recurrence-advanced.scm
+++ b/tests/test/recurrence-advanced.scm
@@ -27,6 +27,7 @@
:use-module ((datetime)
:select (parse-ics-datetime
datetime
+ datetime-date
time
date
datetime->string))
diff --git a/tests/test/util.scm b/tests/test/util.scm
index 1de96a37..5e2aab4e 100644
--- a/tests/test/util.scm
+++ b/tests/test/util.scm
@@ -187,6 +187,34 @@
(test-error 'wrong-type-arg (find-extreme '()))
+;; TODO group-by
+;; TODO split-by
+
+(test-group "Split-by-one-of"
+
+ (test-equal "Empty input"
+ '(()) (split-by-one-of '() '(+)))
+
+ (test-equal "No matching tokens"
+ '((1 + 2)) (split-by-one-of '(1 + 2) '(/)))
+
+ (test-equal "Matching tokens"
+ '((1) (+ 2) (- 3))
+ (split-by-one-of '(1 + 2 - 3) '(+ -)))
+
+ (test-equal "Maching tokens, multiple values in each group"
+ '((1 + 2) (* 3 + 4))
+ (split-by-one-of '(1 + 2 * 3 + 4) '(*))))
+
+
+(test-group "break/all"
+ (test-equal '((a b c)) (break/all (const #f) '(a b c)))
+ (test-equal '(()) (break/all (const #t) '()))
+ (test-equal '(() () () ()) (break/all (const #t) '(a b c)))
+ (test-equal '((a b) (c d)) (break/all number? '(a b 1 c d)))
+ (test-equal '(() ()) (break/all number? '(1))))
+
+
(call-with-values
(lambda ()
(span-upto