aboutsummaryrefslogtreecommitdiff
path: root/module/c/old/lex.scm
diff options
context:
space:
mode:
Diffstat (limited to 'module/c/old/lex.scm')
-rw-r--r--module/c/old/lex.scm163
1 files changed, 163 insertions, 0 deletions
diff --git a/module/c/old/lex.scm b/module/c/old/lex.scm
new file mode 100644
index 00000000..dcc7336d
--- /dev/null
+++ b/module/c/old/lex.scm
@@ -0,0 +1,163 @@
+(define-module (c old lex)
+ :use-module (ice-9 peg)
+ :use-module (c old operators)
+ :export (lex))
+
+
+;; Like define-peg-pattern, but body is evaluated
+(define-syntax define-peg-pattern*
+ (lambda (stx)
+ (syntax-case stx ()
+ ((_ sym accum pat)
+ #`(define sym
+ (let ((matchf (compile-peg-pattern (datum->syntax #'stx pat) 'accum)))
+ (let ((syn ((@ (ice-9 peg codegen) wrap-parser-for-users) #'stx matchf 'accum 'sym)))
+ ((@ (system base compile) compile)
+ ((@ (ice-9 peg cache) cg-cached-parser)
+ syn)))))))))
+
+
+
+
+(define-peg-pattern base-8-digit body
+ (range #\0 #\7))
+
+(define-peg-pattern base-10-digit body
+ (range #\0 #\9))
+
+(define-peg-pattern base-16-digit body
+ (or (range #\0 #\9)
+ (range #\A #\F)
+ (range #\a #\f)))
+
+;; https://en.cppreference.com/w/cpp/language/integer_literal
+(define-peg-pattern base-10 all (+ base-10-digit))
+(define-peg-pattern base-8 all (and (ignore "0") (+ base-8-digit)))
+(define-peg-pattern base-16 all (and (ignore (and "0" (or "x" "X")))
+ (+ base-16-digit)))
+
+;; accept anything now, ensure correctnes later
+(define-peg-pattern integer-suffix all
+ (* (or "u" "U" "l" "L")))
+
+(define-peg-pattern integer all
+ (and (or base-8 base-16 base-10) (? integer-suffix)))
+
+(define-peg-pattern float-suffix all
+ (* (or "f" "F" "l" "L")))
+
+(define-peg-pattern exponent all
+ (and (ignore (or "e" "E")) (? (or "+" "-")) base-10))
+
+;; Helper patterns for creating named groups in float
+(define-peg-pattern float-integer all base-10)
+(define-peg-pattern float-decimal all base-10)
+
+(define-peg-pattern float all
+ (or (and float-integer exponent (? float-suffix))
+ (and (? float-integer) (ignore ".") float-decimal (? exponent) (? float-suffix))
+ (and float-integer (ignore ".") (? exponent) (? float-suffix))))
+
+(define-peg-pattern number body
+ (or float integer))
+
+(define-peg-pattern group all
+ (and (ignore "(") expr (ignore ")")))
+
+(define-peg-pattern base-8-char all
+ (and base-8-digit
+ (? base-8-digit)
+ (? base-8-digit)))
+
+(define-peg-pattern base-16-char all
+ (and (ignore "x") base-16-digit (? base-16-digit)))
+
+(define-peg-pattern escaped-char all
+ (and (ignore "\\") (or base-16-char
+ base-8-char
+ peg-any)))
+
+(define-peg-pattern char all
+ (and (ignore "'") (or escaped-char peg-any) (ignore "'")))
+
+(define-peg-pattern quot none "\"")
+
+(define-peg-pattern string all
+ (and quot (* (or escaped-char (and (not-followed-by "\"") peg-any))) quot))
+
+(define-peg-pattern* operator all
+ `(or ,@(map symbol->string symbol-binary-operators)
+ ,@(map (lambda (op) `(and ,(symbol->string op) ws))
+ wordy-binary-operators)
+ "?" ":"))
+
+;; whitespace
+(define-peg-pattern ws none
+ (or " " " " "\n"))
+
+;; space (for when whitespace is optional)
+(define-peg-pattern sp none (* ws))
+
+(define-peg-pattern safe-letter body
+ (or "_"
+ (range #\A #\Z)
+ (range #\a #\z)))
+
+(define-peg-pattern variable all
+ (and safe-letter
+ (* (or safe-letter
+ base-10-digit))))
+
+(define-peg-pattern prefix-operator all
+ ;; It's important that ++ and -- are BEFORE + and -
+ ;; otherwise the first + is found, leaving the second +, which fails
+ ;; to lex since it's an invalid token
+ ;; TODO sizeof can be written as a prefix operator
+ ;; (without parenthesis) if the operand is an expression.
+ (or "*" "&" "++" "--"
+ "!" "~" "+" "-"))
+
+
+;;; Note that stacked pre or postfix operators without parenthesis
+;;; dosen't work. So `*&C' is invalid, while `*(&C)' is valid.
+
+(define-peg-pattern prefix all
+ (and prefix-operator sp (or variable group funcall literal)))
+
+(define-peg-pattern postfix-operator all
+ (or "++" "--" "*"))
+
+(define-peg-pattern postfix all
+ ;; literals can't be in-place incremented and decremented
+ ;; Make sure we don't match postfix-operator here, since
+ ;; that also gives us an infinite loop.
+ (and (or prefix funcall group variable) sp postfix-operator))
+
+(define-peg-pattern infix all
+ ;; first case is "same" as expr, but in different order to prevent
+ ;; infinite self reference. Pre and postfix not here, solved by having
+ ;; them before infix in expr
+ (and (or funcall postfix prefix group literal variable)
+ sp operator sp expr))
+
+(define-peg-pattern funcall all
+ (and variable sp group))
+
+(define-peg-pattern literal body
+ (or char string number))
+
+;;; main parser
+(define-peg-pattern expr body
+ (+ (and sp (or
+ ;; float must be BEFORE infix, otherwise 3.2 is parsed as (infix 3 (operator ".") 2)
+ ;; that however breaks the infix logic, meaning that floating point numbers can't be
+ ;; used in basic arithmetic.
+ ;; TODO remove all implicit order of operations handling in the lexer, and move it to
+ ;; the parser. This should also fix the case of typecasts being applied incorrectly.
+ float
+ infix postfix prefix funcall group literal variable)
+ sp)))
+
+
+(define (lex string)
+ (peg:tree (match-pattern expr string)))