diff options
Diffstat (limited to 'module/c/old/lex.scm')
-rw-r--r-- | module/c/old/lex.scm | 163 |
1 files changed, 163 insertions, 0 deletions
diff --git a/module/c/old/lex.scm b/module/c/old/lex.scm new file mode 100644 index 00000000..dcc7336d --- /dev/null +++ b/module/c/old/lex.scm @@ -0,0 +1,163 @@ +(define-module (c old lex) + :use-module (ice-9 peg) + :use-module (c old operators) + :export (lex)) + + +;; Like define-peg-pattern, but body is evaluated +(define-syntax define-peg-pattern* + (lambda (stx) + (syntax-case stx () + ((_ sym accum pat) + #`(define sym + (let ((matchf (compile-peg-pattern (datum->syntax #'stx pat) 'accum))) + (let ((syn ((@ (ice-9 peg codegen) wrap-parser-for-users) #'stx matchf 'accum 'sym))) + ((@ (system base compile) compile) + ((@ (ice-9 peg cache) cg-cached-parser) + syn))))))))) + + + + +(define-peg-pattern base-8-digit body + (range #\0 #\7)) + +(define-peg-pattern base-10-digit body + (range #\0 #\9)) + +(define-peg-pattern base-16-digit body + (or (range #\0 #\9) + (range #\A #\F) + (range #\a #\f))) + +;; https://en.cppreference.com/w/cpp/language/integer_literal +(define-peg-pattern base-10 all (+ base-10-digit)) +(define-peg-pattern base-8 all (and (ignore "0") (+ base-8-digit))) +(define-peg-pattern base-16 all (and (ignore (and "0" (or "x" "X"))) + (+ base-16-digit))) + +;; accept anything now, ensure correctnes later +(define-peg-pattern integer-suffix all + (* (or "u" "U" "l" "L"))) + +(define-peg-pattern integer all + (and (or base-8 base-16 base-10) (? integer-suffix))) + +(define-peg-pattern float-suffix all + (* (or "f" "F" "l" "L"))) + +(define-peg-pattern exponent all + (and (ignore (or "e" "E")) (? (or "+" "-")) base-10)) + +;; Helper patterns for creating named groups in float +(define-peg-pattern float-integer all base-10) +(define-peg-pattern float-decimal all base-10) + +(define-peg-pattern float all + (or (and float-integer exponent (? float-suffix)) + (and (? float-integer) (ignore ".") float-decimal (? exponent) (? float-suffix)) + (and float-integer (ignore ".") (? exponent) (? float-suffix)))) + +(define-peg-pattern number body + (or float integer)) + +(define-peg-pattern group all + (and (ignore "(") expr (ignore ")"))) + +(define-peg-pattern base-8-char all + (and base-8-digit + (? base-8-digit) + (? base-8-digit))) + +(define-peg-pattern base-16-char all + (and (ignore "x") base-16-digit (? base-16-digit))) + +(define-peg-pattern escaped-char all + (and (ignore "\\") (or base-16-char + base-8-char + peg-any))) + +(define-peg-pattern char all + (and (ignore "'") (or escaped-char peg-any) (ignore "'"))) + +(define-peg-pattern quot none "\"") + +(define-peg-pattern string all + (and quot (* (or escaped-char (and (not-followed-by "\"") peg-any))) quot)) + +(define-peg-pattern* operator all + `(or ,@(map symbol->string symbol-binary-operators) + ,@(map (lambda (op) `(and ,(symbol->string op) ws)) + wordy-binary-operators) + "?" ":")) + +;; whitespace +(define-peg-pattern ws none + (or " " " " "\n")) + +;; space (for when whitespace is optional) +(define-peg-pattern sp none (* ws)) + +(define-peg-pattern safe-letter body + (or "_" + (range #\A #\Z) + (range #\a #\z))) + +(define-peg-pattern variable all + (and safe-letter + (* (or safe-letter + base-10-digit)))) + +(define-peg-pattern prefix-operator all + ;; It's important that ++ and -- are BEFORE + and - + ;; otherwise the first + is found, leaving the second +, which fails + ;; to lex since it's an invalid token + ;; TODO sizeof can be written as a prefix operator + ;; (without parenthesis) if the operand is an expression. + (or "*" "&" "++" "--" + "!" "~" "+" "-")) + + +;;; Note that stacked pre or postfix operators without parenthesis +;;; dosen't work. So `*&C' is invalid, while `*(&C)' is valid. + +(define-peg-pattern prefix all + (and prefix-operator sp (or variable group funcall literal))) + +(define-peg-pattern postfix-operator all + (or "++" "--" "*")) + +(define-peg-pattern postfix all + ;; literals can't be in-place incremented and decremented + ;; Make sure we don't match postfix-operator here, since + ;; that also gives us an infinite loop. + (and (or prefix funcall group variable) sp postfix-operator)) + +(define-peg-pattern infix all + ;; first case is "same" as expr, but in different order to prevent + ;; infinite self reference. Pre and postfix not here, solved by having + ;; them before infix in expr + (and (or funcall postfix prefix group literal variable) + sp operator sp expr)) + +(define-peg-pattern funcall all + (and variable sp group)) + +(define-peg-pattern literal body + (or char string number)) + +;;; main parser +(define-peg-pattern expr body + (+ (and sp (or + ;; float must be BEFORE infix, otherwise 3.2 is parsed as (infix 3 (operator ".") 2) + ;; that however breaks the infix logic, meaning that floating point numbers can't be + ;; used in basic arithmetic. + ;; TODO remove all implicit order of operations handling in the lexer, and move it to + ;; the parser. This should also fix the case of typecasts being applied incorrectly. + float + infix postfix prefix funcall group literal variable) + sp))) + + +(define (lex string) + (peg:tree (match-pattern expr string))) |