blob: d36fcc05bf98b47e1452b9ebdfa69afa8e36722d (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
|
(define-module (c lex)
:use-module (ice-9 peg)
:use-module (c operators)
:export (lex))
;; Like the regular define-peg-pattern. But evaluates the
;; pattern before treating it as a peg rule.
(define-macro (define-define-peg-pattern name capture expr)
`(define-peg-pattern ,name ,capture
;; NOTE how does this work if we are in a different module?
;; It currently however isn't a problem since we don't export
;; this macro.
,(eval expr (current-module))))
(define-peg-pattern base-8-digit body
(range #\0 #\7))
(define-peg-pattern base-10-digit body
(range #\0 #\9))
(define-peg-pattern base-16-digit body
(or (range #\0 #\9)
(range #\A #\F)
(range #\a #\f)))
;; https://en.cppreference.com/w/cpp/language/integer_literal
(define-peg-pattern base-10 all (+ base-10-digit))
(define-peg-pattern base-8 all (and (ignore "0") (+ base-8-digit)))
(define-peg-pattern base-16 all (and (ignore (and "0" (or "x" "X")))
(+ base-16-digit)))
;; accept anything now, ensure correctnes later
(define-peg-pattern integer-suffix all
(* (or "u" "U" "l" "L")))
(define-peg-pattern integer all
(and (or base-8 base-16 base-10) (? integer-suffix)))
(define-peg-pattern number body
(or integer))
(define-peg-pattern group all
(and (ignore "(") expr (ignore ")")))
(define-peg-pattern base-8-char all
(and base-8-digit
(? base-8-digit)
(? base-8-digit)))
(define-peg-pattern base-16-char all
(and (ignore "x") base-16-digit (? base-16-digit)))
(define-peg-pattern escaped-char all
(and (ignore "\\") (or base-16-char
base-8-char
peg-any)))
(define-peg-pattern char all
(and (ignore "'") (or escaped-char peg-any) (ignore "'")))
(define-define-peg-pattern operator all
`(or ,@(map symbol->string symbol-binary-operators)
,@(map (lambda (op) `(and ,(symbol->string op) ws))
wordy-binary-operators)))
;; whitespace
(define-peg-pattern ws none
(or " " " " "\n"))
;; space (for when whitespace is optional)
(define-peg-pattern sp none (* ws))
(define-peg-pattern safe-letter body
(or "_"
(range #\A #\Z)
(range #\a #\z)))
(define-peg-pattern variable all
(and safe-letter
(* (or safe-letter
base-10-digit))))
(define-peg-pattern prefix-operator all
(or "!" "~" "*" "&" "++" "--" "+" "-"))
;;; Note that stacked pre or postfix operators without parenthesis
;;; dosen't work. So `*&C' is invalid, while `*(&C)' is valid.
(define-peg-pattern prefix all
(and prefix-operator sp (or variable group funcall #; postfix
)))
(define-peg-pattern postfix-operator all
(or "++" "--"))
(define-peg-pattern postfix all
;; literals can't be in-place incremented and decremented
;; Make sure we don't match postfix-operator here, since
;; that also gives us an infinite loop.
(and (or prefix funcall group variable) sp postfix-operator))
(define-peg-pattern infix all
;; first case is "same" as expr, but in different order to prevent
;; infinite self reference. Pre and postfix not here, solved by having
;; them before infix in expr
(and (or funcall postfix prefix group char number variable)
sp operator sp expr))
(define-peg-pattern funcall all
(and variable sp group))
;;; main parser
(define-peg-pattern expr body
(+ (and sp (or infix postfix prefix funcall group char number variable)
sp)))
(define (lex string)
(peg:tree (match-pattern expr string)))
|