aboutsummaryrefslogtreecommitdiff
path: root/module/c/lex.scm
blob: 34e52d88f10402a8c229dbcd2d108ef524726b6d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
(define-module (c lex)
  :use-module (ice-9 peg)
  :use-module (c operators)
  :export (lex))


;; Like define-peg-pattern, but body is evaluated
(define-syntax define-peg-pattern*
  (lambda (stx)
    (syntax-case stx ()
      ((_ sym accum pat)
       #`(define sym
           (let ((matchf (compile-peg-pattern (datum->syntax #'stx pat) 'accum)))
             (let ((syn ((@ (ice-9 peg codegen) wrap-parser-for-users) #'stx matchf 'accum 'sym)))
               ((@ (system base compile) compile)
                ((@ (ice-9 peg cache) cg-cached-parser)
                 syn)))))))))




(define-peg-pattern base-8-digit body
  (range #\0 #\7))

(define-peg-pattern base-10-digit body
  (range #\0 #\9))

(define-peg-pattern base-16-digit body
  (or (range #\0 #\9)
      (range #\A #\F)
      (range #\a #\f)))

;; https://en.cppreference.com/w/cpp/language/integer_literal
(define-peg-pattern base-10 all (+ base-10-digit))
(define-peg-pattern base-8 all (and (ignore "0") (+ base-8-digit)))
(define-peg-pattern base-16 all (and (ignore (and "0" (or "x" "X")))
                                     (+ base-16-digit)))

;; accept anything now, ensure correctnes later
(define-peg-pattern integer-suffix all
  (* (or "u" "U" "l" "L")))

(define-peg-pattern integer all
  (and (or base-8 base-16 base-10) (? integer-suffix)))

(define-peg-pattern number body
  (or integer))

(define-peg-pattern group all
  (and (ignore "(") expr (ignore ")")))

(define-peg-pattern base-8-char all
  (and base-8-digit
       (? base-8-digit)
       (? base-8-digit)))

(define-peg-pattern base-16-char all
  (and (ignore "x") base-16-digit (? base-16-digit)))

(define-peg-pattern escaped-char all
  (and (ignore "\\") (or base-16-char
                         base-8-char
                         peg-any)))

(define-peg-pattern char all
  (and (ignore "'") (or escaped-char peg-any) (ignore "'")))


(define-peg-pattern* operator all
  `(or ,@(map symbol->string symbol-binary-operators)
       ,@(map (lambda (op) `(and ,(symbol->string op) ws))
              wordy-binary-operators)))

;; whitespace
(define-peg-pattern ws none
  (or " " "	" "\n"))

;; space (for when whitespace is optional)
(define-peg-pattern sp none (* ws))

(define-peg-pattern safe-letter body
  (or "_"
      (range #\A #\Z)
      (range #\a #\z)))

(define-peg-pattern variable all
  (and safe-letter
       (* (or safe-letter
              base-10-digit))))

(define-peg-pattern prefix-operator all
  (or "!" "~" "*" "&" "++" "--" "+" "-"))

;;; Note that stacked pre or postfix operators without parenthesis
;;; dosen't work. So `*&C' is invalid, while `*(&C)' is valid.

(define-peg-pattern prefix all
  (and prefix-operator sp (or variable group funcall #; postfix
                              )))

(define-peg-pattern postfix-operator all
  (or "++" "--"))

(define-peg-pattern postfix all
  ;; literals can't be in-place incremented and decremented
  ;; Make sure we don't match postfix-operator here, since
  ;; that also gives us an infinite loop.
  (and (or prefix funcall group variable) sp postfix-operator))

(define-peg-pattern infix all
  ;; first case is "same" as expr, but in different order to prevent
  ;; infinite self reference. Pre and postfix not here, solved by having
  ;; them before infix in expr
  (and (or funcall postfix prefix group char number variable)
       sp operator sp expr))

(define-peg-pattern funcall all
  (and variable sp group))

;;; main parser
(define-peg-pattern expr body
  (+ (and sp (or infix postfix prefix funcall group char number variable)
          sp)))


(define (lex string)
  (peg:tree (match-pattern expr string)))