aboutsummaryrefslogtreecommitdiff
path: root/module/c/lex2.scm
diff options
context:
space:
mode:
Diffstat (limited to 'module/c/lex2.scm')
-rw-r--r--module/c/lex2.scm323
1 files changed, 323 insertions, 0 deletions
diff --git a/module/c/lex2.scm b/module/c/lex2.scm
new file mode 100644
index 00000000..23fa9da4
--- /dev/null
+++ b/module/c/lex2.scm
@@ -0,0 +1,323 @@
+(define-module (c lex2)
+ :use-module (ice-9 peg)
+ :export (lex))
+
+;;; A.1 Lexical grammar
+;;; A.1.1 Lexical elements
+
+;; (6.4)
+(define-peg-pattern token all
+ (or keyword
+ identifier
+ constant
+ string-literal
+ punctuator
+ ))
+
+;; (6.4)
+(define-peg-pattern preprocessing-token all
+ ;; string literal moved before header-name since string literals
+ ;; otherwise became q-strings
+ (or string-literal
+ header-name
+ identifier
+ pp-number
+ character-constant
+ punctuator
+ ;; Each non-white-space character that cannot be one of the above
+ ))
+
+;;; A.1.2 Keywords
+
+;; (6.4.1)
+(define-peg-pattern keyword all
+ (or "auto" "break" "case" "char" "const" "continue" "default"
+ "do" "double" "else" "enum" "extern" "float" "for" "goto"
+ "if" "inline" "int" "long" "register" "restrict" "return"
+ "short" "signed" "sizeof" "static" "struct" "switch"
+ "typedef" "union" "unsigned" "void" "volatile" "while"
+ "_Alignas" "_Alignof" "_Atomic" "_Bool" "_Complex"
+ "_Generic" "_Imaginary" "_Noreturn" "_Static_assert"
+ "_Thread_local"))
+
+;;; A.1.3 Identifiers
+
+;; (6.4.2.1)
+(define-peg-pattern identifier all
+ (and identifier-nondigit (* (or identifier-nondigit digit))))
+
+;; (6.4.2.1)
+(define-peg-pattern identifier-nondigit body
+ (or nondigit
+ universal-character-name
+ ;; TODO other implementation-defined characters
+ ))
+
+;; (6.4.2.1)
+(define-peg-pattern nondigit body
+ (or "_"
+ (range #\A #\Z)
+ (range #\a #\z)))
+
+;; (6.4.2.1)
+(define-peg-pattern digit body
+ (range #\0 #\9))
+
+;;; A.1.4 Universal character names
+
+;; (6.4.3)
+(define-peg-pattern universal-character-name all
+ (or (and "\\u" hex-quad)
+ (and "\\U" hex-quad hex-quad)))
+
+;; (6.4.3)
+(define-peg-pattern hex-quad body
+ (and hexadecimal-digit hexadecimal-digit
+ hexadecimal-digit hexadecimal-digit))
+
+;;; A.1.5 Constants
+
+;; (6.4.4)
+(define-peg-pattern constant all
+ ;; Int and float swapped from standard since we need to try parsing
+ ;; the floats beforehand
+ (or floating-constant
+ integer-constant
+ enumeration-constant
+ character-constant))
+
+;; (6.4.4.1)
+(define-peg-pattern integer-constant all
+ (and (or decimal-constant
+ octal-constant
+ hexadecimal-constant)
+ integer-suffix))
+
+;; (6.4.4.1)
+(define-peg-pattern decimal-constant all
+ (and nonzero-digit
+ (+ digit)))
+
+;; (6.4.4.1)
+(define-peg-pattern octal-constant all
+ (+ octal-digit))
+
+;; (6.4.4.1)
+(define-peg-pattern hexadecimal-constant all
+ (and hexadecimal-prefix (+ hexadecimal-digit)))
+
+;; (6.4.4.1)
+(define-peg-pattern hexadecimal-prefix none
+ (or "0x" "0X"))
+
+;; (6.4.4.1)
+(define-peg-pattern nonzero-digit body
+ (range #\1 #\9))
+
+;; (6.4.4.1)
+(define-peg-pattern octal-digit body
+ (range #\0 #\7))
+
+;; (6.4.4.1)
+(define-peg-pattern hexadecimal-digit body
+ (or (range #\0 #\9)
+ (range #\a #\f)
+ (range #\A #\F)))
+
+;; (6.4.4.1)
+(define-peg-pattern integer-suffix all
+ (or (and unsigned-suffix (? long-suffix))
+ (and long-suffix (? unsigned-suffix))))
+
+;; (6.4.4.1)
+;; This is a merger of long-suffix and long-long-suffix
+(define-peg-pattern long-suffix body
+ (or "l" "L" "ll" "LL"))
+
+;; (6.4.4.1)
+(define-peg-pattern unsigned-suffix body
+ (or "u" "U"))
+
+;; (6.4.4.2)
+(define-peg-pattern floating-constant all
+ (or decimal-floating-constant
+ hexadecimal-floating-constant))
+
+;; (6.4.4.2)
+(define-peg-pattern floating-constant all
+ (or (and fractional-constant (? exponent-part) (? floating-suffix))
+ (and digit-sequence exponent-part (? floating-suffix))))
+
+;; (6.4.4.2)
+(define-peg-pattern hexadecimal-floating-constant all
+ (and hexadecimal-prefix
+ (or hexadecimal-fractional-constant
+ hexadecimal-digit-sequence)
+ binary-exponent-part
+ (? floating-suffix)))
+
+;; (6.4.4.2)
+(define-peg-pattern fractional-constant all
+ (or (and (? digit-sequence) "." digit-sequence)
+ (and digit-sequence ".")))
+
+;; (6.4.4.2)
+(define-peg-pattern exponent-part all
+ (and (or "e" "E") (? sign) digit-sequence))
+
+;; (6.4.4.2)
+(define-peg-pattern sign all
+ (or "+" "-"))
+
+;; (6.4.4.2)
+(define-peg-pattern digit-sequence body
+ (+ digit))
+
+;; (6.4.4.2)
+(define-peg-pattern hexadecimal-fractional-constant all
+ (or (and (? hexadecimal-digit-sequence) "." hexadecimal-digit-sequence)
+ (and hexadecimal-digit-sequence ".")))
+
+;; (6.4.4.2)
+(define-peg-pattern binary-exponent-part all
+ (and (or "p" "P")
+ (? sign)
+ digit-sequence))
+
+;; (6.4.4.2)
+(define-peg-pattern hexadecimal-digit-sequence body
+ (+ hexadecimal-digit))
+
+;; (6.4.4.2)
+(define-peg-pattern floating-suffix all
+ (or "f" "l" "F" "L"))
+
+;; (6.4.4.3)
+(define-peg-pattern enumeration-constant all
+ identifier)
+
+(define-peg-pattern character-prefix all
+ (or "L" "u" "U"))
+
+;; (6.4.4.4)
+(define-peg-pattern character-constant all
+ (and (? character-prefix)
+ (ignore "'")
+ (+ c-char)
+ (ignore "'")))
+
+;; (6.4.4.4)
+(define-peg-pattern c-char body
+ (or (and (not-followed-by (or "'" "\\" "\n")) peg-any)
+ escape-sequence))
+
+;; (6.4.4.4)
+(define-peg-pattern escape-sequence all
+ (or simple-escape-sequence
+ octal-escape-sequence
+ hexadecimal-escape-sequence
+ universal-character-name))
+
+;; (6.4.4.4)
+(define-peg-pattern simple-escape-sequence all
+ (and (ignore "\\") (or "'" "\"" "?" "\\"
+ "a" "b" "f" "n" "r" "t" "v")))
+
+;; (6.4.4.4)
+(define-peg-pattern octal-escape-sequence all
+ (and (ignore "\\") octal-digit (? octal-digit) (? octal-digit)))
+
+;; (6.4.4.4)
+(define-peg-pattern hexadecimal-escape-sequence all
+ (and (ignore "\\x") (+ hexadecimal-digit)))
+
+;; A.1.6 String literals
+
+;; (6.4.5)
+(define-peg-pattern string-literal all
+ (and (? encoding-prefix)
+ (ignore "\"")
+ (* s-char)
+ (ignore "\"")))
+
+;; (6.4.5)
+(define-peg-pattern encoding-prefix all
+ (or "u8" "u" "U" "L"))
+
+;; (6.4.5)
+(define-peg-pattern s-char body
+ (or (and (not-followed-by (or "\"" "\\" "\n")) peg-any)
+ escape-sequence))
+
+;;; A.1.7
+
+;; (6.4.6)
+(define-peg-pattern punctuator all
+ (or "[" "]" "(" ")" "{" "}" "." "->"
+ "++" "--" "&" "*" "+" "-" "~" "!"
+ "/" "%" "<<" ">>" "<" ">" "<=" ">=" "==" "!=" "^" "|" "&&" "||"
+ "?" ":" ";" "..."
+ "=" "*=" "/=" "%=" "+=" "-=" "<<=" ">>=" "&=" "^=" "|="
+ "," "#" "##"
+ "<:" ":>" "<%" "%>" "%:" "%:%:"))
+
+;;; A.1.8 Header names
+
+(define-peg-pattern h-string all (+ h-char))
+(define-peg-pattern q-string all (+ q-char))
+
+;; (6.4.7)
+(define-peg-pattern header-name all
+ (or (and (ignore "<") h-string (ignore ">"))
+ (and (ignore "\"") q-string (ignore "\""))))
+
+;; (6.4.7)
+(define-peg-pattern h-char body
+ (or (and (not-followed-by (or ">" "\n")) peg-any)
+ escape-sequence))
+
+;; (6.4.7)
+(define-peg-pattern q-char body
+ (or (and (not-followed-by (or "\"" "\n")) peg-any)
+ escape-sequence))
+
+;;; A.1.9 Preprocessing numbers
+
+;; (6.4.8)
+(define-peg-pattern pp-number all
+ (and (? ".") digit
+ (* (or digit
+ identifier-nondigit
+ (and (or "e" "E" "p" "P")
+ sign)
+ "."))))
+
+
+
+(define-peg-pattern whitespace all
+ (or "\t" "\n" "\v" "\f" " "
+ ;; "\r"
+ ))
+
+(define-peg-pattern block-comment body
+ (and (ignore "/*")
+ (* (and (not-followed-by "*/")
+ peg-any))
+ (ignore "*/")))
+
+(define-peg-pattern line-comment body
+ (and (ignore "//")
+ (* (and (not-followed-by "\n")
+ peg-any))))
+
+(define-peg-pattern comment all
+ (or line-comment block-comment))
+
+(define-peg-pattern preprocessing-tokens body
+ (* (or whitespace
+ comment
+ preprocessing-token)))
+
+
+(define (lex string)
+ (peg:tree (match-pattern preprocessing-tokens string)))