diff options
Diffstat (limited to 'module/c/lex2.scm')
-rw-r--r-- | module/c/lex2.scm | 323 |
1 files changed, 323 insertions, 0 deletions
diff --git a/module/c/lex2.scm b/module/c/lex2.scm new file mode 100644 index 00000000..23fa9da4 --- /dev/null +++ b/module/c/lex2.scm @@ -0,0 +1,323 @@ +(define-module (c lex2) + :use-module (ice-9 peg) + :export (lex)) + +;;; A.1 Lexical grammar +;;; A.1.1 Lexical elements + +;; (6.4) +(define-peg-pattern token all + (or keyword + identifier + constant + string-literal + punctuator + )) + +;; (6.4) +(define-peg-pattern preprocessing-token all + ;; string literal moved before header-name since string literals + ;; otherwise became q-strings + (or string-literal + header-name + identifier + pp-number + character-constant + punctuator + ;; Each non-white-space character that cannot be one of the above + )) + +;;; A.1.2 Keywords + +;; (6.4.1) +(define-peg-pattern keyword all + (or "auto" "break" "case" "char" "const" "continue" "default" + "do" "double" "else" "enum" "extern" "float" "for" "goto" + "if" "inline" "int" "long" "register" "restrict" "return" + "short" "signed" "sizeof" "static" "struct" "switch" + "typedef" "union" "unsigned" "void" "volatile" "while" + "_Alignas" "_Alignof" "_Atomic" "_Bool" "_Complex" + "_Generic" "_Imaginary" "_Noreturn" "_Static_assert" + "_Thread_local")) + +;;; A.1.3 Identifiers + +;; (6.4.2.1) +(define-peg-pattern identifier all + (and identifier-nondigit (* (or identifier-nondigit digit)))) + +;; (6.4.2.1) +(define-peg-pattern identifier-nondigit body + (or nondigit + universal-character-name + ;; TODO other implementation-defined characters + )) + +;; (6.4.2.1) +(define-peg-pattern nondigit body + (or "_" + (range #\A #\Z) + (range #\a #\z))) + +;; (6.4.2.1) +(define-peg-pattern digit body + (range #\0 #\9)) + +;;; A.1.4 Universal character names + +;; (6.4.3) +(define-peg-pattern universal-character-name all + (or (and "\\u" hex-quad) + (and "\\U" hex-quad hex-quad))) + +;; (6.4.3) +(define-peg-pattern hex-quad body + (and hexadecimal-digit hexadecimal-digit + hexadecimal-digit hexadecimal-digit)) + +;;; A.1.5 Constants + +;; (6.4.4) +(define-peg-pattern constant all + ;; Int and float swapped from standard since we need to try parsing + ;; the floats beforehand + (or floating-constant + integer-constant + enumeration-constant + character-constant)) + +;; (6.4.4.1) +(define-peg-pattern integer-constant all + (and (or decimal-constant + octal-constant + hexadecimal-constant) + integer-suffix)) + +;; (6.4.4.1) +(define-peg-pattern decimal-constant all + (and nonzero-digit + (+ digit))) + +;; (6.4.4.1) +(define-peg-pattern octal-constant all + (+ octal-digit)) + +;; (6.4.4.1) +(define-peg-pattern hexadecimal-constant all + (and hexadecimal-prefix (+ hexadecimal-digit))) + +;; (6.4.4.1) +(define-peg-pattern hexadecimal-prefix none + (or "0x" "0X")) + +;; (6.4.4.1) +(define-peg-pattern nonzero-digit body + (range #\1 #\9)) + +;; (6.4.4.1) +(define-peg-pattern octal-digit body + (range #\0 #\7)) + +;; (6.4.4.1) +(define-peg-pattern hexadecimal-digit body + (or (range #\0 #\9) + (range #\a #\f) + (range #\A #\F))) + +;; (6.4.4.1) +(define-peg-pattern integer-suffix all + (or (and unsigned-suffix (? long-suffix)) + (and long-suffix (? unsigned-suffix)))) + +;; (6.4.4.1) +;; This is a merger of long-suffix and long-long-suffix +(define-peg-pattern long-suffix body + (or "l" "L" "ll" "LL")) + +;; (6.4.4.1) +(define-peg-pattern unsigned-suffix body + (or "u" "U")) + +;; (6.4.4.2) +(define-peg-pattern floating-constant all + (or decimal-floating-constant + hexadecimal-floating-constant)) + +;; (6.4.4.2) +(define-peg-pattern floating-constant all + (or (and fractional-constant (? exponent-part) (? floating-suffix)) + (and digit-sequence exponent-part (? floating-suffix)))) + +;; (6.4.4.2) +(define-peg-pattern hexadecimal-floating-constant all + (and hexadecimal-prefix + (or hexadecimal-fractional-constant + hexadecimal-digit-sequence) + binary-exponent-part + (? floating-suffix))) + +;; (6.4.4.2) +(define-peg-pattern fractional-constant all + (or (and (? digit-sequence) "." digit-sequence) + (and digit-sequence "."))) + +;; (6.4.4.2) +(define-peg-pattern exponent-part all + (and (or "e" "E") (? sign) digit-sequence)) + +;; (6.4.4.2) +(define-peg-pattern sign all + (or "+" "-")) + +;; (6.4.4.2) +(define-peg-pattern digit-sequence body + (+ digit)) + +;; (6.4.4.2) +(define-peg-pattern hexadecimal-fractional-constant all + (or (and (? hexadecimal-digit-sequence) "." hexadecimal-digit-sequence) + (and hexadecimal-digit-sequence "."))) + +;; (6.4.4.2) +(define-peg-pattern binary-exponent-part all + (and (or "p" "P") + (? sign) + digit-sequence)) + +;; (6.4.4.2) +(define-peg-pattern hexadecimal-digit-sequence body + (+ hexadecimal-digit)) + +;; (6.4.4.2) +(define-peg-pattern floating-suffix all + (or "f" "l" "F" "L")) + +;; (6.4.4.3) +(define-peg-pattern enumeration-constant all + identifier) + +(define-peg-pattern character-prefix all + (or "L" "u" "U")) + +;; (6.4.4.4) +(define-peg-pattern character-constant all + (and (? character-prefix) + (ignore "'") + (+ c-char) + (ignore "'"))) + +;; (6.4.4.4) +(define-peg-pattern c-char body + (or (and (not-followed-by (or "'" "\\" "\n")) peg-any) + escape-sequence)) + +;; (6.4.4.4) +(define-peg-pattern escape-sequence all + (or simple-escape-sequence + octal-escape-sequence + hexadecimal-escape-sequence + universal-character-name)) + +;; (6.4.4.4) +(define-peg-pattern simple-escape-sequence all + (and (ignore "\\") (or "'" "\"" "?" "\\" + "a" "b" "f" "n" "r" "t" "v"))) + +;; (6.4.4.4) +(define-peg-pattern octal-escape-sequence all + (and (ignore "\\") octal-digit (? octal-digit) (? octal-digit))) + +;; (6.4.4.4) +(define-peg-pattern hexadecimal-escape-sequence all + (and (ignore "\\x") (+ hexadecimal-digit))) + +;; A.1.6 String literals + +;; (6.4.5) +(define-peg-pattern string-literal all + (and (? encoding-prefix) + (ignore "\"") + (* s-char) + (ignore "\""))) + +;; (6.4.5) +(define-peg-pattern encoding-prefix all + (or "u8" "u" "U" "L")) + +;; (6.4.5) +(define-peg-pattern s-char body + (or (and (not-followed-by (or "\"" "\\" "\n")) peg-any) + escape-sequence)) + +;;; A.1.7 + +;; (6.4.6) +(define-peg-pattern punctuator all + (or "[" "]" "(" ")" "{" "}" "." "->" + "++" "--" "&" "*" "+" "-" "~" "!" + "/" "%" "<<" ">>" "<" ">" "<=" ">=" "==" "!=" "^" "|" "&&" "||" + "?" ":" ";" "..." + "=" "*=" "/=" "%=" "+=" "-=" "<<=" ">>=" "&=" "^=" "|=" + "," "#" "##" + "<:" ":>" "<%" "%>" "%:" "%:%:")) + +;;; A.1.8 Header names + +(define-peg-pattern h-string all (+ h-char)) +(define-peg-pattern q-string all (+ q-char)) + +;; (6.4.7) +(define-peg-pattern header-name all + (or (and (ignore "<") h-string (ignore ">")) + (and (ignore "\"") q-string (ignore "\"")))) + +;; (6.4.7) +(define-peg-pattern h-char body + (or (and (not-followed-by (or ">" "\n")) peg-any) + escape-sequence)) + +;; (6.4.7) +(define-peg-pattern q-char body + (or (and (not-followed-by (or "\"" "\n")) peg-any) + escape-sequence)) + +;;; A.1.9 Preprocessing numbers + +;; (6.4.8) +(define-peg-pattern pp-number all + (and (? ".") digit + (* (or digit + identifier-nondigit + (and (or "e" "E" "p" "P") + sign) + ".")))) + + + +(define-peg-pattern whitespace all + (or "\t" "\n" "\v" "\f" " " + ;; "\r" + )) + +(define-peg-pattern block-comment body + (and (ignore "/*") + (* (and (not-followed-by "*/") + peg-any)) + (ignore "*/"))) + +(define-peg-pattern line-comment body + (and (ignore "//") + (* (and (not-followed-by "\n") + peg-any)))) + +(define-peg-pattern comment all + (or line-comment block-comment)) + +(define-peg-pattern preprocessing-tokens body + (* (or whitespace + comment + preprocessing-token))) + + +(define (lex string) + (peg:tree (match-pattern preprocessing-tokens string))) |