From db51e062af2984573535ba9681863787eab2feef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20H=C3=B6rnquist?= Date: Wed, 13 Jul 2022 17:15:46 +0200 Subject: Add C LALR parser. --- module/c/parse2.scm | 554 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 554 insertions(+) create mode 100644 module/c/parse2.scm diff --git a/module/c/parse2.scm b/module/c/parse2.scm new file mode 100644 index 00000000..fad2ffd8 --- /dev/null +++ b/module/c/parse2.scm @@ -0,0 +1,554 @@ +(define-module (c parse2) + :use-module (hnh util) + :use-module (system base lalr)) + +(define (make-parser) + (lalr-parser + (#{out-table:}# "/tmp/c-parser.txt") + (#{output:}# c-parser "/tmp/c-parser.scm") + ;; (#{driver:}# glr) + + + ( + ;; keywords + auto break case char const continue default do double else enum + extern float for goto if inline int long register restrict + return short signed sizeof static struct switch typedef union + unsigned void volatile while _Alignas _Alignof _Atomic _Bool + _Complex _Generic _Imaginary _Noreturn _Static_assert + _Thread_local + + ;; punctuators + ;; 6.4.6 + lbrack rbrack ; [] + lparen rparen ; () + lbrace rbrace ; {} + dot ; . + -> + ++ -- & * + - ~ ! + / % << >> < > <= >= == != ^ + pipe pipe2 ; | and || scheme handles these fine, but not emacs + && + ? : + semicolon + ... + = *= /= %= += -= <<= >>= &= ^= + pipe= ; |= + comma ; , + hash ; # + hash2 ; ## + ;; TODO digraphs + + ;; 6.4 + ;; keyword - already translated + identifier + constant + string-literal + ;; punctuator - already translated + ) + + ;; Primitives + + ;; (identifier) : $1 + ;; (constant) : $1 + ;; (string-literal) : $1 + + + ;; compounds + + (primary-expression + + ;; 6.5.1 + (identifier) + (constant) + (string-literal) + (lparen expression rparen) + (generic-selection)) + + (enumeration-constant + (identifier)) + + + ;; 6.5.1.1 + (generic-selection + (_Generic lparen assignment-expression comma generic-assoc-list)) + + (generic-assoc-list + (generic-association) + (generic-assoc-list comma generic-association)) + + (generic-association + (type-name : assignment-expression) + (default : assignment-expression)) + + ;; 6.5.2 + (postfix-expression + (primary-expression) + (postfix-expression lbrack expression rbrack) + (postfix-expression lparen rparen) + (postfix-expression lparen argument-expression-list rparen) + (postfix-expression dot identifier) + (postfix-expression -> identifier) + (postfix-expression ++) + (postfix-expression --) + (lparen type-name rparen lbrace initializer-list rbrace) + (lparen type-name rparen lbrace initializer-list comma rbrace)) + + (argument-expression-list + (assignment-expression) + (argument-expression-list comma assignment-expression)) + + ;; 6.5.3 + (unary-expression + (postfix-expression) + (++ unary-expression) + (-- unary-expression) + (unary-operator cast-expression) + (sizeof unary-expression) + (sizeof lparen type-name rparen) + (_Alignof lparen type-name rparen)) + + (unary-operator + (&) + (*) + (+) + (-) + (~) + (!)) + + ;; 6.5.4 + (cast-expression + (unary-expression) + (lparen type-name rparen cast-expression)) + + ;; 6.5.5 + (multiplicative-expression + (cast-expression) + (multiplicative-expression * cast-expression) + (multiplicative-expression / cast-expression) + (multiplicative-expression % cast-expression)) + + ;; 6.5.6 + (additive-expression + (multiplicative-expression) + (additive-expression + multiplicative-expression) + (additive-expression - multiplicative-expression)) + + + ;; 6.5.7 + (shift-expression + (additive-expression) + (shift-expression << additive-expression) + (shift-expression >> additive-expression)) + + ;; 6.5.8 + (relational-expression + (shift-expression) + (relational-expression < shift-expression) + (relational-expression > shift-expression) + (relational-expression <= shift-expression) + (relational-expression >= shift-expression)) + + ;; 6.5.9 + (equality-expression + (relational-expression) + (equality-expression == relational-expression) + (equality-expression != relational-expression)) + + + ;; 6.5.10 + (AND-expression + (equality-expression) + (AND-expression & equality-expression)) + + ;; 6.5.11 + (exclusive-OR-expression + (AND-expression) + (exclusive-OR-expression ^ AND-expression)) + + ;; 6.5.12 + (inclusive-OR-expression + (exclusive-OR-expression) + (inclusive-OR-expression pipe exclusive-OR-expression)) + + ;; 6.5.13 + (logical-AND-expression + (inclusive-OR-expression) + (logical-AND-expression && inclusive-OR-expression)) + + ;; 6.5.14 + (logical-OR-expression + (logical-AND-expression) + (logical-OR-expression pipe2 logical-AND-expression)) + + ;; 6.5.15 + (conditional-expression + (logical-OR-expression) + (logical-OR-expression ? expression : conditional-expression)) + + ;; 6.5.16 + (assignment-expression + (conditional-expression) + (unary-expression assignment-operator assignment-expression)) + + (assignment-operator + (=) + (*=) + (/=) + (%=) + (+=) + (-=) + (<<=) + (>>=) + (&=) + (^=) + (pipe=)) + + ;; 6.5.17 + (expression + (assignment-expression) + (expression comma assignment-expression)) + + ;; 6.6 constant expression + (constant-expression + (expression)) + + ;; 6.7 + (declaration + (declaration-specifiers semicolon) + (declaration-specifiers init-declarator-list semicolon) + (static_assert-declaration)) + + (declaration-specifiers + (storage-class-specifier) + (storage-class-specifier declaration-specifiers) + + (type-specifier) + (type-specifier declaration-specifiers) + + (type-qualifier) + (type-qualifier declaration-specifiers) + + (function-specifier) + (function-specifier declaration-specifiers) + + (alignment-specifier) + (alignment-specifier declaration-specifiers)) + + (init-declarator-list + (init-declarator) + (init-declarator-list comma init-declarator)) + + (init-declarator + (declarator) + (declarator = initializer)) + + + ;; 6.7.1 + (storage-class-specifier + (typedef) + (extern) + (static) + (_Thread_local) + (auto) + (register)) + + + ;; 6.7.2 + (type-specifier + (void) + (char) + (short) + (int) + (long) + (float) + (double) + (signed) + (unsigned) + (_Bool) + (_Complex) + (atomic-type-specifier) + (struct-or-union-specifier) + (enum-specifier) + (typedef-name)) + + ;; 6.7.2.1 + (struct-or-union-specifier + (struct-or-union lbrace struct-declaration-list rbrace) + (struct-or-union identifier lbrace struct-declaration-list rbrace) + (struct-or-union identifier)) + + (struct-or-union + (struct) + (union)) + + (struct-declaration-list + (struct-declaration) + (struct-declaration-list struct-declaration)) + + (struct-declaration + (specifier-qualifier-list semicolon) + (specifier-qualifier-list struct-declarator-list semicolon) + (static_assert-declaration)) + + (specifier-qualifier-list + (type-specifier) + (type-specifier specifier-qualifier-list) + + (type-qualifier) + (type-qualifier specifier-qualifier-list)) + + (struct-declarator-list + (struct-declarator) + (struct-declarator-list comma struct-declarator)) + + (struct-declarator + (declarator) + (: constant-expression) + (declarator : constant-expression)) + + ;; 6.7.2.2 + (enum-specifier + (enum identifier lbrace enumerator-list rbrace) + (enum lbrace enumerator-list rbrace) + + (enum identifier lbrace enumerator-list comma rbrace) + (enum lbrace enumerator-list comma rbrace) + + (enum identifier)) + + (enumerator-list + (enumerator) + (enumerator-list comma enumerator)) + + (enumerator + (enumeration-constant) + (enumeration-constant = constant-expression)) + + ;; 6.7.2.4 + (atomic-type-specifier + (_Atomic lparen type-name rparen)) + + ;; 6.7.3 + (type-qualifier + (const) + (restrict) + (volatile) + (_Atomic)) + + ;; 6.7.4 + (function-specifier + (inline) + (_Noreturn)) + + ;; 6.7.5 + (alignment-specifier + (_Alignas lparen type-name rparen) + (_Alignas lparen constant-expression rparen)) + + ;; 6.7.6 + + (declarator + (pointer direct-declarator) + (direct-declarator)) + + (direct-declarator + (identifier) + ( lparen declarator rparen ) + + (direct-declarator lbrack type-qualifier-list assignment-expression rbrack ) + (direct-declarator lbrack assignment-expression rbrack ) + (direct-declarator lbrack type-qualifier-list rbrack ) + (direct-declarator lbrack rbrack ) + + (direct-declarator lbrack static type-qualifier-list assignment-expression rbrack) + (direct-declarator lbrack static assignment-expression rbrack) + + (direct-declarator lbrack type-qualifier-list static assignment-expression rbrack) + + (direct-declarator lbrack type-qualifier-list * rbrack) + (direct-declarator lbrack * rbrack) + + (direct-declarator lparen parameter-type-list rparen ) + (direct-declarator lparen identifier-list rparen ) + (direct-declarator lparen rparen )) + + (pointer + (* type-qualifier-list) + (*) + (* type-qualifier-list pointer) + (* pointer)) + + (type-qualifier-list + (type-qualifier) + (type-qualifier-list type-qualifier)) + + (parameter-type-list + (parameter-list) + (parameter-list comma ...)) + + (parameter-list + (parameter-declaration) + (parameter-list comma parameter-declaration)) + + (parameter-declaration + (declaration-specifiers declarator) + (declaration-specifiers abstract-declarator) + (declaration-specifiers)) + + (identifier-list + (identifier) + (identifier-list comma identifier)) + + ;; 6.7.7 + (type-name + (specifier-qualifier-list) + (specifier-qualifier-list abstract-declarator)) + + (abstract-declarator + (pointer) + (pointer direct-abstract-declarator) + ( direct-abstract-declarator)) + + (direct-abstract-declarator + ( lparen abstract-declarator rparen ) + (direct-abstract-declarator lbrack type-qualifier-list assignment-expression rbrack ) + (direct-abstract-declarator lbrack type-qualifier-list rbrack ) + (direct-abstract-declarator lbrack assignment-expression rbrack ) + ( lbrack rbrack ) + ( lbrack type-qualifier-list assignment-expression rbrack ) + ( lbrack type-qualifier-list rbrack ) + (direct-abstract-declarator lbrack * rbrack) + ( lbrack * rbrack) + (direct-abstract-declarator lparen parameter-type-list rparen ) + (direct-abstract-declarator lparen rparen ) + ( lparen parameter-type-list rparen ) + ( lparen rparen )) + + ;; 6.7.8 + (typedef-name + (identifier)) + + ;; 6.7.9 + (initializer + (assignment-expression) + (lbrace initializer-list rbrace) + (lbrace initializer-list comma rbrace)) + + (initializer-list + (designation initializer) + (initializer) + (initializer-list comma designation initializer) + (initializer-list comma initializer)) + + (designation + (designator-list =)) + + (designator-list + (designator) + (designator-list designator)) + + (designator + (lbrack constant-expression rbrack) + (dot identifier)) + + ;; 6.7.10 + (static_assert-declaration + (_Static_assert lparen constant-expression comma string-literal rparen semicolon)) + + ;; 6.8 + (statement + (labeled-statement) + (compound-statement) + (expression-statement) + (selection-statement) + (iteration-statement) + (jump-statement)) + + ;; 6.8.1 + (labeled-statement + (identifier : statement) + (case constant-expression : statement) + (default : statement)) + + ;; 6.8.2 + (compound-statement + (lbrace block-item-list rbrace) + (lbrace rbrace)) + + (block-item-list + (block-item) + (block-item-list block-item)) + + (block-item + (declaration) + (statement)) + + ;; 6.8.3 + (expression-statement + (expression semicolon) + (semicolon)) + + (selection-statement + (if lparen expression rparen statement) + (if lparen expression rparen statement else statement) + (switch lparen expression rparen statement)) + + ;; 6.8.5 + (iteration-statement + (while lparen expression rparen statement) + (do statement while lparen expression rparen semicolon) + (for lparen expression semicolon expression semicolon expression rparen statement) + (for lparen expression semicolon expression semicolon rparen statement) + (for lparen expression semicolon semicolon expression rparen statement) + (for lparen semicolon semicolon rparen statement) + (for lparen semicolon expression semicolon expression rparen statement) + (for lparen semicolon expression semicolon rparen statement) + (for lparen declaration expression semicolon expression rparen statement) + (for lparen declaration expression semicolon rparen statement) + (for lparen declaration semicolon expression rparen statement) + (for lparen declaration semicolon rparen statement)) + + ;; 6.8.6 + (jump-statement + (goto identifier semicolon) + (continue semicolon) + (break semicolon) + (return expression semicolon) + (return semicolon)) + + + ;; 6.9 + (translation-unit + (external-declaration) + (translation-unit external-declaration)) + + (external-declaration + (function-definition) + (declaration)) + + ;; 6.9.1 + (function-definition + (declaration-specifiers declarator declaration-list compound-statement) + (declaration-specifiers declarator compound-statement)) + + (declaration-list + (declaration) + (declaration-list declaration)))) + + +(define (build-lexical-analyzer tokens) + (let ((tokens tokens)) + (lambda () + (if (null? tokens) + '*eoi* + (begin1 (car tokens) + (set! tokens (cdr tokens))))))) + + +;; (build-lexical-analyzer (list (cons 'string "hello"))) + +(define (error-procedure a b) + (throw 'parse-error a b)) + +;; (parser lexical-analyzer error-procedure) -- cgit v1.2.3