From cbddc0ec9431b759567fa631dd0c19526d0ff775 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20H=C3=B6rnquist?= Date: Wed, 13 Jul 2022 17:15:27 +0200 Subject: Basis of token convertion. --- module/c/cpp-types.scm | 12 +++++++-- module/c/lex2.scm | 12 ++++++++- module/c/preprocessor2.scm | 2 +- module/c/to-token.scm | 62 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 84 insertions(+), 4 deletions(-) create mode 100644 module/c/to-token.scm (limited to 'module') diff --git a/module/c/cpp-types.scm b/module/c/cpp-types.scm index 82ebb922..b08e9810 100644 --- a/module/c/cpp-types.scm +++ b/module/c/cpp-types.scm @@ -10,10 +10,11 @@ newline-token? identifier-token? punctuator-token? - number-token? + pp-number? string-token? h-string-token? q-string-token? + character-constant? )) (define (whitespace-token? x) @@ -52,19 +53,26 @@ (`(punctuator ,x) x) (_ #f)))) -(define (number-token? token) +(define (pp-number? token) (and (preprocessing-token? token) (match (lexeme-body token) (`(pp-number ,x) x) (_ #f)))) ;; TODO this fails if there are multiple components in the string token +;; TODO rename to string-literal-token? (define (string-token? token) (and (preprocessing-token? token) (match (lexeme-body token) (`(string-literal ,x) x) (_ #f)))) +(define (character-constant? token) + (and (preprocessing-token? token) + (match (lexeme-body token) + (`(character-constant ,x) x) + (_ #f)))) + (define (h-string-token? token) (and (preprocessing-token? token) diff --git a/module/c/lex2.scm b/module/c/lex2.scm index 72f79f55..049cc48c 100644 --- a/module/c/lex2.scm +++ b/module/c/lex2.scm @@ -9,7 +9,10 @@ placemaker (type . lexeme-type) (body . lexeme-body) - (noexpand . lexeme-noexpand))) + (noexpand . lexeme-noexpand) + + parse-c-number + )) ;;; A.1 Lexical grammar ;;; A.1.1 Lexical elements @@ -371,6 +374,7 @@ + ;; At a number of places I chose token depending on the order of the rule. The ;; standard however says that the longest possible choice should be used. ;; 6.4 p. 4 @@ -392,3 +396,9 @@ "Parsing just failed. Chars: ~s" (list (peg:string result)) #f)) (cdr (peg:tree result)))))) + + + +(define (parse-c-number string) + (match-pattern constant string)) + diff --git a/module/c/preprocessor2.scm b/module/c/preprocessor2.scm index 755eaa7b..c1db3f08 100644 --- a/module/c/preprocessor2.scm +++ b/module/c/preprocessor2.scm @@ -529,7 +529,7 @@ (list tokens*) #f)))) (let loop ((%first-time #t) (tokens tokens*)) (cond ((null? tokens)) - ((number-token? (car tokens)) + ((pp-number? (car tokens)) => (lambda (line) (let ((line (string->number line)) (remaining (drop-whitespace (cdr tokens)))) diff --git a/module/c/to-token.scm b/module/c/to-token.scm new file mode 100644 index 00000000..c1efcc02 --- /dev/null +++ b/module/c/to-token.scm @@ -0,0 +1,62 @@ +(define-module (c to-token) + :use-module ((system base lalr) + :select (make-lexical-token)) + :use-module (c cpp-types) + :use-module ((c lex2) :select (parse-c-number)) + ;; :use-module (hnh util type) + :export (preprocessing-token->token)) + +(define (pp-number->c-number token) + (parse-c-number (pp-number? token))) + +(define keywords + '("auto" "break" "case" "char" "const" "continue" "default" + "do" "double" "else" "enum" "extern" "float" "for" "goto" + "if" "inline" "int" "long" "register" "restrict" "return" + "short" "signed" "sizeof" "static" "struct" "switch" + "typedef" "union" "unsigned" "void" "volatile" "while" + "_Alignas" "_Alignof" "_Atomic" "_Bool" "_Complex" + "_Generic" "_Imaginary" "_Noreturn" "_Static_assert" + "_Thread_local")) + +(define (preprocessing-token->token cpp-token) + (cond ((string-token? cpp-token) + => (lambda (content) + (make-lexical-token 'string-literal #f content))) + + ((identifier-token? cpp-token) + => (lambda (name) + (if (member name keywords) + (string->symbol name) + (make-lexical-token 'identifier #f name)))) + + ((pp-number? cpp-token) + => (lambda (content) + ;; TOOD should return an integer-constant or a floating-constant + (make-lexical-token 'constant #f content))) + + ((character-constant? cpp-token) + => (lambda (x) (make-lexical-token 'constant #f x))) + + ((punctuator-token? cpp-token) + => (lambda (s) + (cond ((string=? s "{") 'lbrace) + ((string=? s "}") 'rbrace) + ((string=? s "[") 'lbrack) + ((string=? s "]") 'rbrack) + ((string=? s "(") 'lparen) + ((string=? s ")") 'rparen) + ((string=? s ".") 'dot) + ((string=? s "|") 'pipe) + ((string=? s "||") 'pipe2) + ((string=? s ";") 'semicolon) + ((string=? s "|=") 'pipe=) + ((string=? s ",") 'comma) + ((string=? s "#") 'hash) + ((string=? s "##") 'hash2) + (else (string->symbol s))))) + + (else + (scm-error 'cpp-error "preprocessing-token->token" + "Can't convert ~s into a \"regular\" token." + (list cpp-token) #f)))) -- cgit v1.2.3