aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHugo Hörnquist <hugo@lysator.liu.se>2022-07-13 17:15:27 +0200
committerHugo Hörnquist <hugo@lysator.liu.se>2022-07-13 17:15:27 +0200
commitcbddc0ec9431b759567fa631dd0c19526d0ff775 (patch)
treebaec265016b2357939ed00d7704c96762d50e3e9
parentComment out typechecks on token stream. (diff)
downloadcalp-cbddc0ec9431b759567fa631dd0c19526d0ff775.tar.gz
calp-cbddc0ec9431b759567fa631dd0c19526d0ff775.tar.xz
Basis of token convertion.
-rw-r--r--module/c/cpp-types.scm12
-rw-r--r--module/c/lex2.scm12
-rw-r--r--module/c/preprocessor2.scm2
-rw-r--r--module/c/to-token.scm62
4 files changed, 84 insertions, 4 deletions
diff --git a/module/c/cpp-types.scm b/module/c/cpp-types.scm
index 82ebb922..b08e9810 100644
--- a/module/c/cpp-types.scm
+++ b/module/c/cpp-types.scm
@@ -10,10 +10,11 @@
newline-token?
identifier-token?
punctuator-token?
- number-token?
+ pp-number?
string-token?
h-string-token?
q-string-token?
+ character-constant?
))
(define (whitespace-token? x)
@@ -52,19 +53,26 @@
(`(punctuator ,x) x)
(_ #f))))
-(define (number-token? token)
+(define (pp-number? token)
(and (preprocessing-token? token)
(match (lexeme-body token)
(`(pp-number ,x) x)
(_ #f))))
;; TODO this fails if there are multiple components in the string token
+;; TODO rename to string-literal-token?
(define (string-token? token)
(and (preprocessing-token? token)
(match (lexeme-body token)
(`(string-literal ,x) x)
(_ #f))))
+(define (character-constant? token)
+ (and (preprocessing-token? token)
+ (match (lexeme-body token)
+ (`(character-constant ,x) x)
+ (_ #f))))
+
(define (h-string-token? token)
(and (preprocessing-token? token)
diff --git a/module/c/lex2.scm b/module/c/lex2.scm
index 72f79f55..049cc48c 100644
--- a/module/c/lex2.scm
+++ b/module/c/lex2.scm
@@ -9,7 +9,10 @@
placemaker
(type . lexeme-type)
(body . lexeme-body)
- (noexpand . lexeme-noexpand)))
+ (noexpand . lexeme-noexpand)
+
+ parse-c-number
+ ))
;;; A.1 Lexical grammar
;;; A.1.1 Lexical elements
@@ -371,6 +374,7 @@
+
;; At a number of places I chose token depending on the order of the rule. The
;; standard however says that the longest possible choice should be used.
;; 6.4 p. 4
@@ -392,3 +396,9 @@
"Parsing just failed. Chars: ~s"
(list (peg:string result)) #f))
(cdr (peg:tree result))))))
+
+
+
+(define (parse-c-number string)
+ (match-pattern constant string))
+
diff --git a/module/c/preprocessor2.scm b/module/c/preprocessor2.scm
index 755eaa7b..c1db3f08 100644
--- a/module/c/preprocessor2.scm
+++ b/module/c/preprocessor2.scm
@@ -529,7 +529,7 @@
(list tokens*) #f))))
(let loop ((%first-time #t) (tokens tokens*))
(cond ((null? tokens))
- ((number-token? (car tokens))
+ ((pp-number? (car tokens))
=> (lambda (line)
(let ((line (string->number line))
(remaining (drop-whitespace (cdr tokens))))
diff --git a/module/c/to-token.scm b/module/c/to-token.scm
new file mode 100644
index 00000000..c1efcc02
--- /dev/null
+++ b/module/c/to-token.scm
@@ -0,0 +1,62 @@
+(define-module (c to-token)
+ :use-module ((system base lalr)
+ :select (make-lexical-token))
+ :use-module (c cpp-types)
+ :use-module ((c lex2) :select (parse-c-number))
+ ;; :use-module (hnh util type)
+ :export (preprocessing-token->token))
+
+(define (pp-number->c-number token)
+ (parse-c-number (pp-number? token)))
+
+(define keywords
+ '("auto" "break" "case" "char" "const" "continue" "default"
+ "do" "double" "else" "enum" "extern" "float" "for" "goto"
+ "if" "inline" "int" "long" "register" "restrict" "return"
+ "short" "signed" "sizeof" "static" "struct" "switch"
+ "typedef" "union" "unsigned" "void" "volatile" "while"
+ "_Alignas" "_Alignof" "_Atomic" "_Bool" "_Complex"
+ "_Generic" "_Imaginary" "_Noreturn" "_Static_assert"
+ "_Thread_local"))
+
+(define (preprocessing-token->token cpp-token)
+ (cond ((string-token? cpp-token)
+ => (lambda (content)
+ (make-lexical-token 'string-literal #f content)))
+
+ ((identifier-token? cpp-token)
+ => (lambda (name)
+ (if (member name keywords)
+ (string->symbol name)
+ (make-lexical-token 'identifier #f name))))
+
+ ((pp-number? cpp-token)
+ => (lambda (content)
+ ;; TOOD should return an integer-constant or a floating-constant
+ (make-lexical-token 'constant #f content)))
+
+ ((character-constant? cpp-token)
+ => (lambda (x) (make-lexical-token 'constant #f x)))
+
+ ((punctuator-token? cpp-token)
+ => (lambda (s)
+ (cond ((string=? s "{") 'lbrace)
+ ((string=? s "}") 'rbrace)
+ ((string=? s "[") 'lbrack)
+ ((string=? s "]") 'rbrack)
+ ((string=? s "(") 'lparen)
+ ((string=? s ")") 'rparen)
+ ((string=? s ".") 'dot)
+ ((string=? s "|") 'pipe)
+ ((string=? s "||") 'pipe2)
+ ((string=? s ";") 'semicolon)
+ ((string=? s "|=") 'pipe=)
+ ((string=? s ",") 'comma)
+ ((string=? s "#") 'hash)
+ ((string=? s "##") 'hash2)
+ (else (string->symbol s)))))
+
+ (else
+ (scm-error 'cpp-error "preprocessing-token->token"
+ "Can't convert ~s into a \"regular\" token."
+ (list cpp-token) #f))))