aboutsummaryrefslogtreecommitdiff
path: root/module/c/lex2.scm
diff options
context:
space:
mode:
Diffstat (limited to 'module/c/lex2.scm')
-rw-r--r--module/c/lex2.scm44
1 files changed, 41 insertions, 3 deletions
diff --git a/module/c/lex2.scm b/module/c/lex2.scm
index 049cc48c..647eff55 100644
--- a/module/c/lex2.scm
+++ b/module/c/lex2.scm
@@ -1,9 +1,12 @@
(define-module (c lex2)
:use-module (ice-9 peg)
:use-module (ice-9 match)
+ :use-module ((hnh util) :select (->))
:use-module (hnh util object)
:use-module (hnh util type)
:use-module (srfi srfi-88)
+ :use-module ((c trigraph) :select (replace-trigraphs))
+ :use-module ((c line-fold) :select (fold-lines))
:export (lex
lexeme lexeme?
placemaker
@@ -12,6 +15,8 @@
(noexpand . lexeme-noexpand)
parse-c-number
+
+ tokenize
))
;;; A.1 Lexical grammar
@@ -268,10 +273,15 @@
(or "[" "]" "(" ")" "{" "}"
"..." ; Moved to be before "."
"." "->"
- "++" "--" "&" "*" "+" "-" "~" "!"
- "/" "%" "<<" ">>" "<" ">" "<=" ">=" "==" "!=" "^" "|" "&&" "||"
+ "&&" "||"
+ "!="
+ "++" "--"
+ "*=" "/=" "%=" "+=" "-=" "<<=" ">>=" "&=" "^=" "|="
+ "<=" ">=" "=="
+ "="
+ "/" "%" "<<" ">>" "<" ">" "^" "|"
"?" ":" ";"
- "=" "*=" "/=" "%=" "+=" "-=" "<<=" ">>=" "&=" "^=" "|="
+ "&" "*" "+" "-" "~" "!"
"," "##" "#" ; # and ## flipped
"<:" ":>" "<%" "%>" "%:%:" "%:" ; %: and %:%: flipped
))
@@ -284,6 +294,8 @@
;; (6.4.7)
(define-peg-pattern header-name all
(or (and (ignore "<") h-string (ignore ">"))
+ ;; NOTE this case will never be reached, since it's treated as a regular
+ ;; string instead
(and (ignore "\"") q-string (ignore "\""))))
;; (6.4.7)
@@ -402,3 +414,29 @@
(define (parse-c-number string)
(match-pattern constant string))
+
+
+
+;;; 5.1.11.2 Translation phases
+
+(define (tokenize string)
+ (-> string
+;;; 1. trigraph replacement
+ replace-trigraphs
+;;; 2. Line folding
+ fold-lines
+;;; 3. Decomposition into preprocenning tokens, whitespaces, and comments
+ lex
+ comments->whitespace))
+
+;; These really belong in (c cpp-types), but that would create a dependency cycle
+
+(define (comment->whitespace token)
+ (if ;; (comment-token? token)
+ (and (lexeme? token)
+ (eq? 'comment (type token)))
+ (car (lex " "))
+ token))
+
+(define (comments->whitespace tokens)
+ (map comment->whitespace tokens))