diff options
author | Hugo Hörnquist <hugo@lysator.liu.se> | 2022-07-14 23:44:03 +0200 |
---|---|---|
committer | Hugo Hörnquist <hugo@lysator.liu.se> | 2022-07-14 23:44:03 +0200 |
commit | 64375c16c8316b8381ad59fa3538ad84732d90b7 (patch) | |
tree | cec344cb9306f1353ff0c4a4daaf7a81506eef5a /module/c/lex2.scm | |
parent | Add C LALR parser. (diff) | |
download | calp-64375c16c8316b8381ad59fa3538ad84732d90b7.tar.gz calp-64375c16c8316b8381ad59fa3538ad84732d90b7.tar.xz |
work
Diffstat (limited to 'module/c/lex2.scm')
-rw-r--r-- | module/c/lex2.scm | 44 |
1 files changed, 41 insertions, 3 deletions
diff --git a/module/c/lex2.scm b/module/c/lex2.scm index 049cc48c..647eff55 100644 --- a/module/c/lex2.scm +++ b/module/c/lex2.scm @@ -1,9 +1,12 @@ (define-module (c lex2) :use-module (ice-9 peg) :use-module (ice-9 match) + :use-module ((hnh util) :select (->)) :use-module (hnh util object) :use-module (hnh util type) :use-module (srfi srfi-88) + :use-module ((c trigraph) :select (replace-trigraphs)) + :use-module ((c line-fold) :select (fold-lines)) :export (lex lexeme lexeme? placemaker @@ -12,6 +15,8 @@ (noexpand . lexeme-noexpand) parse-c-number + + tokenize )) ;;; A.1 Lexical grammar @@ -268,10 +273,15 @@ (or "[" "]" "(" ")" "{" "}" "..." ; Moved to be before "." "." "->" - "++" "--" "&" "*" "+" "-" "~" "!" - "/" "%" "<<" ">>" "<" ">" "<=" ">=" "==" "!=" "^" "|" "&&" "||" + "&&" "||" + "!=" + "++" "--" + "*=" "/=" "%=" "+=" "-=" "<<=" ">>=" "&=" "^=" "|=" + "<=" ">=" "==" + "=" + "/" "%" "<<" ">>" "<" ">" "^" "|" "?" ":" ";" - "=" "*=" "/=" "%=" "+=" "-=" "<<=" ">>=" "&=" "^=" "|=" + "&" "*" "+" "-" "~" "!" "," "##" "#" ; # and ## flipped "<:" ":>" "<%" "%>" "%:%:" "%:" ; %: and %:%: flipped )) @@ -284,6 +294,8 @@ ;; (6.4.7) (define-peg-pattern header-name all (or (and (ignore "<") h-string (ignore ">")) + ;; NOTE this case will never be reached, since it's treated as a regular + ;; string instead (and (ignore "\"") q-string (ignore "\"")))) ;; (6.4.7) @@ -402,3 +414,29 @@ (define (parse-c-number string) (match-pattern constant string)) + + + +;;; 5.1.11.2 Translation phases + +(define (tokenize string) + (-> string +;;; 1. trigraph replacement + replace-trigraphs +;;; 2. Line folding + fold-lines +;;; 3. Decomposition into preprocenning tokens, whitespaces, and comments + lex + comments->whitespace)) + +;; These really belong in (c cpp-types), but that would create a dependency cycle + +(define (comment->whitespace token) + (if ;; (comment-token? token) + (and (lexeme? token) + (eq? 'comment (type token))) + (car (lex " ")) + token)) + +(define (comments->whitespace tokens) + (map comment->whitespace tokens)) |