(define-module (test cpp lex2)
:use-module (srfi srfi-64)
:use-module (srfi srfi-88)
:use-module (ice-9 peg)
:use-module (c lex2))
;; A bare decimal number is expected to lex to exactly one
;; preprocessing token whose body is a pp-number.
(test-equal "Integer literal"
  (let ((number-body '(pp-number "10")))
    (list (lexeme type: 'preprocessing-token body: number-body)))
  (lex "10"))
;; The expected token body holds the string contents without the
;; surrounding double quotes.
(test-equal "String literal"
  (let ((string-body '(string-literal "Hello")))
    (list (lexeme type: 'preprocessing-token body: string-body)))
  (lex "\"Hello\""))
;; Whitespace on either side of a token is expected to be reported as
;; its own lexeme rather than being dropped.
(test-equal "Mulitple tokens, including whitespace"
  (let ((ws  (lambda (text) (lexeme type: 'whitespace body: text)))
        (tok (lambda (body) (lexeme type: 'preprocessing-token body: body))))
    (list (ws " ")
          (tok '(pp-number "10"))
          (ws " ")))
  (lex " 10 "))
;; The expected token body holds the character without the
;; surrounding single quotes.
(test-equal "Char literal"
  (let ((char-body '(character-constant "a")))
    (list (lexeme type: 'preprocessing-token body: char-body)))
  (lex "'a'"))
;; A comment opener appearing inside a string literal is expected to
;; stay part of the string body.
(test-equal "Comment inside string"
  (let ((string-body '(string-literal "Hel/*lo")))
    (list (lexeme type: 'preprocessing-token body: string-body)))
  (lex "\"Hel/*lo\""))
;; A function-like macro definition: the directive, the macro name
;; with its parameter list, and the replacement text, with the
;; separating spaces expected as whitespace lexemes.
(test-equal "#define line"
  (let ((ws  (lambda (text) (lexeme type: 'whitespace body: text)))
        (tok (lambda (body) (lexeme type: 'preprocessing-token body: body))))
    (list (tok '(punctuator "#"))
          (tok '(identifier "define"))
          (ws " ")
          (tok '(identifier "f"))
          (tok '(punctuator "("))
          (tok '(identifier "x"))
          (tok '(punctuator ")"))
          (ws " ")
          (tok '(pp-number "10"))))
  (lex "#define f(x) 10"))
;; A call with a parenthesised argument: the expected result is a flat
;; lexeme sequence, with no nesting for the inner parentheses.
(test-equal "Nested parenthesis"
  (let ((ws  (lambda (text) (lexeme type: 'whitespace body: text)))
        (tok (lambda (body) (lexeme type: 'preprocessing-token body: body))))
    (list (tok '(identifier "f"))
          (tok '(punctuator "("))
          (tok '(pp-number "1"))
          (tok '(punctuator ","))
          (ws " ")
          (tok '(punctuator "("))
          (tok '(pp-number "2"))
          (tok '(punctuator ","))
          (ws " ")
          (tok '(pp-number "3"))
          (tok '(punctuator ")"))
          (tok '(punctuator ","))
          (ws " ")
          (tok '(pp-number "4"))
          (tok '(punctuator ")"))))
  (lex "f(1, (2, 3), 4)"))
;; Two consecutive spaces are expected as two separate whitespace
;; lexemes, one per character.
;; Generating a single lexeme
;;   (whitespace "  ")
;; would also be ok
(test-equal "Grouped whitespace"
  (list (lexeme type: 'whitespace body: " ")
        (lexeme type: 'whitespace body: " "))
  ;; The input must contain two spaces: a one-character input cannot
  ;; produce the two single-space lexemes expected above.
  (lex "  "))
;; The input is two spaces, a newline and one more space; each of the
;; four characters is expected as its own whitespace lexeme, so the
;; newline is never merged with the neighbouring spaces.
(test-equal "Newlines get sepparate whitespace tokens"
  (list (lexeme type: 'whitespace body: " ")
        (lexeme type: 'whitespace body: " ")
        (lexeme type: 'whitespace body: "\n")
        (lexeme type: 'whitespace body: " "))
  ;; Four input characters to match the four expected lexemes.
  (lex "  \n "))
;; See 6.4 p.1 for the syntax requirement, and 6.10.9 p.2 for the
;; sample string used as input here.
(test-equal "each non-white-space character that cannot be one of the above"
  (let ((tok (lambda (body) (lexeme type: 'preprocessing-token body: body))))
    (list (tok '(punctuator "."))
          (tok '(punctuator "."))
          ;; The interesting part: the lone backslash fits no other
          ;; token category and is expected as an `other' lexeme.
          (lexeme type: 'other body: "\\")
          (tok '(identifier "listing"))
          (tok '(punctuator "."))
          (tok '(identifier "dir"))))
  (lex "..\\listing.dir"))
;; An angle-bracketed name is expected to lex to a single header-name
;; token wrapping an h-string; the brackets are not part of the body.
(test-equal "Propper H-string"
  (list (lexeme type: 'preprocessing-token body: '(header-name (h-string "a"))))
  ;; The input has to contain the bracketed name: an empty string
  ;; cannot produce the header-name token expected above.
  (lex "<a>"))
;; The `<' and `>' of what reads like a comparison are expected to be
;; taken as h-string delimiters, so everything between them (spaces
;; included) becomes the h-string body.
(test-equal "Unexpected h-string"
  (let ((ws  (lambda (text) (lexeme type: 'whitespace body: text)))
        (tok (lambda (body) (lexeme type: 'preprocessing-token body: body))))
    (list (tok '(pp-number "1"))
          (ws " ")
          (tok '(header-name (h-string " 2 ")))
          (ws " ")
          (tok '(pp-number "3"))))
  (lex "1 < 2 > 3"))
;; A double quote between angle brackets is expected to be kept as an
;; ordinary character of the h-string body.
(test-equal "Quotation mark inside h-string"
  (list (lexeme type: 'preprocessing-token body: '(header-name (h-string "a\"b"))))
  ;; The input has to contain the bracketed text: an empty string
  ;; cannot produce the header-name token expected above.
  (lex "<a\"b>"))
;; Whichever delimiter opens first decides how the text is lexed.
;; This is a `test-group' (not `test-equal') because it only bundles
;; the two independent checks below; it compares nothing itself.
(test-group "Interaction of h-strings and regular strings"
  ;; The double quote opens first, so `<' ends up inside a string
  ;; literal and the trailing `>' is a plain punctuator.
  (test-equal "Less than string, not h-string"
    (list (lexeme type: 'preprocessing-token body: '(pp-number "1"))
          (lexeme type: 'preprocessing-token body: '(string-literal "<"))
          (lexeme type: 'preprocessing-token body: '(punctuator ">")))
    (lex "1\"<\">"))
  ;; The `<' opens first, so the first double quote ends up inside an
  ;; h-string and the second, unmatched one is an `other' lexeme.
  (test-equal "H-string, not string"
    (list (lexeme type: 'preprocessing-token body: '(pp-number "1"))
          (lexeme type: 'preprocessing-token body: '(header-name (h-string "\"")))
          (lexeme type: 'other body: "\""))
    (lex "1<\">\"")))
;; The quoted file name after #include is expected as an ordinary
;; string-literal token, not as a header-name.
(test-equal "Q-strings are lexed as regular strings"
  (let ((tok (lambda (body) (lexeme type: 'preprocessing-token body: body))))
    (list (tok '(punctuator "#"))
          (tok '(identifier "include"))
          (lexeme type: 'whitespace body: " ")
          (tok '(string-literal "test"))))
  ;; The "#include" prefix is part of the input because the generated
  ;; tokens could possibly depend on that context.  A regular string
  ;; is returned because the lexer does not check that context.
  (lex "#include \"test\""))
;; Non-ASCII characters, inside and outside of string literals.
(test-group "Unicode"
  ;; Inside a string literal the characters are expected to be kept
  ;; together as the string body.
  (test-equal "In string literals"
    (let ((string-body '(string-literal "åäö")))
      (list (lexeme type: 'preprocessing-token body: string-body)))
    (lex "\"åäö\""))
  ;; Outside a literal each character is expected as its own `other'
  ;; lexeme.
  (test-equal "Outside string literals"
    (map (lambda (ch) (lexeme type: 'other body: ch))
         '("å" "ä" "ö"))
    (lex "åäö")))