(define-module (test cpp lex2)
:use-module (srfi srfi-64)
:use-module (srfi srfi-88)
:use-module (ice-9 peg)
:use-module (c lex2))
(define (l s) (lexeme type: 'preprocessing-token body: s))
(define (ls . xs)
(map l xs))
(test-equal "Integer literal"
(ls '(pp-number "10"))
(lex "10"))
(test-equal "String literal"
(ls `(string-literal (encoding-prefix) "Hello"))
(lex "\"Hello\""))
(test-equal "Mulitple tokens, including whitespace"
(list (lexeme type: 'whitespace body: " ")
(l '(pp-number "10"))
(lexeme type: 'whitespace body: " "))
(lex " 10 "))
(test-equal "Char literal"
(ls `(character-constant (character-prefix) "a"))
(lex "'a'"))
(test-equal "Comment inside string"
(ls `(string-literal (encoding-prefix) "Hel/*lo"))
(lex "\"Hel/*lo\""))
(test-equal "#define line"
(list
(l '(punctuator "#"))
(l '(identifier "define"))
(lexeme type: 'whitespace body: " ")
(l '(identifier "f"))
(l '(punctuator "("))
(l '(identifier "x"))
(l '(punctuator ")"))
(lexeme type: 'whitespace body: " ")
(l '(pp-number "10")))
(lex "#define f(x) 10"))
(test-equal "Nested parenthesis"
(list
(l '(identifier "f"))
(l '(punctuator "("))
(l '(pp-number "1"))
(l '(punctuator ","))
(lexeme type: 'whitespace body: " ")
(l '(punctuator "("))
(l '(pp-number "2"))
(l '(punctuator ","))
(lexeme type: 'whitespace body: " ")
(l '(pp-number "3"))
(l '(punctuator ")"))
(l '(punctuator ","))
(lexeme type: 'whitespace body: " ")
(l '(pp-number "4"))
(l '(punctuator ")")))
(lex "f(1, (2, 3), 4)"))
;; Generating a single lexeme
;; (whitespace " ")
;; would also be ok
(test-equal "Grouped whitespace"
(list (lexeme type: 'whitespace body: " ")
(lexeme type: 'whitespace body: " "))
(lex " "))
(test-equal "Newlines get sepparate whitespace tokens"
(list (lexeme type: 'whitespace body: " ")
(lexeme type: 'whitespace body: " ")
(lexeme type: 'whitespace body: "\n")
(lexeme type: 'whitespace body: " "))
(lex " \n "))
;; Refer to 6.4 p.1 for the syntax requirement
;; 6.10.9 p. 2 for the sample string
(test-equal "each non-white-space character that cannot be one of the above"
(list (l '(punctuator "."))
(l '(punctuator "."))
(lexeme type: 'other body: "\\") ; <- Interesting part
(l '(identifier "listing"))
(l '(punctuator "."))
(l '(identifier "dir")))
(lex "..\\listing.dir"))
(test-equal "Propper H-string"
(ls '(header-name (h-string "a")))
(lex ""))
(test-equal "Unexpected h-string"
(list (l '(pp-number "1"))
(lexeme type: 'whitespace body: " ")
(l '(header-name (h-string " 2 ")))
(lexeme type: 'whitespace body: " ")
(l '(pp-number "3")))
(lex "1 < 2 > 3"))
(test-equal "Quotation mark inside h-string"
(ls '(header-name (h-string "a\"b")))
(lex ""))
(test-equal "Interaction of h-strings and regular strings"
(test-equal "Less than string, not h-string"
(ls '(pp-number "1")
'(string-literal (encoding-prefix) "<")
'(punctuator ">"))
(lex "1\"<\">"))
(test-equal "H-string, not string"
(list (lexeme type: 'preprocessing-token body: '(pp-number "1"))
(lexeme type: 'preprocessing-token body: '(header-name (h-string "\"")))
(lexeme type: 'other body: "\""))
(lex "1<\">\"")))
(test-equal "Q-strings are lexed as regular strings"
(list (l '(punctuator "#"))
(l '(identifier "include"))
(lexeme type: 'whitespace body: " ")
(l '(string-literal (encoding-prefix) "test")))
;; # include here, since generated tokens could possible depend on that context,
;; and the reason regular strings are returned is since the lexer doesn't check
;; that context
(lex "#include \"test\"")
)
(test-group "Unicode"
(test-equal "In string literals"
(ls '(string-literal (encoding-prefix) "åäö"))
(lex "\"åäö\""))
(test-equal "Outside string literals"
(list (lexeme type: 'other body: "å")
(lexeme type: 'other body: "ä")
(lexeme type: 'other body: "ö"))
(lex "åäö")))
(test-group "Characters with prefixes"
(test-equal (ls '(character-constant (character-prefix . "u")
"a"))
(lex "u'a'"))
(test-equal (ls '(character-constant (character-prefix . "U")
"a"))
(lex "U'a'"))
(test-equal (ls '(character-constant (character-prefix . "L")
"a"))
(lex "L'a'")))
;; Note that these strings have 0 "data" components
(test-group "Strings with prefixes"
(test-equal (ls '(string-literal (encoding-prefix . "u8")))
(lex "u8\"\""))
(test-equal (ls '(string-literal (encoding-prefix . "u")))
(lex "u\"\""))
(test-equal (ls '(string-literal (encoding-prefix . "U")))
(lex "U\"\""))
(test-equal (ls '(string-literal (encoding-prefix . "L")))
(lex "L\"\"")))