(define-module (test cpp lex2) :use-module (srfi srfi-64) :use-module (srfi srfi-88) :use-module (ice-9 peg) :use-module (c lex2)) (define (l s) (lexeme type: 'preprocessing-token body: s)) (define (ls . xs) (map l xs)) (test-equal "Integer literal" (ls '(pp-number "10")) (lex "10")) (test-equal "String literal" (ls `(string-literal "Hello")) (lex "\"Hello\"")) (test-equal "Mulitple tokens, including whitespace" (list (lexeme type: 'whitespace body: " ") (l '(pp-number "10")) (lexeme type: 'whitespace body: " ")) (lex " 10 ")) (test-equal "Char literal" (ls `(character-constant (character-prefix) "a")) (lex "'a'")) (test-equal "Comment inside string" (ls `(string-literal "Hel/*lo")) (lex "\"Hel/*lo\"")) (test-equal "#define line" (list (l '(punctuator "#")) (l '(identifier "define")) (lexeme type: 'whitespace body: " ") (l '(identifier "f")) (l '(punctuator "(")) (l '(identifier "x")) (l '(punctuator ")")) (lexeme type: 'whitespace body: " ") (l '(pp-number "10"))) (lex "#define f(x) 10")) (test-equal "Nested parenthesis" (list (l '(identifier "f")) (l '(punctuator "(")) (l '(pp-number "1")) (l '(punctuator ",")) (lexeme type: 'whitespace body: " ") (l '(punctuator "(")) (l '(pp-number "2")) (l '(punctuator ",")) (lexeme type: 'whitespace body: " ") (l '(pp-number "3")) (l '(punctuator ")")) (l '(punctuator ",")) (lexeme type: 'whitespace body: " ") (l '(pp-number "4")) (l '(punctuator ")"))) (lex "f(1, (2, 3), 4)")) ;; Generating a single lexeme ;; (whitespace " ") ;; would also be ok (test-equal "Grouped whitespace" (list (lexeme type: 'whitespace body: " ") (lexeme type: 'whitespace body: " ")) (lex " ")) (test-equal "Newlines get sepparate whitespace tokens" (list (lexeme type: 'whitespace body: " ") (lexeme type: 'whitespace body: " ") (lexeme type: 'whitespace body: "\n") (lexeme type: 'whitespace body: " ")) (lex " \n ")) ;; Refer to 6.4 p.1 for the syntax requirement ;; 6.10.9 p. 2 for the sample string (test-equal "each non-white-space character that cannot be one of the above" (list (l '(punctuator ".")) (l '(punctuator ".")) (lexeme type: 'other body: "\\") ; <- Interesting part (l '(identifier "listing")) (l '(punctuator ".")) (l '(identifier "dir"))) (lex "..\\listing.dir")) (test-equal "Propper H-string" (ls '(header-name (h-string "a"))) (lex "")) (test-equal "Unexpected h-string" (list (l '(pp-number "1")) (lexeme type: 'whitespace body: " ") (l '(header-name (h-string " 2 "))) (lexeme type: 'whitespace body: " ") (l '(pp-number "3"))) (lex "1 < 2 > 3")) (test-equal "Quotation mark inside h-string" (ls '(header-name (h-string "a\"b"))) (lex "")) (test-equal "Interaction of h-strings and regular strings" (test-equal "Less than string, not h-string" (ls '(pp-number "1") '(string-literal "<") '(punctuator ">")) (lex "1\"<\">")) (test-equal "H-string, not string" (list (lexeme type: 'preprocessing-token body: '(pp-number "1")) (lexeme type: 'preprocessing-token body: '(header-name (h-string "\""))) (lexeme type: 'other body: "\"")) (lex "1<\">\""))) (test-equal "Q-strings are lexed as regular strings" (list (l '(punctuator "#")) (l '(identifier "include")) (lexeme type: 'whitespace body: " ") (l '(string-literal "test"))) ;; # include here, since generated tokens could possible depend on that context, ;; and the reason regular strings are returned is since the lexer doesn't check ;; that context (lex "#include \"test\"") ) (test-group "Unicode" (test-equal "In string literals" (ls '(string-literal "åäö")) (lex "\"åäö\"")) (test-equal "Outside string literals" (list (lexeme type: 'other body: "å") (lexeme type: 'other body: "ä") (lexeme type: 'other body: "ö")) (lex "åäö")))