;;; SRFI-64 test suite for the C preprocessor lexer.
;;;
;;; Every test feeds a source string to `lex' and compares the result with
;;; the expected list of lexemes built with `lexeme'.  Both procedures are
;;; presumably provided by (c lex2) -- confirm against that module.
;;;
;;; The lexeme shapes exercised here are:
;;;   type: 'preprocessing-token  body: (KIND STRING) or (header-name (h-string STRING))
;;;   type: 'whitespace           body: a one-character string
;;;   type: 'other                body: a one-character string

(define-module (test cpp lex2)
  :use-module (srfi srfi-64)
  :use-module (srfi srfi-88)
  :use-module (ice-9 peg)
  :use-module (c lex2))

(test-equal "Integer literal"
  (list (lexeme type: 'preprocessing-token body: '(pp-number "10")))
  (lex "10"))

(test-equal "String literal"
  (list (lexeme type: 'preprocessing-token body: '(string-literal "Hello")))
  (lex "\"Hello\""))

(test-equal "Mulitple tokens, including whitespace"
  (list (lexeme type: 'whitespace body: " ")
        (lexeme type: 'preprocessing-token body: '(pp-number "10"))
        (lexeme type: 'whitespace body: " "))
  (lex " 10 "))

(test-equal "Char literal"
  (list (lexeme type: 'preprocessing-token body: '(character-constant "a")))
  (lex "'a'"))

;; A comment opener inside a string literal must stay part of the literal.
(test-equal "Comment inside string"
  (list (lexeme type: 'preprocessing-token body: '(string-literal "Hel/*lo")))
  (lex "\"Hel/*lo\""))

(test-equal "#define line"
  (list (lexeme type: 'preprocessing-token body: '(punctuator "#"))
        (lexeme type: 'preprocessing-token body: '(identifier "define"))
        (lexeme type: 'whitespace body: " ")
        (lexeme type: 'preprocessing-token body: '(identifier "f"))
        (lexeme type: 'preprocessing-token body: '(punctuator "("))
        (lexeme type: 'preprocessing-token body: '(identifier "x"))
        (lexeme type: 'preprocessing-token body: '(punctuator ")"))
        (lexeme type: 'whitespace body: " ")
        (lexeme type: 'preprocessing-token body: '(pp-number "10")))
  (lex "#define f(x) 10"))

(test-equal "Nested parenthesis"
  (list (lexeme type: 'preprocessing-token body: '(identifier "f"))
        (lexeme type: 'preprocessing-token body: '(punctuator "("))
        (lexeme type: 'preprocessing-token body: '(pp-number "1"))
        (lexeme type: 'preprocessing-token body: '(punctuator ","))
        (lexeme type: 'whitespace body: " ")
        (lexeme type: 'preprocessing-token body: '(punctuator "("))
        (lexeme type: 'preprocessing-token body: '(pp-number "2"))
        (lexeme type: 'preprocessing-token body: '(punctuator ","))
        (lexeme type: 'whitespace body: " ")
        (lexeme type: 'preprocessing-token body: '(pp-number "3"))
        (lexeme type: 'preprocessing-token body: '(punctuator ")"))
        (lexeme type: 'preprocessing-token body: '(punctuator ","))
        (lexeme type: 'whitespace body: " ")
        (lexeme type: 'preprocessing-token body: '(pp-number "4"))
        (lexeme type: 'preprocessing-token body: '(punctuator ")")))
  (lex "f(1, (2, 3), 4)"))

;; Generating a single lexeme
;;   (whitespace "  ")
;; would also be ok.
;; The input is two spaces: one whitespace lexeme is expected per character.
(test-equal "Grouped whitespace"
  (list (lexeme type: 'whitespace body: " ")
        (lexeme type: 'whitespace body: " "))
  (lex "  "))

;; Input is two spaces, a newline, then one space.
(test-equal "Newlines get sepparate whitespace tokens"
  (list (lexeme type: 'whitespace body: " ")
        (lexeme type: 'whitespace body: " ")
        (lexeme type: 'whitespace body: "\n")
        (lexeme type: 'whitespace body: " "))
  (lex "  \n "))

;; Refer to 6.4 p.1 for the syntax requirement, and
;; 6.10.9 p.2 for the sample string
;; (section numbers presumably refer to the ISO C standard).
(test-equal "each non-white-space character that cannot be one of the above"
  (list (lexeme type: 'preprocessing-token body: '(punctuator "."))
        (lexeme type: 'preprocessing-token body: '(punctuator "."))
        (lexeme type: 'other body: "\\") ; <- Interesting part
        (lexeme type: 'preprocessing-token body: '(identifier "listing"))
        (lexeme type: 'preprocessing-token body: '(punctuator "."))
        (lexeme type: 'preprocessing-token body: '(identifier "dir")))
  (lex "..\\listing.dir"))

(test-equal "Propper H-string"
  (list (lexeme type: 'preprocessing-token body: '(header-name (h-string "a"))))
  (lex "<a>"))

;; The lexer does not know whether it is inside an #include directive, so
;; anything of the form <...> comes out as a header name, even in what
;; would otherwise be a comparison expression.
(test-equal "Unexpected h-string"
  (list (lexeme type: 'preprocessing-token body: '(pp-number "1"))
        (lexeme type: 'whitespace body: " ")
        (lexeme type: 'preprocessing-token body: '(header-name (h-string " 2 ")))
        (lexeme type: 'whitespace body: " ")
        (lexeme type: 'preprocessing-token body: '(pp-number "3")))
  (lex "1 < 2 > 3"))

(test-equal "Quotation mark inside h-string"
  (list (lexeme type: 'preprocessing-token body: '(header-name (h-string "a\"b"))))
  (lex "<a\"b>"))

;; Whichever delimiter opens first decides how the following text is lexed.
(test-group "Interaction of h-strings and regular strings"
  (test-equal "Less than string, not h-string"
    (list (lexeme type: 'preprocessing-token body: '(pp-number "1"))
          (lexeme type: 'preprocessing-token body: '(string-literal "<"))
          (lexeme type: 'preprocessing-token body: '(punctuator ">")))
    (lex "1\"<\">"))

  (test-equal "H-string, not string"
    (list (lexeme type: 'preprocessing-token body: '(pp-number "1"))
          (lexeme type: 'preprocessing-token body: '(header-name (h-string "\"")))
          (lexeme type: 'other body: "\""))
    (lex "1<\">\"")))

(test-equal "Q-strings are lexed as regular strings"
  (list (lexeme type: 'preprocessing-token body: '(punctuator "#"))
        (lexeme type: 'preprocessing-token body: '(identifier "include"))
        (lexeme type: 'whitespace body: " ")
        (lexeme type: 'preprocessing-token body: '(string-literal "test")))
  ;; "#include" is part of the input because the generated tokens could
  ;; possibly depend on that context.  Regular string literals are returned
  ;; because the lexer doesn't check that context.
  (lex "#include \"test\""))

(test-group "Unicode"
  (test-equal "In string literals"
    (list (lexeme type: 'preprocessing-token body: '(string-literal "åäö")))
    (lex "\"åäö\""))

  ;; Outside of literals, each non-ASCII character becomes its own
  ;; 'other lexeme.
  (test-equal "Outside string literals"
    (list (lexeme type: 'other body: "å")
          (lexeme type: 'other body: "ä")
          (lexeme type: 'other body: "ö"))
    (lex "åäö")))