(define-module (test cpp lex2)
  :use-module (srfi srfi-64)
  :use-module (srfi srfi-88)
  :use-module (ice-9 peg)
  :use-module (c lex2))

(define (l s)  (lexeme type: 'preprocessing-token body: s))

(define (ls . xs)
  (map l xs))

(test-equal "Integer literal"
  (ls '(pp-number "10"))
  (lex "10"))

(test-equal "String literal"
  (ls `(string-literal (encoding-prefix) "Hello"))
  (lex "\"Hello\""))


(test-equal "Mulitple tokens, including whitespace"
  (list (lexeme type: 'whitespace body: " ")
        (l '(pp-number "10"))
        (lexeme type: 'whitespace body: " "))
  (lex " 10 "))

(test-equal "Char literal"
  (ls `(character-constant (character-prefix) "a"))
  (lex "'a'"))


(test-equal "Comment inside string"
  (ls `(string-literal (encoding-prefix) "Hel/*lo"))
  (lex "\"Hel/*lo\""))

(test-equal "#define line"
  (list
   (l '(punctuator "#"))
   (l '(identifier "define"))
   (lexeme type: 'whitespace body: " ")
   (l '(identifier "f"))
   (l '(punctuator "("))
   (l '(identifier "x"))
   (l '(punctuator ")"))
   (lexeme type: 'whitespace body: " ")
   (l '(pp-number "10")))
  (lex "#define f(x) 10"))


(test-equal "Nested parenthesis"
  (list
   (l '(identifier "f"))
   (l '(punctuator "("))
   (l '(pp-number "1"))
   (l '(punctuator ","))
   (lexeme type: 'whitespace body: " ")
   (l '(punctuator "("))
   (l '(pp-number "2"))
   (l '(punctuator ","))
   (lexeme type: 'whitespace body: " ")
   (l '(pp-number "3"))
   (l '(punctuator ")"))
   (l '(punctuator ","))
   (lexeme type: 'whitespace body: " ")
   (l '(pp-number "4"))
   (l '(punctuator ")")))
  (lex "f(1, (2, 3), 4)"))


;; Generating a single lexeme
;; (whitespace "  ")
;; would also be ok
(test-equal "Grouped whitespace"
  (list (lexeme type: 'whitespace body: " ")
        (lexeme type: 'whitespace body: " "))
  (lex "  "))

(test-equal "Newlines get sepparate whitespace tokens"
  (list (lexeme type: 'whitespace body: " ")
        (lexeme type: 'whitespace body: " ")
        (lexeme type: 'whitespace body: "\n")
        (lexeme type: 'whitespace body: " "))
  (lex "  \n "))


;; Refer to 6.4 p.1 for the syntax requirement
;; 6.10.9 p. 2 for the sample string
(test-equal "each non-white-space character that cannot be one of the above"
  (list (l '(punctuator "."))
        (l '(punctuator "."))
        (lexeme type: 'other body: "\\")  ; <- Interesting part
        (l '(identifier "listing"))
        (l '(punctuator "."))
        (l '(identifier "dir")))
  (lex "..\\listing.dir"))


(test-equal "Propper H-string"
  (ls '(header-name (h-string "a")))
  (lex "<a>"))

(test-equal "Unexpected h-string"
  (list (l '(pp-number "1"))
        (lexeme type: 'whitespace body: " ")
        (l '(header-name (h-string " 2 ")))
        (lexeme type: 'whitespace body: " ")
        (l '(pp-number "3")))
  (lex "1 < 2 > 3"))

(test-equal "Quotation mark inside h-string"
  (ls '(header-name (h-string "a\"b")))
  (lex "<a\"b>"))

(test-equal "Interaction of h-strings and regular strings"
 (test-equal "Less than string, not h-string"
   (ls '(pp-number "1")
       '(string-literal (encoding-prefix) "<")
       '(punctuator ">"))
   (lex "1\"<\">"))

 (test-equal "H-string, not string"
   (list (lexeme type: 'preprocessing-token body: '(pp-number "1"))
         (lexeme type: 'preprocessing-token body: '(header-name (h-string "\"")))
         (lexeme type: 'other body: "\""))
   (lex "1<\">\"")))

(test-equal "Q-strings are lexed as regular strings"
  (list (l '(punctuator "#"))
        (l '(identifier "include"))
        (lexeme type: 'whitespace body: " ")
        (l '(string-literal (encoding-prefix) "test")))
  ;; # include here, since generated tokens could possible depend on that context,
  ;; and the reason regular strings are returned is since the lexer doesn't check
  ;; that context
  (lex "#include \"test\"")
  )


(test-group "Unicode"
  (test-equal "In string literals"
    (ls '(string-literal (encoding-prefix) "åäö"))
    (lex "\"åäö\""))

  (test-equal "Outside string literals"
    (list (lexeme type: 'other body: "å")
          (lexeme type: 'other body: "ä")
          (lexeme type: 'other body: "ö"))
    (lex "åäö")))


(test-group "Characters with prefixes"
  (test-equal (ls '(character-constant (character-prefix . "u")
                                       "a"))
    (lex "u'a'"))
  (test-equal (ls '(character-constant (character-prefix . "U")
                                       "a"))
    (lex "U'a'"))
  (test-equal (ls '(character-constant (character-prefix . "L")
                                       "a"))
    (lex "L'a'")))

;; Note that these strings have 0 "data" components
(test-group "Strings with prefixes"
  (test-equal (ls '(string-literal (encoding-prefix . "u8")))
    (lex "u8\"\""))
  (test-equal (ls '(string-literal (encoding-prefix . "u")))
    (lex "u\"\""))
  (test-equal (ls '(string-literal (encoding-prefix . "U")))
    (lex "U\"\""))
  (test-equal (ls '(string-literal (encoding-prefix . "L")))
    (lex "L\"\"")))