(define-module (test cpp lex2)
  :use-module (srfi srfi-64)
  :use-module (srfi srfi-88)
  :use-module (ice-9 peg)
  :use-module (c lex2))


;; A lone decimal number is expected to lex to exactly one pp-number
;; preprocessing token.
(test-equal "Integer literal"
  (let ((expected-body (list 'pp-number "10")))
    (list (lexeme type: 'preprocessing-token body: expected-body)))
  (lex "10"))

;; The expected string-literal body holds the text between the double
;; quotes, without the quotes themselves.
(test-equal "String literal"
  (let ((expected-body (list 'string-literal "Hello")))
    (list (lexeme type: 'preprocessing-token body: expected-body)))
  (lex "\"Hello\""))


;; Leading and trailing spaces are expected as their own whitespace
;; lexemes around the number token.
(test-equal "Mulitple tokens, including whitespace"
  (let ((blank (lambda () (lexeme type: 'whitespace body: " "))))
    (list (blank)
          (lexeme type: 'preprocessing-token body: (list 'pp-number "10"))
          (blank)))
  (lex " 10 "))

;; The expected character-constant body holds the character without the
;; surrounding single quotes.
(test-equal "Char literal"
  (let ((expected-body (list 'character-constant "a")))
    (list (lexeme type: 'preprocessing-token body: expected-body)))
  (lex "'a'"))


;; A comment opener ("/*") appearing inside a string literal is expected
;; to stay part of the string's contents.
(test-equal "Comment inside string"
  (let ((expected-body (list 'string-literal "Hel/*lo")))
    (list (lexeme type: 'preprocessing-token body: expected-body)))
  (lex "\"Hel/*lo\""))

;; A function-like macro definition line: the "#", the directive name, the
;; macro name, its parameter list and the replacement are all expected as
;; ordinary tokens, with each space as its own whitespace lexeme.
(test-equal "#define line"
  (let ((pp-tok (lambda (body) (lexeme type: 'preprocessing-token body: body)))
        (ws-tok (lambda (text) (lexeme type: 'whitespace body: text))))
    (list (pp-tok '(punctuator "#"))
          (pp-tok '(identifier "define"))
          (ws-tok " ")
          (pp-tok '(identifier "f"))
          (pp-tok '(punctuator "("))
          (pp-tok '(identifier "x"))
          (pp-tok '(punctuator ")"))
          (ws-tok " ")
          (pp-tok '(pp-number "10"))))
  (lex "#define f(x) 10"))


;; Parentheses and commas are each expected as separate punctuator tokens,
;; and the space following each comma as its own whitespace lexeme; the
;; expected output is a flat sequence regardless of the nesting.
(test-equal "Nested parenthesis"
  (let* ((pp-tok (lambda (body) (lexeme type: 'preprocessing-token body: body)))
         (punct  (lambda (text) (pp-tok (list 'punctuator text))))
         (number (lambda (text) (pp-tok (list 'pp-number text))))
         (space  (lambda () (lexeme type: 'whitespace body: " "))))
    (list (pp-tok '(identifier "f"))
          (punct "(")
          (number "1")
          (punct ",")
          (space)
          (punct "(")
          (number "2")
          (punct ",")
          (space)
          (number "3")
          (punct ")")
          (punct ",")
          (space)
          (number "4")
          (punct ")")))
  (lex "f(1, (2, 3), 4)"))


;; This test pins one whitespace lexeme per space character.
;; Producing a single combined lexeme,
;; (whitespace "  ")
;; would also be acceptable.
(test-equal "Grouped whitespace"
  (map (lambda (text) (lexeme type: 'whitespace body: text))
       (list " " " "))
  (lex "  "))

;; Each whitespace character, including the newline, is expected as its
;; own whitespace lexeme, in input order.
(test-equal "Newlines get sepparate whitespace tokens"
  (map (lambda (text) (lexeme type: 'whitespace body: text))
       (list " " " " "\n" " "))
  (lex "  \n "))


;; See 6.4 p. 1 for the syntax requirement,
;; and 6.10.9 p. 2 for the sample string.
(test-equal "each non-white-space character that cannot be one of the above"
  (let ((pp-tok (lambda (body) (lexeme type: 'preprocessing-token body: body)))
        (dot    '(punctuator ".")))
    (list (pp-tok dot)
          (pp-tok dot)
          ;; The interesting part: the lone backslash fits no other token
          ;; category, so it is expected as an 'other lexeme.
          (lexeme type: 'other body: "\\")
          (pp-tok '(identifier "listing"))
          (pp-tok dot)
          (pp-tok '(identifier "dir"))))
  (lex "..\\listing.dir"))


;; Text enclosed in angle brackets is expected as a header-name token
;; wrapping an h-string with the text between the brackets.
(test-equal "Propper H-string"
  (let ((expected-body (list 'header-name (list 'h-string "a"))))
    (list (lexeme type: 'preprocessing-token body: expected-body)))
  (lex "<a>"))

;; The expected output treats "< 2 >" as an h-string (spaces included)
;; rather than as two comparison operators around a number.
(test-equal "Unexpected h-string"
  (let ((pp-tok (lambda (body) (lexeme type: 'preprocessing-token body: body)))
        (space  (lambda () (lexeme type: 'whitespace body: " "))))
    (list (pp-tok '(pp-number "1"))
          (space)
          (pp-tok '(header-name (h-string " 2 ")))
          (space)
          (pp-tok '(pp-number "3"))))
  (lex "1 < 2 > 3"))

;; A double quote between the angle brackets is expected to remain part
;; of the h-string's contents.
(test-equal "Quotation mark inside h-string"
  (let ((expected-body (list 'header-name (list 'h-string "a\"b"))))
    (list (lexeme type: 'preprocessing-token body: expected-body)))
  (lex "<a\"b>"))

;; Whichever delimiter opens first decides how the other one is read.
;; These two cases are grouped with `test-group': `test-equal' takes a
;; name, an expected value and a single test expression, so nesting the
;; two cases inside a `test-equal' would compare the return values of the
;; inner tests against each other and record a spurious extra test.
(test-group "Interaction of h-strings and regular strings"
  ;; The string literal opens first, so "<" is its contents and the
  ;; following ">" is expected as a plain punctuator.
  (test-equal "Less than string, not h-string"
    (list (lexeme type: 'preprocessing-token body: '(pp-number "1"))
          (lexeme type: 'preprocessing-token body: '(string-literal "<"))
          (lexeme type: 'preprocessing-token body: '(punctuator ">")))
    (lex "1\"<\">"))

  ;; The angle bracket opens first, so the first quote is the h-string's
  ;; contents and the trailing unmatched quote is expected as 'other.
  (test-equal "H-string, not string"
    (list (lexeme type: 'preprocessing-token body: '(pp-number "1"))
          (lexeme type: 'preprocessing-token body: '(header-name (h-string "\"")))
          (lexeme type: 'other body: "\""))
    (lex "1<\">\"")))

;; The "# include" prefix is part of the input because the generated
;; tokens could possibly depend on that context. A regular string-literal
;; is expected because the lexer doesn't check that context.
(test-equal "Q-strings are lexed as regular strings"
  (let ((pp-tok (lambda (body) (lexeme type: 'preprocessing-token body: body))))
    (list (pp-tok '(punctuator "#"))
          (pp-tok '(identifier "include"))
          (lexeme type: 'whitespace body: " ")
          (pp-tok '(string-literal "test"))))
  (lex "#include \"test\""))


;; Non-ASCII input: inside a string literal the characters are expected as
;; the literal's contents; outside one, each character is expected as its
;; own 'other lexeme.
(test-group "Unicode"
  (test-equal "In string literals"
    (let ((expected-body (list 'string-literal "åäö")))
      (list (lexeme type: 'preprocessing-token body: expected-body)))
    (lex "\"åäö\""))

  (test-equal "Outside string literals"
    (map (lambda (ch) (lexeme type: 'other body: ch))
         (list "å" "ä" "ö"))
    (lex "åäö")))