From 690034ca8abcd931e2fb6bb8129450deee701179 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20H=C3=B6rnquist?= Date: Tue, 28 Jun 2022 23:46:08 +0200 Subject: Major work on c parser. --- tests/test/cpp.scm | 407 ++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 389 insertions(+), 18 deletions(-) (limited to 'tests/test/cpp.scm') diff --git a/tests/test/cpp.scm b/tests/test/cpp.scm index 9c720fde..f3b9ff72 100644 --- a/tests/test/cpp.scm +++ b/tests/test/cpp.scm @@ -8,32 +8,403 @@ :use-module ((c lex) :select (lex)) :use-module ((c parse) :select (parse-lexeme-tree))) +;; Note that the lexer's output isn't stable. +;; The tests here are more to see where the lexer succeeds but the parser fails. +;; So changing the lexer test cases isn't a problem +;; but don't change the parser test cases + (define run (compose parse-lexeme-tree lex)) -(test-equal - '(+ (post-increment (dereference C)) 3) - (run "(*C)++ + 3")) +(define-syntax let-group + (syntax-rules () + ((let ((form name) rest ...) body ...) + (test-group name + (let ((form name) + rest ...) + body ...))))) + +(let-group + ((form "(*C)++ + 3")) + (test-equal '(infix (postfix (group (prefix (prefix-operator "*") + (variable "C"))) + (postfix-operator "++")) + (operator "+") + (integer (base-10 "3"))) + (lex form)) + (test-equal '(+ (post-increment (dereference C)) 3) + (run form))) + +(let-group + ((form "*C++ + 3")) + (test-equal '(infix (postfix (prefix (prefix-operator "*") + (variable "C")) + (postfix-operator "++")) + (operator "+") + (integer (base-10 "3"))) + (lex form)) + (test-equal '(+ (post-increment (dereference C)) 3) + (run form))) + +(let-group + ((form "*C++")) + (test-equal '(postfix (prefix (prefix-operator "*") + (variable "C")) + (postfix-operator "++")) + (lex form)) + (test-equal '(post-increment (dereference C)) + (run form))) + +(let-group + ((form "C++ + C++")) + (test-equal '(infix (postfix (variable "C") + (postfix-operator "++")) + (operator "+") + (postfix (variable "C") + (postfix-operator "++"))) + (lex form)) + (test-equal '(+ (post-increment C) (post-increment C)) + (run form))) + +(let-group + ((form "++C + ++C")) + (test-equal '(infix (prefix (prefix-operator "++") + (variable "C")) + (operator "+") + (prefix (prefix-operator "++") + (variable "C"))) + (lex form)) + (test-equal '(+ (pre-increment C) (pre-increment C)) + (run form))) + +(let-group + ((form "2 + 2 * 2")) + (test-equal '(infix (integer (base-10 "2")) + (operator "+") + (infix (integer (base-10 "2")) + (operator "*") + (integer (base-10 "2")))) + (lex form)) + (test-equal '(+ 2 (* 2 2)) (run form))) + +(let-group + ((form "2 * 2 + 2")) + (test-equal '(infix (integer (base-10 "2")) + (operator "*") + (infix (integer (base-10 "2")) + (operator "+") + (integer (base-10 "2")))) + (lex form)) + (test-equal '(+ (* 2 2) 2) (run form))) + +(let-group + ((form "2+2+2")) + (test-equal '(infix (integer (base-10 "2")) + (operator "+") + (infix (integer (base-10 "2")) + (operator "+") + (integer (base-10 "2")))) (lex form)) + (test-equal '(+ 2 2 2) (run form))) + + + + +(test-skip "Strings") +(test-skip "__asm__") +(test-skip "Token concatenation") +(test-skip "Floating point numbers") + +;; Hand picked forms from output of `cpp -dM /usr/include/termios.h` on +;; FreeBSD 13.1-RELEASE releng/13.1-n250148-fc952ac2212 GENERIC amd64 +;; 2022-06-28 + +(let ((form "00000200")) + (test-equal '(integer (base-8 "0000200")) (lex form)) + (test-equal 128 (run form))) + +(let ((form "0")) + (test-equal '(integer (base-10 "0")) (lex form)) + (test-equal 0 (run form))) + +(let ((form "1000000U")) + (test-equal '(integer (base-10 "1000000") (integer-suffix "U")) (lex form)) + (test-equal '(as-type (unsigned) 1000000) (run form))) + + +(let ((form "0x10c")) + (test-equal '(integer (base-16 "10c")) (lex form)) + (test-equal 268 (run form))) + +;; Lexer keeps original case, handled later by parser +(let ((form "0X10C")) + (test-equal '(integer (base-16 "10C")) (lex form)) + (test-equal 268 (run form))) + +(let ((form "a != b")) + (test-equal '(infix (variable "a") + (operator "!=") + (variable "b")) + (lex form)) + (test-equal '(not_eq a b) (run form))) + +(let ((form "((c) == (val) && (val) != _POSIX_VDISABLE)")) + ;; (test-equal '() (lex form)) + (test-equal '(and (== c val) + (not_eq val _POSIX_VDISABLE)) + (run form))) + +(let ((form "CTRL('O')")) + (test-equal '(funcall (variable "CTRL") (group (char "O"))) (lex form)) + (test-equal '(funcall CTRL 79) (run form))) + +(let ((form "CREPRINT")) + (test-equal '(variable "CREPRINT") (lex form)) + (test-equal 'CREPRINT (run form))) + +(let ((form "(CCTS_OFLOW | CRTS_IFLOW)")) + (test-equal '(group (infix (variable "CCTS_OFLOW") + (operator "|") + (variable "CRTS_IFLOW"))) + (lex form)) + (test-equal '(bitor CCTS_OFLOW CRTS_IFLOW) (run form))) + +;; ((x) >= 'a' && (x) <= 'z' +;; ? ((x) - 'a' + 1) +;; : (((x) - 'a' + 1) & 0x7f)) +(let ((form "((x) >= 'a' && (x) <= 'z' ? ((x) - 'a' + 1) : (((x) - 'a' + 1) & 0x7f))")) + ;; (test-equal '() (lex form)) + (test-equal '(ternary + (and (>= x #x61) + (<= x #x7A)) + (- x (+ #x61 1)) + (bitand (- x (+ #x61 1)) 127)) + (run form))) + +(let ((form "((x) & ~(IOCPARM_MASK << 16))")) + ;; (test-equal '() (lex form)) + (test-equal '(bitand x (compl (<< IOCPARM_MASK 16))) (run form))) + +(let ((form "(((x) >> 8) & 0xff)")) + ;; (test-equal '() (lex form)) + (test-equal '(bitand (>> x 8) 255) (run form))) + +(let ((form "(((x) >> 16) & IOCPARM_MASK)")) + ;; (test-equal '() (lex form)) + (test-equal '(bitand (>> x 16) IOCPARM_MASK) (run form))) + +(let ((form "((1 << IOCPARM_SHIFT) - 1)")) + ;; (test-equal '() (lex form)) + (test-equal '(- (<< 1 IOCPARM_SHIFT) 1) (run form))) + +(let ((form "_IO('t', 120)")) + (test-equal '(funcall + (variable "_IO") + (group (infix (char "t") + (operator ",") + (integer (base-10 "120"))))) + (lex form)) + (test-equal '(funcall _IO (#{,}# 116 120)) (run form))) + +;; note the lone type +(let ((form "_IOW('t', 98, int)")) + ;; (test-equal '() (lex form)) + (test-equal '(funcall _IOW (#{,}# 116 98 int)) + (run form))) + +;; note the multi-word type +(let ((form "_IOR('t', 19, struct termios)")) + ;; (test-equal '() (lex form)) + (test-equal '(funcall _IOR (#{,}# 116 19 (struct-type termios))) (run form))) + + +(test-group "Token concatenation" + (let ((form "x ## y")) + (test-equal '() (lex form)) + (test-equal '0 (run form)))) + +(test-group "Floating point numbers" + (let ((form "4.9406564584124654e-324")) + (test-equal '(float (base-10 "4") (base-10)) (lex form)) + (test-equal '0 (run form))) + + (let ((form "1.7976931348623157e+308")) + (test-equal '() (lex form)) + (test-equal '0 (run form)))) + +(test-group "Typecasts" + + (let ((form "(unsigned) 5")) + (test-equal '((group (variable "unsigned")) + (integer (base-10 "5"))) + (lex form)) + (test-equal '(as-type (unsigned) 5) + (run form))) + + (let ((form "(unsigned integer) 5")) + (test-equal '((group (variable "unsigned") + (variable "integer")) + (integer (base-10 "5"))) + (lex form)) + (test-equal '(as-type (unsigned integer) 5) (run form))) + + (test-group "Pointer with space" + (let ((form "(int *) 5")) + (test-equal '((group (postfix (variable "int") + (postfix-operator "*"))) + (integer (base-10 "5"))) + (lex form)) + (test-equal '(as-type (int *) 5) + (run form)))) + + (test-group "Pointer without space" + (let ((form "(int*) 5")) + (test-equal '((group (postfix (variable "int") + (postfix-operator "*"))) + (integer (base-10 "5"))) + (lex form)) + (test-equal '(as-type (int *) 5) + (run form)))) + + (test-group "Multi word type pointer" + (let ((form "(unsigned int*) 5")) + (test-equal '((group (variable "unsigned") + (postfix (variable "int") + (postfix-operator "*"))) + (integer (base-10 "5"))) + (lex form)) + (test-equal '(as-type (unsigned int *) 5) + (run form)))) + + (test-group "Double cast" + (let ((form "(int)(unsigned) 5")) + (test-equal '((group (variable "int")) + (group (variable "unsigned")) + (integer (base-10 "5"))) (lex form)) + (test-equal '(as-type (int) (as-type (unsigned) 5)) + (run form)))) + + (test-group "Cast with operation" + (let ((form "(int) 5 + 7")) + (test-equal '((group (variable "int")) + (infix (integer (base-10 "5")) + (operator "+") + (integer (base-10 "7")))) + (lex form)) + + (test-equal '(+ (as-type (int) 5) 7) + (run form)))) + + + + (test-group "Tripple cast, with value inside paranthesis" + (let ((form "(type)(__uintptr_t)(const void *)(var)")) + (test-equal '((group (variable "type")) + (group (variable "__uintptr_t")) + (group (variable "const") + (postfix (variable "void") + (postfix-operator "*"))) + (group (variable "var"))) + (lex form)) + (test-equal '(as-type (type) + (as-type (__uintptr_t) + (as-type (const void *) + var))) + (run form)))) + + (test-group "Same as above, but whole thing inside parenthesis" + (let ((form "((type)(__uintptr_t)(const void *)(var))")) + (test-equal '(group (group (variable "type")) + (group (variable "__uintptr_t")) + (group (variable "const") + (postfix (variable "void") + (postfix-operator "*"))) + (group (variable "var"))) + (lex form)) + (test-equal '(as-type (type) + (as-type (__uintptr_t) + (as-type (const void *) + var))) + (run form)))) + + (let ((form "((type)(__uintptr_t)(const volatile void *)(var))")) + ;; (test-equal '() (lex form)) + (test-equal '(as-type (type) + (as-type (__uintptr_t) + (as-type (const volatile void *) + var))) + (run form))) + + (let ((form "((unsigned long) ((inout) | (((len) & IOCPARM_MASK) << 16) | ((group) << 8) | (num)))")) + (test-equal '(group (group (variable "unsigned") (variable "long")) + (group (infix (group (variable "inout")) + (operator "|") + (infix (group (infix (group (infix (group (variable "len")) + (operator "&") + (variable "IOCPARM_MASK"))) + (operator "<<") + (integer (base-10 "16")))) + (operator "|") + (infix (group (infix (group (variable "group")) + (operator "<<") + (integer (base-10 "8")))) + (operator "|") + (group (variable "num"))))))) + (lex form)) + (test-equal '(as-type (unsigned long) + (bitor (<< (bitand len IOCPARM_MASK) 16) + (<< group 8) + num)) + (run form)))) -(test-equal - '(+ (post-increment (dereference C)) 3) - (run "*C++ + 3")) -(test-equal - '(post-increment (dereference C)) - (run "*C++")) +(test-group "Strings" + (test-group "Empty string" + (let ((form "\"\"")) + (test-equal '(string "") (lex form)) + (test-equal #vu8(0) (run form)))) -(test-equal - '(+ (post-increment C) (post-increment C)) - (run "C++ + C++")) + (test-group "Simple string" + (let ((form "\"li\"")) + (test-equal '(string "li") (lex form)) + (test-equal #vu8(#x6C #x69 0) (run form)))) -(test-equal - '(+ (pre-increment C) (pre-increment C)) - (run "++C + ++C")) + (test-group "Implicit concatenation of strings" + (let ((form "\"a\" \"b\"")) + (test-equal '((string "a") + (string "b")) + (lex form)) + (test-equal #vu8(#x61 #x62 0) + (run form)))) -(test-equal '(+ 2 (* 2 2)) (run "2 + 2 * 2")) + (test-group "Implicit concatenation of string and macro" + (let ((form "\"a\" MACRO")) + (test-equal '() (lex form)) + (test-equal '() (run form)))) -(test-equal '(+ (* 2 2) 2) (run "2 * 2 + 2")) + (test-group "String with escape" + (let ((form (string #\\ #\"))) + (test-equal `(string ,(string #\")) (lex form)) + (test-equal #vu8(34 0) (run form))))) -(test-equal '(+ 2 2 2) (run "2+2+2")) +(test-group "__asm__" + (let ((form "__asm__(\".globl \" __XSTRING(sym))")) + (test-equal '() (lex form)) + ;; TODO implicit string concatenation + (test-equal '(funcall __asm__ + (string ".globl ") + (funcall __XSTRING sym)) (run form)))) +(let ((form "__attribute__((__aligned__(x)))")) + (test-equal '(funcall (variable "__attribute__") + (group (group (funcall (variable "__aligned__") + (group (variable "x")))))) + (lex form)) + ;; This drops the extra set of parenthesis. Do we care? + (test-equal '(funcall __attribute__ + (funcall __aligned__ x)) + (run form))) +;; TODO concatenation rules +;; #define __CONCAT(x,y) __CONCAT1(x,y) +;; #define __CONCAT1(x,y) x ## y +;; #define __CONSTANT_CFSTRINGS__ 1 +;; #define __COPYRIGHT(s) __IDSTRING(__CONCAT(__copyright_,__LINE__),s) -- cgit v1.2.3