;;; Commentary: ;; Tests my parser for a subset of the C programming language. ;;; Code: (define-module (test cpp) :use-module (srfi srfi-64) :use-module (srfi srfi-88) :use-module ((c lex) :select (lex)) :use-module ((c parse) :select (parse-lexeme-tree))) ;; Note that the lexer's output isn't stable. ;; The tests here are more to see where the lexer succeeds but the parser fails. ;; So changing the lexer test cases isn't a problem ;; but don't change the parser test cases (define run (compose parse-lexeme-tree lex)) (define-syntax let-group (syntax-rules () ((let ((form name) rest ...) body ...) (test-group name (let ((form name) rest ...) body ...))))) (let-group ((form "(*C)++ + 3")) (test-equal '(infix (postfix (group (prefix (prefix-operator "*") (variable "C"))) (postfix-operator "++")) (operator "+") (integer (base-10 "3"))) (lex form)) (test-equal '(+ (post-increment (dereference C)) 3) (run form))) (let-group ((form "*C++ + 3")) (test-equal '(infix (postfix (prefix (prefix-operator "*") (variable "C")) (postfix-operator "++")) (operator "+") (integer (base-10 "3"))) (lex form)) (test-equal '(+ (post-increment (dereference C)) 3) (run form))) (let-group ((form "*C++")) (test-equal '(postfix (prefix (prefix-operator "*") (variable "C")) (postfix-operator "++")) (lex form)) (test-equal '(post-increment (dereference C)) (run form))) (let-group ((form "C++ + C++")) (test-equal '(infix (postfix (variable "C") (postfix-operator "++")) (operator "+") (postfix (variable "C") (postfix-operator "++"))) (lex form)) (test-equal '(+ (post-increment C) (post-increment C)) (run form))) (let-group ((form "++C + ++C")) (test-equal '(infix (prefix (prefix-operator "++") (variable "C")) (operator "+") (prefix (prefix-operator "++") (variable "C"))) (lex form)) (test-equal '(+ (pre-increment C) (pre-increment C)) (run form))) (let-group ((form "2 + 2 * 2")) (test-equal '(infix (integer (base-10 "2")) (operator "+") (infix (integer (base-10 "2")) (operator "*") (integer (base-10 "2")))) (lex form)) (test-equal '(+ 2 (* 2 2)) (run form))) (let-group ((form "2 * 2 + 2")) (test-equal '(infix (integer (base-10 "2")) (operator "*") (infix (integer (base-10 "2")) (operator "+") (integer (base-10 "2")))) (lex form)) (test-equal '(+ (* 2 2) 2) (run form))) (let-group ((form "2+2+2")) (test-equal '(infix (integer (base-10 "2")) (operator "+") (infix (integer (base-10 "2")) (operator "+") (integer (base-10 "2")))) (lex form)) (test-equal '(+ 2 2 2) (run form))) (test-skip "Strings") (test-skip "__asm__") (test-skip "Token concatenation") (test-skip "Floating point numbers") ;; Hand picked forms from output of `cpp -dM /usr/include/termios.h` on ;; FreeBSD 13.1-RELEASE releng/13.1-n250148-fc952ac2212 GENERIC amd64 ;; 2022-06-28 (let ((form "00000200")) (test-equal '(integer (base-8 "0000200")) (lex form)) (test-equal 128 (run form))) (let ((form "0")) (test-equal '(integer (base-10 "0")) (lex form)) (test-equal 0 (run form))) (let ((form "1000000U")) (test-equal '(integer (base-10 "1000000") (integer-suffix "U")) (lex form)) (test-equal '(as-type (unsigned) 1000000) (run form))) (let ((form "0x10c")) (test-equal '(integer (base-16 "10c")) (lex form)) (test-equal 268 (run form))) ;; Lexer keeps original case, handled later by parser (let ((form "0X10C")) (test-equal '(integer (base-16 "10C")) (lex form)) (test-equal 268 (run form))) (let ((form "a != b")) (test-equal '(infix (variable "a") (operator "!=") (variable "b")) (lex form)) (test-equal '(not_eq a b) (run form))) (let ((form "((c) == (val) && (val) != _POSIX_VDISABLE)")) ;; (test-equal '() (lex form)) (test-equal '(and (== c val) (not_eq val _POSIX_VDISABLE)) (run form))) (let ((form "CTRL('O')")) (test-equal '(funcall (variable "CTRL") (group (char "O"))) (lex form)) (test-equal '(funcall CTRL 79) (run form))) (let ((form "CREPRINT")) (test-equal '(variable "CREPRINT") (lex form)) (test-equal 'CREPRINT (run form))) (let ((form "(CCTS_OFLOW | CRTS_IFLOW)")) (test-equal '(group (infix (variable "CCTS_OFLOW") (operator "|") (variable "CRTS_IFLOW"))) (lex form)) (test-equal '(bitor CCTS_OFLOW CRTS_IFLOW) (run form))) ;; ((x) >= 'a' && (x) <= 'z' ;; ? ((x) - 'a' + 1) ;; : (((x) - 'a' + 1) & 0x7f)) (let ((form "((x) >= 'a' && (x) <= 'z' ? ((x) - 'a' + 1) : (((x) - 'a' + 1) & 0x7f))")) ;; (test-equal '() (lex form)) (test-equal '(ternary (and (>= x #x61) (<= x #x7A)) (- x (+ #x61 1)) (bitand (- x (+ #x61 1)) 127)) (run form))) (let ((form "((x) & ~(IOCPARM_MASK << 16))")) ;; (test-equal '() (lex form)) (test-equal '(bitand x (compl (<< IOCPARM_MASK 16))) (run form))) (let ((form "(((x) >> 8) & 0xff)")) ;; (test-equal '() (lex form)) (test-equal '(bitand (>> x 8) 255) (run form))) (let ((form "(((x) >> 16) & IOCPARM_MASK)")) ;; (test-equal '() (lex form)) (test-equal '(bitand (>> x 16) IOCPARM_MASK) (run form))) (let ((form "((1 << IOCPARM_SHIFT) - 1)")) ;; (test-equal '() (lex form)) (test-equal '(- (<< 1 IOCPARM_SHIFT) 1) (run form))) (let ((form "_IO('t', 120)")) (test-equal '(funcall (variable "_IO") (group (infix (char "t") (operator ",") (integer (base-10 "120"))))) (lex form)) (test-equal '(funcall _IO (#{,}# 116 120)) (run form))) ;; note the lone type (let ((form "_IOW('t', 98, int)")) ;; (test-equal '() (lex form)) (test-equal '(funcall _IOW (#{,}# 116 98 int)) (run form))) ;; note the multi-word type (let ((form "_IOR('t', 19, struct termios)")) ;; (test-equal '() (lex form)) (test-equal '(funcall _IOR (#{,}# 116 19 (struct-type termios))) (run form))) (test-group "Token concatenation" (let ((form "x ## y")) (test-equal '() (lex form)) (test-equal '0 (run form)))) (test-group "Floating point numbers" (let ((form "4.9406564584124654e-324")) (test-equal '(float (base-10 "4") (base-10)) (lex form)) (test-equal '0 (run form))) (let ((form "1.7976931348623157e+308")) (test-equal '() (lex form)) (test-equal '0 (run form)))) (test-group "Typecasts" (let ((form "(unsigned) 5")) (test-equal '((group (variable "unsigned")) (integer (base-10 "5"))) (lex form)) (test-equal '(as-type (unsigned) 5) (run form))) (let ((form "(unsigned integer) 5")) (test-equal '((group (variable "unsigned") (variable "integer")) (integer (base-10 "5"))) (lex form)) (test-equal '(as-type (unsigned integer) 5) (run form))) (test-group "Pointer with space" (let ((form "(int *) 5")) (test-equal '((group (postfix (variable "int") (postfix-operator "*"))) (integer (base-10 "5"))) (lex form)) (test-equal '(as-type (int *) 5) (run form)))) (test-group "Pointer without space" (let ((form "(int*) 5")) (test-equal '((group (postfix (variable "int") (postfix-operator "*"))) (integer (base-10 "5"))) (lex form)) (test-equal '(as-type (int *) 5) (run form)))) (test-group "Multi word type pointer" (let ((form "(unsigned int*) 5")) (test-equal '((group (variable "unsigned") (postfix (variable "int") (postfix-operator "*"))) (integer (base-10 "5"))) (lex form)) (test-equal '(as-type (unsigned int *) 5) (run form)))) (test-group "Double cast" (let ((form "(int)(unsigned) 5")) (test-equal '((group (variable "int")) (group (variable "unsigned")) (integer (base-10 "5"))) (lex form)) (test-equal '(as-type (int) (as-type (unsigned) 5)) (run form)))) (test-group "Cast with operation" (let ((form "(int) 5 + 7")) (test-equal '((group (variable "int")) (infix (integer (base-10 "5")) (operator "+") (integer (base-10 "7")))) (lex form)) (test-equal '(+ (as-type (int) 5) 7) (run form)))) (test-group "Tripple cast, with value inside paranthesis" (let ((form "(type)(__uintptr_t)(const void *)(var)")) (test-equal '((group (variable "type")) (group (variable "__uintptr_t")) (group (variable "const") (postfix (variable "void") (postfix-operator "*"))) (group (variable "var"))) (lex form)) (test-equal '(as-type (type) (as-type (__uintptr_t) (as-type (const void *) var))) (run form)))) (test-group "Same as above, but whole thing inside parenthesis" (let ((form "((type)(__uintptr_t)(const void *)(var))")) (test-equal '(group (group (variable "type")) (group (variable "__uintptr_t")) (group (variable "const") (postfix (variable "void") (postfix-operator "*"))) (group (variable "var"))) (lex form)) (test-equal '(as-type (type) (as-type (__uintptr_t) (as-type (const void *) var))) (run form)))) (let ((form "((type)(__uintptr_t)(const volatile void *)(var))")) ;; (test-equal '() (lex form)) (test-equal '(as-type (type) (as-type (__uintptr_t) (as-type (const volatile void *) var))) (run form))) (let ((form "((unsigned long) ((inout) | (((len) & IOCPARM_MASK) << 16) | ((group) << 8) | (num)))")) (test-equal '(group (group (variable "unsigned") (variable "long")) (group (infix (group (variable "inout")) (operator "|") (infix (group (infix (group (infix (group (variable "len")) (operator "&") (variable "IOCPARM_MASK"))) (operator "<<") (integer (base-10 "16")))) (operator "|") (infix (group (infix (group (variable "group")) (operator "<<") (integer (base-10 "8")))) (operator "|") (group (variable "num"))))))) (lex form)) (test-equal '(as-type (unsigned long) (bitor (<< (bitand len IOCPARM_MASK) 16) (<< group 8) num)) (run form)))) (test-group "Strings" (test-group "Empty string" (let ((form "\"\"")) (test-equal '(string "") (lex form)) (test-equal #vu8(0) (run form)))) (test-group "Simple string" (let ((form "\"li\"")) (test-equal '(string "li") (lex form)) (test-equal #vu8(#x6C #x69 0) (run form)))) (test-group "Implicit concatenation of strings" (let ((form "\"a\" \"b\"")) (test-equal '((string "a") (string "b")) (lex form)) (test-equal #vu8(#x61 #x62 0) (run form)))) (test-group "Implicit concatenation of string and macro" (let ((form "\"a\" MACRO")) (test-equal '() (lex form)) (test-equal '() (run form)))) (test-group "String with escape" (let ((form (string #\\ #\"))) (test-equal `(string ,(string #\")) (lex form)) (test-equal #vu8(34 0) (run form))))) (test-group "__asm__" (let ((form "__asm__(\".globl \" __XSTRING(sym))")) (test-equal '() (lex form)) ;; TODO implicit string concatenation (test-equal '(funcall __asm__ (string ".globl ") (funcall __XSTRING sym)) (run form)))) (let ((form "__attribute__((__aligned__(x)))")) (test-equal '(funcall (variable "__attribute__") (group (group (funcall (variable "__aligned__") (group (variable "x")))))) (lex form)) ;; This drops the extra set of parenthesis. Do we care? (test-equal '(funcall __attribute__ (funcall __aligned__ x)) (run form))) ;; TODO concatenation rules ;; #define __CONCAT(x,y) __CONCAT1(x,y) ;; #define __CONCAT1(x,y) x ## y ;; #define __CONSTANT_CFSTRINGS__ 1 ;; #define __COPYRIGHT(s) __IDSTRING(__CONCAT(__copyright_,__LINE__),s)