;;; Commentary: ;; Tests my parser for a subset of the C programming language. ;;; Code: (define-module (test cpp) :use-module (srfi srfi-1) :use-module (srfi srfi-64) :use-module (srfi srfi-88) :use-module ((c lex) :select (lex)) :use-module ((c parse) :select (parse-lexeme-tree)) :use-module ((c eval) :select (c-eval)) :use-module ((c eval environment) :select (make-environment env-set!)) :use-module ((rnrs arithmetic bitwise) :select (bitwise-xor))) ;; Note that the lexer's output isn't stable. ;; The tests here are more to see where the lexer succeeds but the parser fails. ;; So changing the lexer test cases isn't a problem ;; but don't change the parser test cases ;; __asm__ always has strings as arguments (test-skip "__asm__") ;; Lexer produces garbage when attempted. Fixing this would also fix cast ;; operations. (test-skip "Float in infix expression") ;; order of operation is wrong, leading to an incorrect result (test-skip "Cast with operation") ;; not implemented (test-skip "Token concatenation") ;; A string follewed by a macro (which expands to a string) ;; should be concatenated. This is however not yet implemented (test-skip "Implicit concatenation of string and macro") (define run (compose parse-lexeme-tree lex)) (define (alist->environment alist) (fold (lambda (pair env) (apply env-set! env pair)) (make-environment) alist)) (define (exec form . base-bindings) (call-with-values (lambda () (c-eval (alist->environment base-bindings) (run form))) (lambda (env value) value))) (define-syntax let-group (syntax-rules () ((let ((form name) rest ...) body ...) (test-group name (let ((form name) rest ...) body ...))))) (let-group ((form "(*C)++ + 3")) (test-equal '(infix (postfix (group (prefix (prefix-operator "*") (variable "C"))) (postfix-operator "++")) (operator "+") (integer (base-10 "3"))) (lex form)) (test-equal '(+ (post-increment (dereference C)) 3) (run form))) (let-group ((form "*C++ + 3")) (test-equal '(infix (postfix (prefix (prefix-operator "*") (variable "C")) (postfix-operator "++")) (operator "+") (integer (base-10 "3"))) (lex form)) (test-equal '(+ (post-increment (dereference C)) 3) (run form))) (let-group ((form "*C++")) (test-equal '(postfix (prefix (prefix-operator "*") (variable "C")) (postfix-operator "++")) (lex form)) (test-equal '(post-increment (dereference C)) (run form))) (let-group ((form "C++ + C++")) (test-equal '(infix (postfix (variable "C") (postfix-operator "++")) (operator "+") (postfix (variable "C") (postfix-operator "++"))) (lex form)) (test-equal '(+ (post-increment C) (post-increment C)) (run form))) (let-group ((form "++C + ++C")) (test-equal '(infix (prefix (prefix-operator "++") (variable "C")) (operator "+") (prefix (prefix-operator "++") (variable "C"))) (lex form)) (test-equal '(+ (pre-increment C) (pre-increment C)) (run form))) (let-group ((form "2 + 2 * 2")) (test-equal '(infix (integer (base-10 "2")) (operator "+") (infix (integer (base-10 "2")) (operator "*") (integer (base-10 "2")))) (lex form)) (test-equal '(+ 2 (* 2 2)) (run form)) (test-equal 6 (exec form))) (let-group ((form "2 * 2 + 2")) (test-equal '(infix (integer (base-10 "2")) (operator "*") (infix (integer (base-10 "2")) (operator "+") (integer (base-10 "2")))) (lex form)) (test-equal '(+ (* 2 2) 2) (run form)) (test-equal 6 (exec form))) (let-group ((form "2+2+2")) (test-equal '(infix (integer (base-10 "2")) (operator "+") (infix (integer (base-10 "2")) (operator "+") (integer (base-10 "2")))) (lex form)) (test-equal '(+ 2 2 2) (run form)) (test-equal 6 (exec form))) (test-group "Unary minus" (test-group "Without space" (let ((form "-1")) (test-equal '(prefix (prefix-operator "-") (integer (base-10 "1"))) (lex form)) (test-equal '(- 1) (run form)) (test-equal -1 (exec form)))) (test-group "With space" (let ((form "- 1")) (test-equal '(prefix (prefix-operator "-") (integer (base-10 "1"))) (lex form)) (test-equal '(- 1) (run form)) (test-equal -1 (exec form)))) (test-group "Before variable" (let ((form "-x")) (test-equal '(prefix (prefix-operator "-") (variable "x")) (lex form)) (test-equal '(- x) (run form)) (test-equal -5 (exec form '(x 5))))) (test-group "Before infix" (let ((form "-x+3")) (test-equal '(infix (prefix (prefix-operator "-") (variable "x")) (operator "+") (integer (base-10 "3"))) (lex form)) (test-equal '(+ (- x) 3) (run form)) (test-equal -2 (exec form '(x 5))))) (test-group "Inside infix expression" (let ((form "x+-3")) (test-equal '(infix (variable "x") (operator "+") (prefix (prefix-operator "-") (integer (base-10 "3")))) (lex form)) (test-equal '(+ x (- 3)) (run form)) (test-equal 2 (exec form '(x 5))))) ) ;; Hand picked forms from output of `cpp -dM /usr/include/termios.h` on ;; FreeBSD 13.1-RELEASE releng/13.1-n250148-fc952ac2212 GENERIC amd64 ;; 2022-06-28 (let ((form "00000200")) (test-equal '(integer (base-8 "0000200")) (lex form)) (test-equal 128 (run form))) (let ((form "0")) (test-equal '(integer (base-10 "0")) (lex form)) (test-equal 0 (run form))) (let ((form "1000000U")) (test-equal '(integer (base-10 "1000000") (integer-suffix "U")) (lex form)) (test-equal '(as-type (unsigned) 1000000) (run form)) (test-equal 1000000 (exec form))) (let ((form "0x10c")) (test-equal '(integer (base-16 "10c")) (lex form)) (test-equal 268 (run form))) ;; Lexer keeps original case, handled later by parser (let ((form "0X10C")) (test-equal '(integer (base-16 "10C")) (lex form)) (test-equal 268 (run form))) (let ((form "a != b")) (test-equal '(infix (variable "a") (operator "!=") (variable "b")) (lex form)) (test-equal '(not_eq a b) (run form)) (test-equal 1 (exec form '(a 1) '(b 2))) (test-equal 0 (exec form '(a 1) '(b 1))) ) (let ((form "((c) == (val) && (val) != _POSIX_VDISABLE)")) ;; (test-equal '() (lex form)) (test-equal '(and (== c val) (not_eq val _POSIX_VDISABLE)) (run form)) (test-equal 0 (exec form '(c 1) '(val 2) '(_POSIX_VDISABLE 3))) ) (let ((form "CTRL('O')")) (test-equal '(funcall (variable "CTRL") (group (char "O"))) (lex form)) (test-equal '(funcall CTRL 79) (run form)) (test-equal (bitwise-xor #x40 (char->integer #\O)) (exec form ;; Definition copied from our parsers output of ;; preprocessing output as defined above '(CTRL (lambda (x) (ternary (and (>= x 97) (<= x 122)) (+ (- x 97) 1) (bitand (+ (- x 65) 1) 127))))))) (let ((form "CREPRINT")) (test-equal '(variable "CREPRINT") (lex form)) (test-equal 'CREPRINT (run form))) (let ((form "(CCTS_OFLOW | CRTS_IFLOW)")) (test-equal '(group (infix (variable "CCTS_OFLOW") (operator "|") (variable "CRTS_IFLOW"))) (lex form)) (test-equal '(bitor CCTS_OFLOW CRTS_IFLOW) (run form))) ;; ((x) >= 'a' && (x) <= 'z' ;; ? ((x) - 'a' + 1) ;; : (((x) - 'a' + 1) & 0x7f)) (let ((form "((x) >= 'a' && (x) <= 'z' ? ((x) - 'a' + 1) : (((x) - 'a' + 1) & 0x7f))")) ;; (test-equal '() (lex form)) (test-equal '(ternary (and (>= x #x61) (<= x #x7A)) (+ (- x #x61) 1) (bitand (+ (- x #x61) 1) 127)) (run form))) (let ((form "((x) & ~(IOCPARM_MASK << 16))")) ;; (test-equal '() (lex form)) (test-equal '(bitand x (compl (<< IOCPARM_MASK 16))) (run form))) (let ((form "(((x) >> 8) & 0xff)")) ;; (test-equal '() (lex form)) (test-equal '(bitand (>> x 8) 255) (run form))) (let ((form "(((x) >> 16) & IOCPARM_MASK)")) ;; (test-equal '() (lex form)) (test-equal '(bitand (>> x 16) IOCPARM_MASK) (run form))) (let ((form "((1 << IOCPARM_SHIFT) - 1)")) ;; (test-equal '() (lex form)) (test-equal '(- (<< 1 IOCPARM_SHIFT) 1) (run form))) (let ((form "_IO('t', 120)")) (test-equal '(funcall (variable "_IO") (group (infix (char "t") (operator ",") (integer (base-10 "120"))))) (lex form)) (test-equal '(funcall _IO (#{,}# 116 120)) (run form))) ;; note the lone type (let ((form "_IOW('t', 98, int)")) ;; (test-equal '() (lex form)) (test-equal '(funcall _IOW (#{,}# 116 98 int)) (run form))) ;; note the multi-word type (let ((form "_IOR('t', 19, struct termios)")) ;; (test-equal '() (lex form)) (test-equal '(funcall _IOR (#{,}# 116 19 (struct-type termios))) (run form))) ;; TODO concatenation rules ;; #define __CONCAT(x,y) __CONCAT1(x,y) ;; #define __CONCAT1(x,y) x ## y ;; #define __CONSTANT_CFSTRINGS__ 1 ;; #define __COPYRIGHT(s) __IDSTRING(__CONCAT(__copyright_,__LINE__),s) (test-group "Token concatenation" (let ((form "x ## y")) (test-equal '() (lex form)) (test-equal '0 (run form)))) (test-group "Floating point numbers" (test-group "Diffent forms" (test-group "No decimal point, exponent, no suffix" (let ((form "10e10")) (test-equal '(float (float-integer (base-10 "10")) (exponent (base-10 "10"))) (lex form)) (test-equal 10e10 (run form)))) (test-group "No decimal point, negative exponent" (let ((form "10e-10")) (test-equal '(float (float-integer (base-10 "10")) (exponent "-" (base-10 "10"))) (lex form)) (test-equal 10e-10 (run form)))) (test-group "No decimal point, exponent and suffix" (let ((form "10e10L")) (test-equal '(float (float-integer (base-10 "10")) (exponent (base-10 "10")) (float-suffix "L")) (lex form)) (test-equal '(as-type (long double) 10e10) (run form)))) (test-group "Leading period, no exponent or suffix" (let ((form ".1")) (test-equal '(float (float-decimal (base-10 "1"))) (lex form)) (test-equal 0.1 (run form)))) (test-group "Trailing period, no exponent or suffix" (let ((form "1.")) (test-equal '(float (float-integer (base-10 "1"))) (lex form)) (test-equal 1.0 (run form))))) (test-group "Negative float" (let ((form "-1.0")) (test-equal '(prefix (prefix-operator "-") (float (float-integer (base-10 "1")) (float-decimal (base-10 "0")))) (lex form)) (test-equal '(- 1.0) (run form)))) (test-group "Real world examples" (let ((form "4.9406564584124654e-324")) (test-equal '(float (float-integer (base-10 "4")) (float-decimal (base-10 "9406564584124654")) (exponent "-" (base-10 "324"))) (lex form)) (test-equal 4.9406564584124654e-324 (run form))) (let ((form "1.7976931348623157e+308")) (test-equal '(float (float-integer (base-10 "1")) (float-decimal (base-10 "7976931348623157")) (exponent "+" (base-10 "308"))) (lex form)) (test-equal 1.7976931348623157e+308 (run form)))) (test-group "Float in infix expression" (test-group "Simple case" (let ((form "1. + .1")) (test-equal '(infix (float (float-integer (base-10 "1"))) (operator "+") (float (float-decimal (base-10 "1")))) (lex form)) (test-equal '(+ 1.0 0.1) (run form)))) ;; (test-group "Complicated case") )) (test-group "Typecasts" (let ((form "(unsigned) 5")) (test-equal '((group (variable "unsigned")) (integer (base-10 "5"))) (lex form)) (test-equal '(as-type (unsigned) 5) (run form))) (let ((form "(unsigned integer) 5")) (test-equal '((group (variable "unsigned") (variable "integer")) (integer (base-10 "5"))) (lex form)) (test-equal '(as-type (unsigned integer) 5) (run form))) (test-group "Pointer with space" (let ((form "(int *) 5")) (test-equal '((group (postfix (variable "int") (postfix-operator "*"))) (integer (base-10 "5"))) (lex form)) (test-equal '(as-type (int *) 5) (run form)))) (test-group "Pointer without space" (let ((form "(int*) 5")) (test-equal '((group (postfix (variable "int") (postfix-operator "*"))) (integer (base-10 "5"))) (lex form)) (test-equal '(as-type (int *) 5) (run form)))) (test-group "Multi word type pointer" (let ((form "(unsigned int*) 5")) (test-equal '((group (variable "unsigned") (postfix (variable "int") (postfix-operator "*"))) (integer (base-10 "5"))) (lex form)) (test-equal '(as-type (unsigned int *) 5) (run form)))) (test-group "Double cast" (let ((form "(int)(unsigned) 5")) (test-equal '((group (variable "int")) (group (variable "unsigned")) (integer (base-10 "5"))) (lex form)) (test-equal '(as-type (int) (as-type (unsigned) 5)) (run form)))) (test-group "Cast with operation" (let ((form "(int) 5 + 7")) (test-equal '((group (variable "int")) (infix (integer (base-10 "5")) (operator "+") (integer (base-10 "7")))) (lex form)) (test-equal '(+ (as-type (int) 5) 7) (run form)))) (test-group "Tripple cast, with value inside paranthesis" (let ((form "(type)(__uintptr_t)(const void *)(var)")) (test-equal '((group (variable "type")) (group (variable "__uintptr_t")) (group (variable "const") (postfix (variable "void") (postfix-operator "*"))) (group (variable "var"))) (lex form)) (test-equal '(as-type (type) (as-type (__uintptr_t) (as-type (const void *) var))) (run form)))) (test-group "Same as above, but whole thing inside parenthesis" (let ((form "((type)(__uintptr_t)(const void *)(var))")) (test-equal '(group (group (variable "type")) (group (variable "__uintptr_t")) (group (variable "const") (postfix (variable "void") (postfix-operator "*"))) (group (variable "var"))) (lex form)) (test-equal '(as-type (type) (as-type (__uintptr_t) (as-type (const void *) var))) (run form)))) (let ((form "((type)(__uintptr_t)(const volatile void *)(var))")) ;; (test-equal '() (lex form)) (test-equal '(as-type (type) (as-type (__uintptr_t) (as-type (const volatile void *) var))) (run form))) (let ((form "((unsigned long) ((inout) | (((len) & IOCPARM_MASK) << 16) | ((group) << 8) | (num)))")) (test-equal '(group (group (variable "unsigned") (variable "long")) (group (infix (group (variable "inout")) (operator "|") (infix (group (infix (group (infix (group (variable "len")) (operator "&") (variable "IOCPARM_MASK"))) (operator "<<") (integer (base-10 "16")))) (operator "|") (infix (group (infix (group (variable "group")) (operator "<<") (integer (base-10 "8")))) (operator "|") (group (variable "num"))))))) (lex form)) (test-equal '(as-type (unsigned long) (bitor inout (<< (bitand len IOCPARM_MASK) 16) (<< group 8) num)) (run form)))) (test-group "Characters" (let ((form "'c'")) (test-equal '(char "c") (lex form)) (test-equal #x63 (run form))) (let ((form "'\\n'")) (test-equal '(char (escaped-char "n")) (lex form)) (test-equal (char->integer #\newline) (run form)))) (test-group "Strings" (test-group "Empty string" (let ((form "\"\"")) (test-equal 'string (lex form)) (test-equal #vu8(0) (run form)))) (test-group "Simple string" (let ((form "\"li\"")) (test-equal '(string "li") (lex form)) (test-equal #vu8(#x6C #x69 0) (run form)))) (test-group "Implicit concatenation of strings" (let ((form "\"a\" \"b\"")) (test-equal '((string "a") (string "b")) (lex form)) (test-equal #vu8(#x61 #x62 0) (run form)))) (test-group "Implicit concatenation of string and macro" (let ((form "\"a\" MACRO")) (test-equal '((string "a") (variable "MACRO")) (lex form)) (test-equal '() (run form)))) (test-group "String with only escape" (let ((form (string #\" #\\ #\" #\"))) (test-equal `(string (escaped-char "\"")) (lex form)) (test-equal #vu8(34 0) (run form)))) (test-group "String with escape at start" (let ((form (string #\" #\\ #\" #\a #\"))) (test-equal `(string (escaped-char "\"") "a") (lex form)) (test-equal #vu8(34 #x61 0) (run form)))) (test-group "String with escape at end" (let ((form (string #\" #\a #\\ #\" #\"))) (test-equal `(string "a" (escaped-char "\"")) (lex form)) (test-equal #vu8(#x61 34 0) (run form)))) (test-group "String with escape in middle" (let ((form (string #\" #\a #\\ #\" #\b #\"))) (test-equal `(string "a" (escaped-char "\"") "b") (lex form)) (test-equal #vu8(#x61 34 #x62 0) (run form)))) ;; \e is semi non-standard (test-group "String with bakslash-e esacpe" (let ((form "\"\\e\"")) (test-equal '(string (escaped-char "e")) (lex form)) (test-equal #vu8(#x1b 0) (run form)))) (test-group "String with byte secquence escape" (let ((form "\"\\xf0\\x9f\\x92\\xa9\"")) (test-equal '(string (escaped-char (base-16-char "f0")) (escaped-char (base-16-char "9f")) (escaped-char (base-16-char "92")) (escaped-char (base-16-char "a9"))) (lex form)) (test-equal #vu8(#xf0 #x9f #x92 #xa9 0) (run form))))) (test-group "__asm__" (let ((form "__asm__(\".globl \" __XSTRING(sym))")) (test-equal '() (lex form)) ;; TODO implicit string concatenation (test-equal '(funcall __asm__ (string ".globl ") (funcall __XSTRING sym)) (run form)))) (let ((form "__attribute__((__aligned__(x)))")) (test-equal '(funcall (variable "__attribute__") (group (group (funcall (variable "__aligned__") (group (variable "x")))))) (lex form)) ;; This drops the extra set of parenthesis. Do we care? (test-equal '(funcall __attribute__ (funcall __aligned__ x)) (run form)))