From 527dbb981e87ca70d3f425b965b08d3b3420198a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20H=C3=B6rnquist?= Date: Mon, 15 Jun 2020 01:05:31 +0200 Subject: Work on parser. --- module/vcomponent/duration.scm | 13 +- module/vcomponent/parse/component.scm | 235 +++++++++++++++++++++------------ module/vcomponent/parse/types.scm | 11 +- module/vcomponent/recurrence/parse.scm | 7 +- tests/recurring.scm | 2 +- 5 files changed, 176 insertions(+), 92 deletions(-) diff --git a/module/vcomponent/duration.scm b/module/vcomponent/duration.scm index 049c8821..42bb4ca4 100644 --- a/module/vcomponent/duration.scm +++ b/module/vcomponent/duration.scm @@ -27,8 +27,8 @@ (define-peg-pattern time-pattern body (and (ignore "T") - (and (capture (and number "H")) - (? (and (capture (and number "M")) + (and (? (capture (and number "H"))) + (? (and (? (capture (and number "M"))) (? (capture (and number "S")))))))) (define-peg-pattern dur-pattern body @@ -42,7 +42,7 @@ (define (parse-duration str) (let ((m (match-pattern dur-pattern str))) (unless m - (error "~a doesn't appar to be a duration" str)) + (throw 'parse-error "~a doesn't appar to be a duration" str)) (unless (= (peg:end m) (string-length str)) (warning "Garbage at end of duration")) @@ -63,13 +63,16 @@ [(S) `(second: ,n)] [else (error "Invalid key")]))] [#\T '()]) - (cadr (member "P" tree)))))) + (cdr (member "P" tree)))))) (apply duration (cons* sign: sign (let loop ((rem lst)) (if (null? rem) '() - (if (eqv? hour: (car rem)) + ;; NOTE a potentially prettier way would be + ;; to capture the T above, and use that as + ;; the delimiter for the time. + (if (memv (car rem) '(hour: minute: second:)) (list time: (apply time rem)) (cons* (car rem) (cadr rem) (loop (cddr rem))))))))))) diff --git a/module/vcomponent/parse/component.scm b/module/vcomponent/parse/component.scm index 9ab9c004..78482b61 100644 --- a/module/vcomponent/parse/component.scm +++ b/module/vcomponent/parse/component.scm @@ -5,6 +5,7 @@ :use-module (vcomponent base) :use-module (datetime) :use-module (srfi srfi-1) + :use-module (srfi srfi-9 gnu) :use-module (srfi srfi-26) :use-module (vcomponent parse types) ) @@ -12,37 +13,59 @@ (define-public (parse-calendar port) (parse (map tokenize (read-file port)))) +(define-immutable-record-type + (make-line string file line) + line? + (string get-string) + (file get-file) + (line get-line)) + ;; port → (list string) (define (read-file port) - (let loop ((done '())) - (let ((line (read-line port))) - (if (eof-object? line) - (reverse! done) - (let ((line (string-trim-right line))) - (loop - (if (char=? #\space (string-ref line 0)) - ;; Line Wrapping - ;; TODO if the line is split inside a unicode character - ;; then this produces multiple broken unicode characters. - ;; It could be solved by checking the start of the new line, - ;; and the tail of the old line for broken char - (cons (string-append (car done) - (string-drop line 1)) - (cdr done)) - (cons line done)))))))) - -;; (list string) → (list (key kv ... value)) -(define (tokenize line) + (define fname (port-filename port)) + (let loop ((line-number 1) (done '())) + (let ((line (read-line port))) + (if (eof-object? line) + (reverse! done) + (let ((line (string-trim-right line))) + (loop + (1+ line-number) + (if (char=? #\space (string-ref line 0)) + ;; Line Wrapping + ;; TODO if the line is split inside a unicode character + ;; then this produces multiple broken unicode characters. + ;; It could be solved by checking the start of the new line, + ;; and the tail of the old line for broken char + (cons (make-line (string-append (get-string (car done)) + (string-drop line 1)) + fname + (get-line (car done))) + (cdr done)) + (cons (make-line line fname line-number) + done)))))))) + +(define-immutable-record-type + (make-tokens metadata data) + tokens? + (metadata get-metadata) ; + (data get-data) ; (key kv ... value) + ) + +;; (list ) → (list (key kv ... value)) +(define (tokenize line-obj) + (define line (get-string line-obj)) (define colon-idx (string-index line #\:)) (define semi-idxs (let loop ((idx 0)) (aif (string-index line #\; idx colon-idx) (cons it (loop (1+ it))) (list colon-idx (string-length line))))) - (map (lambda (start end) - (substring line (1+ start) end)) - (cons -1 semi-idxs) - semi-idxs)) + (make-tokens + line-obj + (map (lambda (start end) + (substring line (1+ start) end)) + (cons -1 semi-idxs) + semi-idxs))) (define (x-property? symb) (string=? "X-" (string-take (symbol->string symb) 2))) @@ -68,25 +91,29 @@ (let ((vv (parser params value))) (when (list? vv) (error "")) - (let ((v (symbol->string vv))) + (let ((v (string->symbol vv))) (unless (memv v enum) - (warning "")) + (warning "~a ∉ { ~{~a~^, ~} }" + v enum)) v))))) ;; params could be made optional, with an empty hashtable as default (define (build-vline key value params) (let ((parser (cond - [(hashq-ref params 'TYPE) => get-parser] + [(and=> (hashq-ref params 'VALUE) string->symbol) => get-parser] [(memv key '(COMPLETED DTEND DUE DTSTART RECURRENCE-ID RDATE - CREATED DTSTAMP LAST-MODIFIED)) + CREATED DTSTAMP LAST-MODIFIED + ;; only on VALARM + ACKNOWLEDGED + )) (get-parser 'DATE-TIME)] [(memv key '(EXDATE)) (list-parser 'DATE-TIME)] - [(memv key '(DURATION)) + [(memv key '(TRIGGER DURATION)) (get-parser 'DURATION)] [(memv key '(FREEBUSY)) @@ -98,7 +125,7 @@ (lambda (params value) (let ((v ((get-parser 'TEXT) params value))) (when (list? v) - (warning "List in non-list field")) + (warning "List in non-list field: ~s" v)) v))] ;; TEXT, but allow a list @@ -108,7 +135,7 @@ [(memv key '(VERSION)) (lambda (params value) (let ((v ((get-parser 'TEXT) params value))) - (unless (string=? "2.0" v) + (unless (and (string? v) (string=? "2.0" v)) (warning "File of unsuported version. Proceed with caution"))))] [(memv key '(TRANSP)) @@ -117,11 +144,22 @@ [(memv key '(CLASS)) (enum-parser '(PUBLIC PRIVATE CONFIDENTIAL) #t)] + [(memv key '(PARTSTAT)) + (enum-parser '(NEEDS-ACTION + ACCEPTED DECLINED + TENTATIVE DELEGATED + IN-PROCESS) + #t)] + ;; TODO [(memv key '(REQUEST-STATUS))] [(memv key '(ACTION)) - (enum-parser '(AUDIO DISPLAY EMAIL) #t)] + (enum-parser '(AUDIO DISPLAY EMAIL + NONE ; I don't know where NONE is from + ; but it appears to be prevelant. + ) + #t)] [(memv key '(TZOFFSETFROM TZOFFSETTO)) (get-parser 'UTC-OFFSET)] @@ -140,7 +178,6 @@ ((get-parser 'FLOAT) params right))))] [(memv key '(RRULE)) - ;; TODO date/datetime (get-parser 'RECUR)] [(memv key '(ORGANIZER ATTENDEE)) @@ -171,57 +208,93 @@ (loop (cdr rem)))))) -;; (list (key kv ... value)) → +;; (list ) → (define (parse lst) (let loop ((lst lst) (stack '())) (if (null? lst) stack - (let ((head (car lst))) - (cond [(string=? "BEGIN" (car head)) - (loop (cdr lst) (cons (make-vcomponent (string->symbol (cadr head))) stack))] - [(string=? "END" (car head)) - - (when (eq? (type (car stack)) 'VEVENT) - - ;; This isn't part of the field values since we "need" - ;; the type of DTSTART for UNTIL to work. - ;; This could however be side steped by auto detecting - ;; @type{date}s vs @type{datetime}s in @function{parse-recurrence-rule}. - (when (attr (car stack) 'RRULE) - (set! (attr (car stack) 'RRULE) - ((@ (vcomponent recurrence) parse-recurrence-rule) - (attr (car stack) 'RRULE) - (if (date? (attr (car stack) 'DTSTART)) - parse-ics-date parse-ics-datetime))))) - - (loop (cdr lst) - (if (null? (cdr stack)) - ;; return - (car stack) - (begin (add-child! (cadr stack) (car stack)) - (cdr stack))))] - [else - (let* ((key value params (parse-itemline head))) - (call-with-values (lambda () (build-vline key value params)) - (lambda vlines - (for vline in vlines - (define key (vline-key vline)) - - ;; See RFC 5545 p.53 for list of all repeating types - ;; (for vcomponent) - ;; TODO split on comman (,) here? - (if (memv key '(ATTACH ATTENDEE CATEGORIES - COMMENT CONTACT EXDATE - REQUEST-STATUS RELATED-TO - RESOURCES RDATE - ;; x-prop - ;; iana-prop - )) - (aif (attr* (car stack) key) - (set! (attr* (car stack) key) (cons vline it)) - (set! (attr* (car stack) key) (list vline))) - ;; else - (set! (attr* (car stack) key) vline)))))) - - (loop (cdr lst) stack)]))))) + (let* ((head* (car lst)) + (head (get-data head*))) + (catch 'parse-error + (lambda () + (parameterize + ((warning-handler + (lambda (fmt . args) + (let ((linedata (get-metadata head*))) + (format + #f "WARNING parse error around ~a + ~? + line ~a ~a~%" + (get-string linedata) + fmt args + (get-line linedata) + (get-file linedata) + ))))) + (cond [(string=? "BEGIN" (car head)) + (loop (cdr lst) + (cons (make-vcomponent (string->symbol (cadr head))) + stack))] + [(string=? "END" (car head)) + + (when (eq? (type (car stack)) 'VEVENT) + + ;; This isn't part of the field values since we "need" + ;; the type of DTSTART for UNTIL to work. + ;; This could however be side steped by auto detecting + ;; @type{date}s vs @type{datetime}s in @function{parse-recurrence-rule}. + #t + #; + (when (attr (car stack) 'RRULE) ; ; + (set! (attr (car stack) 'RRULE) ; ; + ((@ (vcomponent recurrence) parse-recurrence-rule) ; ; + (attr (car stack) 'RRULE) ; ; + (if (date? (attr (car stack) 'DTSTART)) ; ; + parse-ics-date parse-ics-datetime))))) + + (loop (cdr lst) + (if (null? (cdr stack)) + ;; return + (car stack) + (begin (add-child! (cadr stack) (car stack)) + (cdr stack))))] + [else + (let* ((key value params (parse-itemline head))) + (call-with-values (lambda () (build-vline key value params)) + (lambda vlines + (for vline in vlines + (define key (vline-key vline)) + + ;; See RFC 5545 p.53 for list of all repeating types + ;; (for vcomponent) + ;; TODO split on comman (,) here? + (if (memv key '(ATTACH ATTENDEE CATEGORIES + COMMENT CONTACT EXDATE + REQUEST-STATUS RELATED-TO + RESOURCES RDATE + ;; x-prop + ;; iana-prop + )) + (aif (attr* (car stack) key) + (set! (attr* (car stack) key) (cons vline it)) + (set! (attr* (car stack) key) (list vline))) + ;; else + (set! (attr* (car stack) key) vline)))))) + + (loop (cdr lst) stack)]))) + (lambda (err fmt . args) + (let ((linedata (get-metadata head*))) + (display (format + #f "ERROR parse error around ~a + ~? + line ~a ~a + Defaulting to string~%" + (get-string linedata) + fmt args + (get-line linedata) + (get-file linedata)) + (current-error-port)) + (let* ((key value params (parse-itemline head))) + (set! (attr* (car stack) key) + (make-vline key value params)) + (loop (cdr lst) stack))))))))) diff --git a/module/vcomponent/parse/types.scm b/module/vcomponent/parse/types.scm index 7472b8c2..78efe2ff 100644 --- a/module/vcomponent/parse/types.scm +++ b/module/vcomponent/parse/types.scm @@ -75,7 +75,10 @@ (str '()) (done '())) (if (null? rem) - (cons (list->string str) done) + (let ((final (reverse-list->string str))) + (if (null? done) + final + (cons final done))) (case (car rem) [(#\\) (case (cadr rem) @@ -110,10 +113,10 @@ (define (parse-utc-offset props value) (make-utc-offset (string->symbol (substring value 0 1)) - (number->string (substring value 1 3)) - (number->string (substring value 3 5)) + (string->number (substring value 1 3)) + (string->number (substring value 3 5)) (if (= 7 (string-length value)) - (number->string (substring value 5 7)) + (string->number (substring value 5 7)) 0))) (define type-parsers (make-hash-table)) diff --git a/module/vcomponent/recurrence/parse.scm b/module/vcomponent/recurrence/parse.scm index 3df5cc25..efcf984c 100644 --- a/module/vcomponent/recurrence/parse.scm +++ b/module/vcomponent/recurrence/parse.scm @@ -79,7 +79,12 @@ (let* (((key val) kv)) (let-lazy ((symb (string->symbol val)) - (date (datetime-parser val)) + ;; TODO this is an ugly hack. + ;; But sending in datetime-parser instead + ;; leads to dependency problems in vcomponent. + (date (catch 'parse-error + (lambda () (parse-ics-datetime val)) + (lambda _ (parse-ics-date val)))) (day (rfc->datetime-weekday (string->symbol val))) (days (map parse-day-spec (string-split val #\,))) (num (string->number val)) diff --git a/tests/recurring.scm b/tests/recurring.scm index 024dad55..c5ae43c6 100644 --- a/tests/recurring.scm +++ b/tests/recurring.scm @@ -10,7 +10,7 @@ (define ev (call-with-input-string "BEGIN:VEVENT -DTSTART:20190302 +DTSTART;VALUE=DATE:20190302 RRULE:FREQ=DAILY END:VEVENT" parse-calendar)) -- cgit v1.2.3