From 742ae45ce7bdaae3c95a4a74afa5d17381fc76b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20H=C3=B6rnquist?= Date: Sat, 9 May 2020 22:16:24 +0200 Subject: Remove old parser, rename new parser to component. --- module/vcomponent/parse/component.scm | 143 +++++++++++++++ module/vcomponent/parse/new.scm | 143 --------------- module/vcomponent/parse/old.scm | 315 ---------------------------------- 3 files changed, 143 insertions(+), 458 deletions(-) create mode 100644 module/vcomponent/parse/component.scm delete mode 100644 module/vcomponent/parse/new.scm delete mode 100644 module/vcomponent/parse/old.scm (limited to 'module/vcomponent/parse') diff --git a/module/vcomponent/parse/component.scm b/module/vcomponent/parse/component.scm new file mode 100644 index 00000000..565c129d --- /dev/null +++ b/module/vcomponent/parse/component.scm @@ -0,0 +1,143 @@ +(define-module (vcomponent parse component) + :use-module (util) + :use-module (util exceptions) + :use-module ((ice-9 rdelim) :select (read-line)) + :use-module (vcomponent base) + :use-module (datetime) + :use-module (srfi srfi-1) + :use-module (srfi srfi-26) + ) + +(define-public (parse-calendar port) + (parse (map tokenize (read-file port)))) + +;; port → (list string) +(define (read-file port) + (let loop ((done '())) + (let ((line (read-line port))) + (if (eof-object? line) + (reverse! done) + (let ((line (string-trim-right line))) + (loop + (if (char=? #\space (string-ref line 0)) + (cons (string-append (car done) + (string-drop line 1)) + (cdr done)) + (cons line done)))))))) + +;; (list string) → (list (key kv ... value)) +(define (tokenize line) + (define colon-idx (string-index line #\:)) + (define semi-idxs + (let loop ((idx 0)) + (aif (string-index line #\; idx colon-idx) + (cons it (loop (1+ it))) + (list colon-idx (string-length line))))) + (map (lambda (start end) + (substring line (1+ start) end)) + (cons -1 semi-idxs) + semi-idxs)) + +;; params could be made optional, with an empty hashtable as default +(define (build-vline key value params) + (case key + [(DTSTART DTEND RECURRENCE-ID LAST-MODIFIED DTSTAMP EXDATE) + + ;; '("Africa/Ceuta" "Europe/Stockholm" "local") + (let ((tz (or (hashq-ref params 'TZID) + (and (string= "Z" (string-take-right value 1)) "UTC")))) + + (let ((type (hashq-ref params 'VALUE))) + (if (or (and=> type (cut string=? <> "DATE-TIME")) + (string-index value #\T)) + ;; we move all parsed datetimes to local time here. This + ;; gives a MASSIVE performance boost over calling get-datetime + ;; in all procedures which want to guarantee local time for proper calculations. + ;; 20s vs 70s runtime on my laptop. + (let ((datetime (parse-ics-datetime value tz))) + (hashq-set! params 'VALUE 'DATE-TIME) + (values (make-vline key (get-datetime datetime) params) + (make-vline (symbol-append 'X-ORIGINAL- key) datetime params))) + (begin (hashq-set! params 'VALUE 'DATE) + (make-vline key (parse-ics-date value) params)))))] + + [else + (make-vline key + (list->string + (let loop ((rem (string->list value))) + (if (null? rem) + '() + (if (char=? #\\ (car rem)) + (case (cadr rem) + [(#\n #\N) (cons #\newline (loop (cddr rem)))] + [(#\; #\, #\\) => (lambda (c) (cons c (loop (cddr rem))))] + [else => (lambda (c) (warning "Non-escapable character: ~a" c) + (loop (cddr rem)))]) + (cons (car rem) (loop (cdr rem))))))) + params)])) + +;; (parse-itemline '("DTEND" "20200407T130000")) +;; => DTEND +;; => "20200407T130000" +;; => # +(define (parse-itemline itemline) + (define key (string->symbol (car itemline))) + (define parameters (make-hash-table)) + (let loop ((rem (cdr itemline))) + (if (null? (cdr rem)) + (values key (car rem) parameters ) + (let* ((kv (car rem)) + (idx (string-index kv #\=))) + (hashq-set! parameters (string->symbol (substring kv 0 idx)) + (substring kv (1+ idx))) + (loop (cdr rem)))))) + + +;; (list (key kv ... value)) → +(define (parse lst) + (let loop ((lst lst) + (stack '())) + (if (null? lst) + stack + (let ((head (car lst))) + (cond [(string=? "BEGIN" (car head)) + (loop (cdr lst) (cons (make-vcomponent (string->symbol (cadr head))) stack))] + [(string=? "END" (car head)) + + ;; TODO This is an ugly hack until the rest of the code is updated + ;; to work on events without an explicit DTEND attribute. + (when (and (eq? (type (car stack)) 'VEVENT) + (not (attr (car stack) 'DTEND))) + (set! (attr (car stack) 'DTEND) + (let ((start (attr (car stack) 'DTSTART))) + ;; p. 54, 3.6.1 + ;; If DTSTART is a date then it's an all + ;; day event. If DTSTART instead is a + ;; datetime then the event has a length + ;; of 0? + (if (date? start) + (date+ start (date day: 1)) + (datetime+ start (datetime time: (time hour: 1))))))) + + (loop (cdr lst) + (if (null? (cdr stack)) + ;; return + (car stack) + (begin (add-child! (cadr stack) (car stack)) + (cdr stack))))] + [else + (let* ((key value params (parse-itemline head))) + (call-with-values (lambda () (build-vline key value params)) + (lambda vlines + (for vline in vlines + (define key (vline-key vline)) + + ;; Which types are allowed to be given multiple times + (if (memv (vline-key vline) '(EXDATE ATTENDEE)) + (aif (attr* (car stack) key) + (set! (attr* (car stack) key) (cons vline it)) + (set! (attr* (car stack) key) (list vline))) + ;; else + (set! (attr* (car stack) key) vline)))))) + + (loop (cdr lst) stack)]))))) diff --git a/module/vcomponent/parse/new.scm b/module/vcomponent/parse/new.scm deleted file mode 100644 index 4244725a..00000000 --- a/module/vcomponent/parse/new.scm +++ /dev/null @@ -1,143 +0,0 @@ -(define-module (vcomponent parse new) - :use-module (util) - :use-module (util exceptions) - :use-module ((ice-9 rdelim) :select (read-line)) - :use-module (vcomponent base) - :use-module (datetime) - :use-module (srfi srfi-1) - :use-module (srfi srfi-26) - ) - -(define-public (parse-calendar port) - (parse (map tokenize (read-file port)))) - -;; port → (list string) -(define (read-file port) - (let loop ((done '())) - (let ((line (read-line port))) - (if (eof-object? line) - (reverse! done) - (let ((line (string-trim-right line))) - (loop - (if (char=? #\space (string-ref line 0)) - (cons (string-append (car done) - (string-drop line 1)) - (cdr done)) - (cons line done)))))))) - -;; (list string) → (list (key kv ... value)) -(define (tokenize line) - (define colon-idx (string-index line #\:)) - (define semi-idxs - (let loop ((idx 0)) - (aif (string-index line #\; idx colon-idx) - (cons it (loop (1+ it))) - (list colon-idx (string-length line))))) - (map (lambda (start end) - (substring line (1+ start) end)) - (cons -1 semi-idxs) - semi-idxs)) - -;; params could be made optional, with an empty hashtable as default -(define (build-vline key value params) - (case key - [(DTSTART DTEND RECURRENCE-ID LAST-MODIFIED DTSTAMP EXDATE) - - ;; '("Africa/Ceuta" "Europe/Stockholm" "local") - (let ((tz (or (hashq-ref params 'TZID) - (and (string= "Z" (string-take-right value 1)) "UTC")))) - - (let ((type (hashq-ref params 'VALUE))) - (if (or (and=> type (cut string=? <> "DATE-TIME")) - (string-index value #\T)) - ;; we move all parsed datetimes to local time here. This - ;; gives a MASSIVE performance boost over calling get-datetime - ;; in all procedures which want to guarantee local time for proper calculations. - ;; 20s vs 70s runtime on my laptop. - (let ((datetime (parse-ics-datetime value tz))) - (hashq-set! params 'VALUE 'DATE-TIME) - (values (make-vline key (get-datetime datetime) params) - (make-vline (symbol-append 'X-ORIGINAL- key) datetime params))) - (begin (hashq-set! params 'VALUE 'DATE) - (make-vline key (parse-ics-date value) params)))))] - - [else - (make-vline key - (list->string - (let loop ((rem (string->list value))) - (if (null? rem) - '() - (if (char=? #\\ (car rem)) - (case (cadr rem) - [(#\n #\N) (cons #\newline (loop (cddr rem)))] - [(#\; #\, #\\) => (lambda (c) (cons c (loop (cddr rem))))] - [else => (lambda (c) (warning "Non-escapable character: ~a" c) - (loop (cddr rem)))]) - (cons (car rem) (loop (cdr rem))))))) - params)])) - -;; (parse-itemline '("DTEND" "20200407T130000")) -;; => DTEND -;; => "20200407T130000" -;; => # -(define (parse-itemline itemline) - (define key (string->symbol (car itemline))) - (define parameters (make-hash-table)) - (let loop ((rem (cdr itemline))) - (if (null? (cdr rem)) - (values key (car rem) parameters ) - (let* ((kv (car rem)) - (idx (string-index kv #\=))) - (hashq-set! parameters (string->symbol (substring kv 0 idx)) - (substring kv (1+ idx))) - (loop (cdr rem)))))) - - -;; (list (key kv ... value)) → -(define (parse lst) - (let loop ((lst lst) - (stack '())) - (if (null? lst) - stack - (let ((head (car lst))) - (cond [(string=? "BEGIN" (car head)) - (loop (cdr lst) (cons (make-vcomponent (string->symbol (cadr head))) stack))] - [(string=? "END" (car head)) - - ;; TODO This is an ugly hack until the rest of the code is updated - ;; to work on events without an explicit DTEND attribute. - (when (and (eq? (type (car stack)) 'VEVENT) - (not (attr (car stack) 'DTEND))) - (set! (attr (car stack) 'DTEND) - (let ((start (attr (car stack) 'DTSTART))) - ;; p. 54, 3.6.1 - ;; If DTSTART is a date then it's an all - ;; day event. If DTSTART instead is a - ;; datetime then the event has a length - ;; of 0? - (if (date? start) - (date+ start (date day: 1)) - (datetime+ start (datetime time: (time hour: 1))))))) - - (loop (cdr lst) - (if (null? (cdr stack)) - ;; return - (car stack) - (begin (add-child! (cadr stack) (car stack)) - (cdr stack))))] - [else - (let* ((key value params (parse-itemline head))) - (call-with-values (lambda () (build-vline key value params)) - (lambda vlines - (for vline in vlines - (define key (vline-key vline)) - - ;; Which types are allowed to be given multiple times - (if (memv (vline-key vline) '(EXDATE ATTENDEE)) - (aif (attr* (car stack) key) - (set! (attr* (car stack) key) (cons vline it)) - (set! (attr* (car stack) key) (list vline))) - ;; else - (set! (attr* (car stack) key) vline)))))) - - (loop (cdr lst) stack)]))))) diff --git a/module/vcomponent/parse/old.scm b/module/vcomponent/parse/old.scm deleted file mode 100644 index 648f9bc5..00000000 --- a/module/vcomponent/parse/old.scm +++ /dev/null @@ -1,315 +0,0 @@ -(define-module (vcomponent parse old) - :use-module (util) - :use-module (util strbuf) - :use-module (util exceptions) - - :use-module ((rnrs io ports) :select (get-u8)) - :use-module ((ice-9 textual-ports) :select (unget-char)) - - :use-module (srfi srfi-1) - :use-module (srfi srfi-9) - :use-module (srfi srfi-26) - - :use-module (datetime) - :use-module (datetime util) - - :use-module (vcomponent base) - :use-module (vcomponent datetime) - ;; export (parse-calendar) -) - -(define-record-type - (make-parse-ctx% filename row col ctx line-key param-key param-table) - parse-ctx? - (filename get-filename) ; string - (row get-row set-row!) ; [0, ] - (col get-col set-col!) ; [1, ) - (ctx get-ctx set-ctx!) ; '(key value param-name param-value escape) - (line-key get-line-key set-line-key!) ; string - (param-key get-param-key set-param-key!) ; string - (param-table get-param-table set-param-table!) ; hash-map - ) - -(define (make-parse-ctx filename) - (make-parse-ctx% filename 1 0 'key - #f #f (make-hash-table))) - -(define (increment-column! ctx) - (set-col! ctx (1+ (get-col ctx)))) - -(define (increment-row! ctx) - (set-col! ctx 0) - (set-row! ctx (1+ (get-row ctx)))) - -(define (ctx-dump-strings! ctx) - (set-line-key! ctx "") - (set-param-key! ctx "") - ;; (set-param-table! ctx (make-hash-table)) - ) - - - - -(define (fold-proc ctx c) - ;; First extra character optional read is to get the \n if our line - ;; ended with \r\n. Secound read is to get the first character of the - ;; next line. The initial \r which might recide in @var{c} is discarded. - (let ((pair (cons (if (char=? #\newline (integer->char c)) - c (get-u8 (current-input-port))) - (get-u8 (current-input-port))))) - (increment-row! ctx) - (cond [(not (char=? #\newline (integer->char (car pair)))) - (error "Expected newline after CR")] - - ;; The standard (3.4, l. 2675) says that each icalobject must - ;; end with CRLF. My files however does not. This means that - ;; an EOF can immideately follow a \n\r pair. But this case is the - ;; same as that we are at the end of line, so we spoof it and let - ;; the regular parser loop handle it. - [(eof-object? (cdr pair)) - 'end-of-line] - - ;; Following line begins with a whitespace character, - ;; meaning that we don't break the logical line here. - [(memv (integer->char (cdr pair)) '(#\space #\tab)) - (increment-column! ctx) ; since we just read the space - 'fold] - - [else - ;; TODO check if this failed, and signal a writeback error - (unget-char (current-input-port) - (integer->char (cdr pair))) - - 'end-of-line]))) - -(define (handle-value! key vline strbuf) - (case key - ;; As far as I can tell the RFC says nothing about special - ;; encoding for individual fields. It mentieons UTF-8, and - ;; that transfer encoding should be set in the mime-headers. - ;; That however seems like a breach of abstractions. - ;; Currently I allow a CHARSET property on SUMMARY fields, - ;; since I know that at least www.lysator.liu.se/alma/alma.cgi - ;; uses it. - [(SUMMARY) - (cond [(and=> (prop vline 'CHARSET) car) - => (lambda (encoding) - (set! (value vline) - (strbuf->string strbuf ((@ (rnrs io ports) make-transcoder) - encoding))))])] - - [(DTSTART DTEND RECURRENCE-ID LAST-MODIFIED DTSTAMP EXDATE) - - ;; '("Africa/Ceuta" "Europe/Stockholm" "local") - (let ((tz (or (and=> (prop vline 'TZID) car) - (and (string= "Z" (string-take-right (value vline) 1)) "UTC")))) - - (let ((type (and=> (prop vline 'VALUE) car))) - (if (or (and=> type (cut string=? <> "DATE-TIME")) - (string-contains (value vline) "T")) - ;; TODO TODO TODO - ;; we move all parsed datetimes to local time here. This - ;; gives a MASSIVE performance boost over calling get-datetime - ;; in all procedures which want to guarantee local time for proper calculations. - ;; 20s vs 70s runtime on my laptop. - ;; We sohuld however save the original datetime in a file like X-HNH-DTSTART, - ;; since we don't want to lose that information. - (set! (value vline) (get-datetime (parse-ics-datetime (value vline) tz)) - (prop vline 'VALUE) 'DATE-TIME) - (set! (value vline) (parse-ics-date (value vline)) - (prop vline 'VALUE) 'DATE))) - )])) - -;; Reads a vcomponent from the given port. -(define-public (parse-calendar port) - ;; (report-time! "Parsing ~a" port) - (with-input-from-port port - (lambda () - (let ((component (make-vcomponent)) - (ctx (make-parse-ctx (port-filename port))) - (strbuf (make-strbuf))) - (parameterize ((warning-handler - (lambda (fmt . args) - (format #f - "== PARSE WARNING == -filename = ~a -row ~a column ~a ctx = ~a -~a ; ~a = ... : ... -~?~%~%" - (get-filename ctx) - (get-row ctx) (get-col ctx) (get-ctx ctx) - (get-line-key ctx) (get-param-key ctx) - fmt args)))) - (with-throw-handler #t - (lambda () - (while #t - (let ((c (get-u8 (current-input-port)))) - (cond - - ;; End of file - [(eof-object? c) - ;; == NOTE == - ;; We never check the final line here. But since it - ;; ALWAYS should be "END:VCOMPONENT", and we do all - ;; the setup at creation this shouldn't be a problem. - (let ((component - (case (get-ctx ctx) - ;; Line ended before we came here, get the actual root - ;; component (instead of our virtual one: - [(key) (car (children component))] - ;; Line wasn't ended before we get here, so our current - ;; component is our "actual" root. NOTE that this never - ;; actually finalizes the root object, which matters if - ;; if do something with the finalizer below. - ;; At the time of writing we just set the parent. - [(value) component] - [else - => (lambda (a) - (scm-error - 'wrong-type-arg "parse-break" - (string-append - "Bad context at end of file. " - "Expected `key' or `value', got ~a") - (list a) #f))]))) - ;; == NOTE == - ;; This sets to the VCALENDAR, which is correct, - ;; but the program later squashes together everything - ;; and drops this information. - (set! (attr component 'X-HNH-FILENAME) (get-filename ctx) - (parent component) #f) - (break component))] - - ;; End of line - [(memv (integer->char c) '(#\return #\newline)) - (case (fold-proc ctx c) - [(end-of-line) - (let ((str (strbuf->string strbuf))) - (cond [(and (eq? 'key (get-ctx ctx)) - (string-null? str)) - ;; I believe that an empty line is against the standard - ;; in every way. But it's nice to handle it. - (warning "Unexpected completely empty line")] - - [(eq? 'BEGIN (get-line-key ctx)) - (let ((child (make-vcomponent (string->symbol str)))) - (add-child! component child) - (set! component child))] - - [(eq? (get-line-key ctx) 'END) - - ;; Ensure that we have a DTEND - ;; TODO Objects aren't required to have a DTEND, or a DURATION. - ;; write fancier code which acknoledges this. - (when (and (eq? 'VEVENT (type component)) - (not (attr component 'DTEND))) - (set! (attr component 'DTEND) - (let ((start (attr component 'DTSTART))) - ;; p. 54, 3.6.1 - ;; If DTSTART is a date then it's an all - ;; day event. If DTSTART instead is a - ;; datetime then the event has a length - ;; of 0? - (if (date? start) - (date+ start (date day: 1)) - (datetime+ start (datetime time: (time hour: 1))))))) - - (set! component (parent component))] - - [else ; Regular key-value line - (let ((key (get-line-key ctx)) - (vline (make-vline str (get-param-table ctx)))) - ;; Type specific processing - (handle-value! key vline strbuf) - - ;; From RFC 5545 §3.6.1 - ;; DTEND and DURATION are mutually exclusive - ;; DTSTART is required to exist while the other two are optional. - - ;; Allowed (some) repeated keys - (if (memv key '(EXDATE ATTENDEE)) - (aif (attr* component key) - ;; updates the current vline - ;; NOTE that this discards any properties belonging to this object - ;; TODO a more propper way to do it would be to store multiple vline - ;; objects for a given key. - (set! (value it) (cons (value vline) (value it))) - (begin (mod! (value vline) list) - (set-vline! component key vline))) - ;; Keys which aren't allowed to be repeated. - (begin - (awhen (attr* component key) - (warning "Key ~a encountered more than once, overriding old value [~a] with [~a]" - key (value it) (value vline))) - (set-vline! component key vline)))) - (set-param-table! ctx (make-hash-table))]) - - (strbuf-reset! strbuf) - (ctx-dump-strings! ctx) - (set-ctx! ctx 'key))] - [(fold) 'noop] ; Good case, here to catch errors in else - [else => (lambda (a) (error "Bad return from fold, unexpected" a))])] - - ;; Escaped characters - [(char=? #\\ (integer->char c)) - (case (integer->char (get-u8 (current-input-port))) - ;; Escape character '\' and escaped token sepparated by a newline - ;; (since the standard for some reason allows that (!!!)) - ;; We are at least guaranteed that it's a folded line, so just - ;; unfold it and continue trying to find a token to escape. - [(#\return #\newline) - => (lambda (c) - (case (fold-proc ctx (char->integer c)) - [(end-of-line) - (throw 'escape-error "ESC before not folded line")] - [(fold) - (increment-column! ctx) - (strbuf-append! strbuf (get-u8 (current-input-port)))]))] - - [(#\n #\N) (strbuf-append! strbuf (char->integer #\newline))] - [(#\; #\, #\\) => (lambda (c) (strbuf-append! strbuf (char->integer c)))] - [else => (lambda (c) (warning "Non-escapable character: ~a" c))]) - (increment-column! ctx)] - - ;; Delimiter between param key and param value - [(and (eq? (get-ctx ctx) 'param-name) - (char=? #\= (integer->char c))) - (set-param-key! ctx (string->symbol (strbuf->string strbuf))) - (strbuf-reset! strbuf) - (set-ctx! ctx 'param-value)] - - ;; Delimiter between parameters (;), or between - ;; "something" and attribute value (:) - [(and (memv (integer->char c) '(#\: #\;)) - (memv (get-ctx ctx) '(param-value key))) - (case (get-ctx ctx) - [(param-value) - (hashq-set! (get-param-table ctx) - (get-param-key ctx) - (strbuf->string strbuf)) - (strbuf-reset! strbuf)] - [(key) - (set-line-key! ctx (string->symbol (strbuf->string strbuf))) - (strbuf-reset! strbuf)]) - - (set-ctx! ctx (case (integer->char c) - [(#\:) 'value] - [(#\;) 'param-name]))] - - ;; Regular character - [else - (strbuf-append! strbuf c) - (increment-column! ctx)])))) - - (lambda _ - ;; display is atomic, format isn't - (display - (format #f - "== PARSE ERROR == -filename = ~a -row ~a column ~a ctx = ~a -~a ; ~a = ... : ...~%~%" - (get-filename ctx) - (get-row ctx) (get-col ctx) (get-ctx ctx) - (get-line-key ctx) (get-param-key ctx)))))))))) - - -- cgit v1.2.3