From f2a6efedc594533c5d755bf0d8bec8814f459834 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20H=C3=B6rnquist?= Date: Sat, 2 Nov 2019 19:56:52 +0100 Subject: General improvements. --- src/parse.scm | 303 +++++++++++++++++++++++++++++++++------------------------- 1 file changed, 174 insertions(+), 129 deletions(-) (limited to 'src') diff --git a/src/parse.scm b/src/parse.scm index 9b7098b9..a6f3bed1 100644 --- a/src/parse.scm +++ b/src/parse.scm @@ -4,9 +4,7 @@ :use-module (rnrs bytevectors) :use-module (srfi srfi-9) :use-module ((ice-9 textual-ports) :select (unget-char)) - :use-module ((ice-9 ftw) :select (scandir)) - - ) + :use-module ((ice-9 ftw) :select (scandir))) @@ -28,11 +26,12 @@ (define* (get-attribute-value component key #:optional default) (cond [(hashq-ref (get-component-attributes component) key #f) - => (lambda (p) (cdr p))] + => cdr] [else default])) (define (get-attribute component key) - (hashq-ref (get-component-attributes component) key)) + (hashq-ref (get-component-attributes component) + key)) (define (set-attribute! component key value) (let ((ht (get-component-attributes component))) @@ -42,18 +41,16 @@ -(define contexts '(key value param-name param-value escape)) - (define-record-type (make-parse-ctx% filename row col ctx line-key param-key param-table) parse-ctx? - (filename get-filename) - (row get-row set-row!) - (col get-col set-col!) - (ctx get-ctx set-ctx!) - (line-key get-line-key set-line-key!) - (param-key get-param-key set-param-key!) - (param-table get-param-table set-param-table!) + (filename get-filename) ; string + (row get-row set-row!) ; [0, ] + (col get-col set-col!) ; [1, ) + (ctx get-ctx set-ctx!) ; '(key value param-name param-value escape) + (line-key get-line-key set-line-key!) ; string + (param-key get-param-key set-param-key!) ; string + (param-table get-param-table set-param-table!) ; hash-map ) (define (make-parse-ctx filename) @@ -73,12 +70,17 @@ (make-strbuf% len bytes) strbuf? (len get-length set-length!) - (bytes get-bytes) - ) + (bytes get-bytes set-bytes!)) (define (make-strbuf) - (make-strbuf% 0 (make-u8vector #x1000)) - ) + (make-strbuf% 0 (make-u8vector #x1000))) + +(define (strbuf-realloc! strbuf) + (let* ((len (u8vector-length (get-bytes strbuf))) + (nv (make-u8vector (ash len 1)))) + (bytevector-copy! (get-bytes strbuf) 0 + nv 0 len) + (set-bytes! strbuf nv))) (define (strbuf->string strbuf) (let ((bv (make-u8vector (get-length strbuf)))) @@ -91,22 +93,41 @@ (set-length! strbuf 0)) (define (strbuf-append! strbuf u8) - (u8vector-set! (get-bytes strbuf) - (get-length strbuf) - u8) + (catch 'out-of-range + (lambda () + (u8vector-set! (get-bytes strbuf) + (get-length strbuf) + u8)) + (lambda (err . args) + (strbuf-realloc! strbuf) + (strbuf-append! strbuf u8))) (set-length! strbuf (1+ (get-length strbuf)))) + + (define (fold-proc ctx c) - (let ((pair (cons (if (= c (char->integer #\newline)) - c (get-u8 (current-input-port))) - (get-u8 (current-input-port))))) + ;; First extra character optionall read is to get the \n if our line + ;; ended with \r\n. Secound read is to get the first character of the + ;; next line. The initial \r which might recide in @var{c} is discarded. + (let ((pair (cons (if (char=? #\newline (integer->char c)) + c (get-u8 (current-input-port))) + (get-u8 (current-input-port))))) (increment-row! ctx) - (cond [(not (= (char->integer #\newline) - (car pair))) - (throw 'fold-error "Expected newline after CR")] + (cond [(not (char=? #\newline (integer->char (car pair)))) + (error "Expected newline after CR")] + + ;; The standard (3.4, l. 2675) says that each icalobject must + ;; end with CRLF. My files however does not. This means that + ;; an EOF can immideately follow a \n\r pair. But this case is the + ;; same as that we are at the end of line, so we spoof it and let + ;; the regular parser loop handle it. + [(eof-object? (cdr pair)) + 'end-of-line] + ;; Following line begins with a whitespace character, + ;; meaning that we don't break the logical line here. [(memv (integer->char (cdr pair)) '(#\space #\tab)) - (increment-column! ctx) + (increment-column! ctx) ; since we just read the space 'fold] [else @@ -114,115 +135,139 @@ (unget-char (current-input-port) (integer->char (cdr pair))) - 'end-of-line] - - ))) + 'end-of-line]))) (define (parse-calendar port) (with-input-from-port port (lambda () - (let ((component (make-vcomponent)) - (ctx (make-parse-ctx (port-filename port))) - (strbuf (make-strbuf))) - (catch #t - (lambda () - (while #t - (let ((c (get-u8 (current-input-port)))) - (cond - - ;; End of file - [(eof-object? c) - ;; TODO handle final line here - (break)] - - ;; End of line - [(memv (integer->char c) '(#\return #\newline)) - (case (fold-proc ctx c) - [(error writeback-error) => (lambda (t) (throw t))] - [(end-of-line) - (let ((str (strbuf->string strbuf))) - (cond [(string=? (get-line-key ctx) "BEGIN") - (let ((child (make-vcomponent (string->symbol str)))) - (add-child! component child) - (set! component child))] - - [(string=? (get-line-key ctx) "END") - (set! component (get-component-parent component))] - - [else - (let ((ht (get-component-attributes component))) - ;; TODO repeated keys - (hashq-set! ht (string->symbol (get-line-key ctx)) - (cons (get-param-table ctx) - str)) - (set-param-table! ctx (make-hash-table)))]) - - (strbuf-reset! strbuf) - (set-ctx! ctx 'key))])] - - ;; Escaped characters - [(char=? (integer->char c) #\\) - (let ((cc (integer->char (get-u8 (current-input-port))))) - (case cc - ;; Escape character '\' and escaped token sepparated by a newline - ;; (since the standard for some reason allows that (!!!)) - ;; We are at least guaranteed that it's a folded line, so just - ;; unfold it and continue trying to find a token to escape. - [(#\return #\newline) - (case (fold-proc ctx cc) - [(end-of-line) (throw 'escape-error "ESC before not folded line")] - [(fold) - (increment-column! ctx) - (strbuf-append! strbuf (get-u8 (current-input-port)))])] - [(#\n #\N) - (strbuf-append! strbuf (char->integer #\newline))] - [(#\; #\, #\\) => (lambda (c) (strbuf-append! strbuf (char->integer c)))] - [else (throw 'escape-error "Non-escapable character" cc)]) - (increment-column! ctx))] - - ;; Delimiter between param key and param value - [(and (eq? (get-ctx ctx) 'panam-name) (char=? (integer->char c) #\=)) - (set-param-key! ctx (strbuf->string strbuf)) - (strbuf-reset! strbuf) - (set-ctx! ctx 'param-value)] - - ;; Delimiter between parameters (;), or between "something" and attribute value (:) - [(memv (integer->char c) '(#\: #\;)) - (case (get-ctx ctx) - [(param-value) - (hashq-set! (get-param-table ctx) - (get-param-key ctx) - (strbuf->string strbuf)) - (strbuf-reset! strbuf)] - [(key) - (set-line-key! ctx (strbuf->string strbuf)) - (strbuf-reset! strbuf)]) - (set-ctx! ctx (case c - [(#\:) 'value] - [(#\;) 'param-name]))] - - ;; Regular character - [else - (strbuf-append! strbuf c) - (increment-column! ctx) - ]))) - component) - - (lambda (err . args) - (format (current-error-port) "err = ~a~%ctx = ~a~%args = ~s~%" - err ctx args) - )))))) + (let ((component (make-vcomponent)) + (ctx (make-parse-ctx (port-filename port))) + (strbuf (make-strbuf))) + (with-throw-handler #t + (lambda () + (while #t + (let ((c (get-u8 (current-input-port)))) + (cond + + ;; End of file + [(eof-object? c) + ;; == NOTE == + ;; We never check the final line here. But since it + ;; ALWAYS should be "END:VCOMPONENT", and we do all + ;; the setup at creation this shouldn't be a problem. + (break (case (get-ctx ctx) + [(key) ; line ended + (let ((root-component + (car (get-component-children component)))) + (set-component-parent! root-component #f) + root-component)] + [(value) ; still ending line + (set-component-parent! component #f) + component] + [else => (lambda (a) + (scm-error 'wrong-type-arg "parse-break" + (string-append + "Bad context at end of file. " + "Expected `key' or `value', got ~a") + (list a) #f))]))] + + ;; End of line + [(memv (integer->char c) '(#\return #\newline)) + (case (fold-proc ctx c) + [(end-of-line) + (let ((str (strbuf->string strbuf))) + (cond [(string=? (get-line-key ctx) "BEGIN") + (let ((child (make-vcomponent (string->symbol str)))) + (add-child! component child) + (set! component child))] + + [(string=? (get-line-key ctx) "END") + (set! component (get-component-parent component))] + + [else + (let ((ht (get-component-attributes component))) + ;; TODO repeated keys + (hashq-set! ht (string->symbol (get-line-key ctx)) + (cons (get-param-table ctx) + str)) + (set-param-table! ctx (make-hash-table)))]) + + (strbuf-reset! strbuf) + (set-ctx! ctx 'key))] + [(fold) 'noop] ; Good case, here to catch errors in else + [else => (lambda (a) (error "Bad return from fold, unexpected" a))])] + + ;; Escaped characters + [(char=? #\\ (integer->char c)) + (case (integer->char (get-u8 (current-input-port))) + ;; Escape character '\' and escaped token sepparated by a newline + ;; (since the standard for some reason allows that (!!!)) + ;; We are at least guaranteed that it's a folded line, so just + ;; unfold it and continue trying to find a token to escape. + [(#\return #\newline) + => (lambda (c) + (case (fold-proc ctx (char->integer c)) + [(end-of-line) + (throw 'escape-error "ESC before not folded line")] + [(fold) + (increment-column! ctx) + (strbuf-append! strbuf (get-u8 (current-input-port)))]))] + + [(#\n #\N) (strbuf-append! strbuf (char->integer #\newline))] + [(#\; #\, #\\) => (lambda (c) (strbuf-append! strbuf (char->integer c)))] + [else => (lambda (c) (throw 'escape-error "Non-escapable character" c))]) + (increment-column! ctx)] + + ;; Delimiter between param key and param value + [(and (eq? (get-ctx ctx) 'panam-name) (char=? #\= (integer->char c))) + (set-param-key! ctx (strbuf->string strbuf)) + (strbuf-reset! strbuf) + (set-ctx! ctx 'param-value)] + + ;; Delimiter between parameters (;), or between + ;; "something" and attribute value (:) + [(memv (integer->char c) '(#\: #\;)) + (case (get-ctx ctx) + [(param-value) + (hashq-set! (get-param-table ctx) + (get-param-key ctx) + (strbuf->string strbuf)) + (strbuf-reset! strbuf)] + [(key) + (set-line-key! ctx (strbuf->string strbuf)) + (strbuf-reset! strbuf)]) + + (set-ctx! ctx (case (integer->char c) + [(#\:) 'value] + [(#\;) 'param-name]))] + + ;; Regular character + [else + (strbuf-append! strbuf c) + (increment-column! ctx)])))) + + (lambda _ + (format (current-error-port) + "== PARSE ERROR == +filename = ~a +row ~a column ~a ctx = ~a +~a ; ~a = ... : ...~%~%" + (get-filename ctx) + (get-row ctx) (get-col ctx) (get-ctx ctx) + (get-line-key ctx) (get-param-key ctx)))))))) (define-public (read-vcalendar path) (define st (stat path)) (case (stat:type st) - [(regular) (call-with-input-file path parse-calendar)] - [(directory) (map (lambda (fname) (call-with-input-file (string-append path file-name-separator-string fname) - parse-calendar)) - (scandir path (lambda (s) (and (not (string= "." (string-take s 1))) - (string= "ics" (string-take-right s 3))))))] + [(regular) (list (call-with-input-file path parse-calendar))] + [(directory) + (map (lambda (fname) + (call-with-input-file + (string-append path file-name-separator-string fname) + parse-calendar)) + (scandir path (lambda (s) (and (not (string= "." (string-take s 1))) + (string= "ics" (string-take-right s 3))))))] [(block-special char-special fifo socket unknown symlink) - => (lambda (t) (throw t))]) - ) + => (lambda (t) (error "Can't parse file of type " t))])) -- cgit v1.2.3