From 4cfb8ec5e6dad161dfefb683a64490d468caad7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20H=C3=B6rnquist?= Date: Sat, 2 Nov 2019 22:26:18 +0100 Subject: Move parser into module subtree. --- module/vcomponent.scm | 5 +- module/vcomponent/base.scm | 86 ++++++----- module/vcomponent/parse.scm | 322 ++++++++++++++++++++++++++++++++++++++++ module/vcomponent/primitive.scm | 9 -- 4 files changed, 373 insertions(+), 49 deletions(-) create mode 100644 module/vcomponent/parse.scm delete mode 100644 module/vcomponent/primitive.scm (limited to 'module') diff --git a/module/vcomponent.scm b/module/vcomponent.scm index 8751440d..d3e574b5 100644 --- a/module/vcomponent.scm +++ b/module/vcomponent.scm @@ -1,7 +1,4 @@ (define-module (vcomponent) - #:use-module ((vcomponent primitive) - :select (parse-cal-path - (make-vcomponent . primitive-make-vcomponent))) #:use-module (vcomponent datetime) #:use-module (vcomponent recurrence) #:use-module (vcomponent timezone) @@ -124,7 +121,7 @@ ;; return accum)) - ((no-type) (throw 'no-type))))) + ((no-type) (error 'no-type))))) (parse-dates! component) diff --git a/module/vcomponent/base.scm b/module/vcomponent/base.scm index 98b2aa89..f43f532e 100644 --- a/module/vcomponent/base.scm +++ b/module/vcomponent/base.scm @@ -2,33 +2,43 @@ :use-module (util) :use-module (srfi srfi-1) :use-module (srfi srfi-17) - :use-module (vcomponent primitive) + :use-module ((vcomponent parse) + :renamer (lambda (symb) + (case symb + ;; [(set-attribute!) 'get-attribute] + [(make-vcomponent) 'primitive-make-vcomponent] + [else symb]))) :use-module (ice-9 hash-table) :use-module ((ice-9 optargs) :select (define*-public)) - :re-export (add-child!)) + :re-export (add-child! primitive-make-vcomponent)) + +(define-public (parse-cal-path path) + (let ((parent (primitive-make-vcomponent))) + (for-each (lambda (child) (add-child! parent child)) + (read-vcalendar path)) + (if (null? (get-component-children parent)) + (set-attribute! parent 'X-HNH-SOURCETYPE "vdir") + (set-attribute! parent 'X-HNH-SOURCETYPE + (get-attribute-value (car (get-component-children parent)) + 'X-HNH-SOURCETYPE "vdir"))) + parent)) ;; vline → value (define-public value (make-procedure-with-setter - (lambda (vline) (struct-ref vline 0)) - (lambda (vline value) (struct-set! vline 0 value)))) + get-vline-value set-vline-value!)) ;; vcomponent x (or str symb) → vline (define-public (attr* component attr) - (hash-ref (struct-ref component 3) - (as-string attr))) + (hashq-ref (get-component-attributes component) + (as-symb attr))) ;; vcomponent x (or str symb) → value -(define (get-attr component attr) - (and=> (attr* component attr) - value)) +(define (get-attr component key) + (get-attribute-value component (as-symb key) #f)) -(define (set-attr! component attr value) - (aif (attr* component attr) - (struct-set! it 0 value) - (hash-set! (struct-ref component 3) - (as-string attr) - (make-vline value)))) +(define (set-attr! component key value) + (set-attribute! component (as-symb key) value)) (define-public attr (make-procedure-with-setter @@ -39,42 +49,46 @@ (define-public prop (make-procedure-with-setter (lambda (attr-obj prop-key) - (hash-ref (struct-ref attr-obj 1) (as-string prop-key))) + ;; TODO `list' is a hack since a bit to much code depends + ;; on prop always returning a list of values. + (and=> (hashq-ref (get-vline-parameters attr-obj) + (as-symb prop-key)) + list)) (lambda (attr-obj prop-key val) - (hash-set! (struct-ref attr-obj 1) (as-string prop-key) val)))) + (hashq-set! (get-vline-parameters attr-obj) + (as-symb prop-key) val)))) ;; Returns the properties of attribute as an assoc list. ;; @code{(map car <>)} leads to available properties. (define-public (properties attrptr) - (hash-map->list cons (struct-ref attrptr 1))) + (hash-map->list cons (get-attribute-parameters attrptr))) (define-public type (make-procedure-with-setter - (lambda (c) (struct-ref c 0)) - (lambda (c v) struct-set! c 0 v) - )) + (lambda (c) (component-type c)) + (lambda (c v) ; struct-set! c 0 v + (format (current-error-port) + "This method is a deprecated NOOP")))) -(define-public (parent c) (struct-ref c 2)) +(define-public parent get-component-parent) (define-public (attributes component) - (hash-map->list cons (struct-ref component 3))) + (hash-map->list cons (get-component-attributes component))) -(define*-public (children component) - (struct-ref component 1)) +(define*-public children get-component-children) (define (copy-vline vline) - (make-struct/no-tail (struct-vtable vline) - (struct-ref vline 0) - ;; TODO deep-copy on properties? - (struct-ref vline 1))) + (make-vline (get-vline-value vline) + ;; TODO deep-copy on properties? + (get-vline-parameters vline))) (define-public (copy-vcomponent component) - (make-struct/no-tail (struct-vtable component) - (struct-ref component 0) - (struct-ref component 1) - (struct-ref component 2) - (alist->hash-table - (hash-map->list (lambda (key value) (cons key (copy-vline value))) - (struct-ref component 3))))) + (make-vcomponent% (component-type component) + (get-component-children component) + (get-component-parent component) + ;; attributes + (alist->hashq-table + (hash-map->list (lambda (key value) (cons key (copy-vline value))) + (get-component-attributes component))))) (define-public (extract field) (lambda (e) (attr e field))) diff --git a/module/vcomponent/parse.scm b/module/vcomponent/parse.scm new file mode 100644 index 00000000..9eabacb3 --- /dev/null +++ b/module/vcomponent/parse.scm @@ -0,0 +1,322 @@ + +(define-module (vcomponent parse) + :use-module (rnrs io ports) + :use-module (rnrs bytevectors) + :use-module (srfi srfi-9) + :use-module ((ice-9 textual-ports) :select (unget-char)) + :use-module ((ice-9 ftw) :select (scandir ftw))) + + + +(define-record-type + (make-vline% value parameters) + vline? + (value get-vline-value set-vline-value!) + (parameters get-vline-parameters)) + +(define* (make-vline value #:optional ht) + (make-vline% value (or ht (make-hash-table)))) + +(define-record-type + (make-vcomponent% type children parent attributes) + vcomponent? + (type component-type) + (children get-component-children set-component-children!) + (parent get-component-parent set-component-parent!) + (attributes get-component-attributes)) + +(define* (make-vcomponent #:optional (type 'VIRTUAL)) + (make-vcomponent% type '() #f (make-hash-table #x10))) + +(define (add-child! parent child) + (set-component-children! parent (cons child (get-component-children parent))) + (set-component-parent! child parent)) + +(define* (get-attribute-value component key #:optional default) + (cond [(hashq-ref (get-component-attributes component) + key #f) + => get-vline-value] + [else default])) + +(define (get-attribute component key) + (hashq-ref (get-component-attributes component) + key)) + +(define (set-attribute! component key value) + (let ((ht (get-component-attributes component))) + (cond [(hashq-ref ht key #f) + => (lambda (vline) (set-vline-value! vline value))] + [else (hashq-set! ht key (make-vline value))]))) + +(define (set-vline! component key vline) + (hashq-set! (get-component-attributes component) + key vline)) + + + +(define-record-type + (make-parse-ctx% filename row col ctx line-key param-key param-table) + parse-ctx? + (filename get-filename) ; string + (row get-row set-row!) ; [0, ] + (col get-col set-col!) ; [1, ) + (ctx get-ctx set-ctx!) ; '(key value param-name param-value escape) + (line-key get-line-key set-line-key!) ; string + (param-key get-param-key set-param-key!) ; string + (param-table get-param-table set-param-table!) ; hash-map + ) + +(define (make-parse-ctx filename) + (make-parse-ctx% filename 1 0 'key + #f #f (make-hash-table))) + +(define (increment-column! ctx) + (set-col! ctx (1+ (get-col ctx)))) + +(define (increment-row! ctx) + (set-col! ctx 0) + (set-row! ctx (1+ (get-row ctx)))) + + + +(define-record-type + (make-strbuf% len bytes) + strbuf? + (len get-length set-length!) + (bytes get-bytes set-bytes!)) + +(define (make-strbuf) + (make-strbuf% 0 (make-u8vector #x1000))) + +(define (strbuf-realloc! strbuf) + (let* ((len (u8vector-length (get-bytes strbuf))) + (nv (make-u8vector (ash len 1)))) + (bytevector-copy! (get-bytes strbuf) 0 + nv 0 len) + (set-bytes! strbuf nv))) + +(define (strbuf->string strbuf) + (let ((bv (make-u8vector (get-length strbuf)))) + (bytevector-copy! (get-bytes strbuf) 0 + bv 0 + (get-length strbuf)) + (bytevector->string bv (native-transcoder)))) ; TODO charset + +(define (strbuf-reset! strbuf) + (set-length! strbuf 0)) + +(define (strbuf-append! strbuf u8) + (catch 'out-of-range + (lambda () + (u8vector-set! (get-bytes strbuf) + (get-length strbuf) + u8)) + (lambda (err . args) + (strbuf-realloc! strbuf) + (strbuf-append! strbuf u8))) + (set-length! strbuf (1+ (get-length strbuf)))) + + + +(define (fold-proc ctx c) + ;; First extra character optionall read is to get the \n if our line + ;; ended with \r\n. Secound read is to get the first character of the + ;; next line. The initial \r which might recide in @var{c} is discarded. + (let ((pair (cons (if (char=? #\newline (integer->char c)) + c (get-u8 (current-input-port))) + (get-u8 (current-input-port))))) + (increment-row! ctx) + (cond [(not (char=? #\newline (integer->char (car pair)))) + (error "Expected newline after CR")] + + ;; The standard (3.4, l. 2675) says that each icalobject must + ;; end with CRLF. My files however does not. This means that + ;; an EOF can immideately follow a \n\r pair. But this case is the + ;; same as that we are at the end of line, so we spoof it and let + ;; the regular parser loop handle it. + [(eof-object? (cdr pair)) + 'end-of-line] + + ;; Following line begins with a whitespace character, + ;; meaning that we don't break the logical line here. + [(memv (integer->char (cdr pair)) '(#\space #\tab)) + (increment-column! ctx) ; since we just read the space + 'fold] + + [else + ;; TODO check if this failed, and signal a writeback error + (unget-char (current-input-port) + (integer->char (cdr pair))) + + 'end-of-line]))) + +(define (parse-calendar port) + (with-input-from-port port + (lambda () + (let ((component (make-vcomponent)) + (ctx (make-parse-ctx (port-filename port))) + (strbuf (make-strbuf))) + (with-throw-handler #t + (lambda () + + (set-attribute! component 'X-HNH-FILENAME + (get-filename ctx)) + + (while #t + (let ((c (get-u8 (current-input-port)))) + (cond + + ;; End of file + [(eof-object? c) + ;; == NOTE == + ;; We never check the final line here. But since it + ;; ALWAYS should be "END:VCOMPONENT", and we do all + ;; the setup at creation this shouldn't be a problem. + (break (case (get-ctx ctx) + [(key) ; line ended + (let ((root-component + (car (get-component-children component)))) + (set-component-parent! root-component #f) + root-component)] + [(value) ; still ending line + (set-component-parent! component #f) + component] + [else => (lambda (a) + (scm-error 'wrong-type-arg "parse-break" + (string-append + "Bad context at end of file. " + "Expected `key' or `value', got ~a") + (list a) #f))]))] + + ;; End of line + [(memv (integer->char c) '(#\return #\newline)) + (case (fold-proc ctx c) + [(end-of-line) + (let ((str (strbuf->string strbuf))) + (cond [(eq? (get-line-key ctx) 'BEGIN) + (let ((child (make-vcomponent (string->symbol str)))) + ;; TOOD remove this copying of attributes!!! + (for-each (lambda (pair) + (set-attribute! child + (car pair) + (cdr pair))) + (hash-map->list + cons (get-component-attributes component))) + (add-child! component child) + (set! component child))] + + [(eq? (get-line-key ctx) 'END) + (set! component (get-component-parent component))] + + [else + ;; TODO repeated keys + (set-vline! component (get-line-key ctx) + (make-vline str (get-param-table ctx))) + (set-param-table! ctx (make-hash-table))]) + + (strbuf-reset! strbuf) + (set-ctx! ctx 'key))] + [(fold) 'noop] ; Good case, here to catch errors in else + [else => (lambda (a) (error "Bad return from fold, unexpected" a))])] + + ;; Escaped characters + [(char=? #\\ (integer->char c)) + (case (integer->char (get-u8 (current-input-port))) + ;; Escape character '\' and escaped token sepparated by a newline + ;; (since the standard for some reason allows that (!!!)) + ;; We are at least guaranteed that it's a folded line, so just + ;; unfold it and continue trying to find a token to escape. + [(#\return #\newline) + => (lambda (c) + (case (fold-proc ctx (char->integer c)) + [(end-of-line) + (throw 'escape-error "ESC before not folded line")] + [(fold) + (increment-column! ctx) + (strbuf-append! strbuf (get-u8 (current-input-port)))]))] + + [(#\n #\N) (strbuf-append! strbuf (char->integer #\newline))] + [(#\; #\, #\\) => (lambda (c) (strbuf-append! strbuf (char->integer c)))] + [else => (lambda (c) (throw 'escape-error "Non-escapable character" c))]) + (increment-column! ctx)] + + ;; Delimiter between param key and param value + [(and (eq? (get-ctx ctx) 'param-name) + (char=? #\= (integer->char c))) + (set-param-key! ctx (string->symbol (strbuf->string strbuf))) + (strbuf-reset! strbuf) + (set-ctx! ctx 'param-value)] + + ;; Delimiter between parameters (;), or between + ;; "something" and attribute value (:) + [(and (memv (integer->char c) '(#\: #\;)) + (memv (get-ctx ctx) '(param-value key))) + (case (get-ctx ctx) + [(param-value) + (hashq-set! (get-param-table ctx) + (get-param-key ctx) + (strbuf->string strbuf)) + (strbuf-reset! strbuf)] + [(key) + (set-line-key! ctx (string->symbol (strbuf->string strbuf))) + (strbuf-reset! strbuf)]) + + (set-ctx! ctx (case (integer->char c) + [(#\:) 'value] + [(#\;) 'param-name]))] + + ;; Regular character + [else + (strbuf-append! strbuf c) + (increment-column! ctx)])))) + + (lambda _ + (format (current-error-port) + "== PARSE ERROR == +filename = ~a +row ~a column ~a ctx = ~a +~a ; ~a = ... : ...~%~%" + (get-filename ctx) + (get-row ctx) (get-col ctx) (get-ctx ctx) + (get-line-key ctx) (get-param-key ctx)))))))) + + + +(define-public (read-vcalendar path) + (define st (stat path)) + (case (stat:type st) + [(regular) (let ((comp (call-with-input-file path parse-calendar))) + (set-attribute! comp 'X-HNH-SOURCETYPE "file") + (list comp))] + [(directory) + (map (lambda (fname) + (call-with-input-file + (string-append path file-name-separator-string fname) + parse-calendar)) + (scandir path (lambda (s) (and (not (string= "." (string-take s 1))) + (string= "ics" (string-take-right s 3))))))] + [(block-special char-special fifo socket unknown symlink) + => (lambda (t) (error "Can't parse file of type " t))])) + + +(define-public (read-tree path) + (define list '()) + (ftw path + (lambda (filename statinfo flag) + (case flag + [(regular) + (case (stat:type statinfo) + [(regular) + (when (and (not (string= "." (string-take filename 1))) + (string= "ics" (string-take-right filename 3))) + (set! list (cons filename list))) + #t] + [else #t])] + [(directory) #t] + [else #f]))) + ((@ (ice-9 threads) n-par-map) 12 + (lambda (fname) (call-with-input-file fname parse-calendar)) + list)) + + +(export add-child! make-vcomponent get-vline-value set-vline-value! get-component-parent get-component-children get-attribute-value set-attribute! get-component-attributes component-type make-vcomponent% make-vline get-vline-parameters) diff --git a/module/vcomponent/primitive.scm b/module/vcomponent/primitive.scm deleted file mode 100644 index 5fef08cc..00000000 --- a/module/vcomponent/primitive.scm +++ /dev/null @@ -1,9 +0,0 @@ -;;; Primitive export of symbols linked from C binary. - -(define-module (vcomponent primitive) - #:export (make-vcomponent - add-line! add-child! - make-vline add-attribute! - parse-cal-path)) - -(load-extension "libguile-calendar" "init_lib") -- cgit v1.2.3