From 4cfb8ec5e6dad161dfefb683a64490d468caad7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20H=C3=B6rnquist?= Date: Sat, 2 Nov 2019 22:26:18 +0100 Subject: Move parser into module subtree. --- src/parse.scm | 293 ---------------------------------------------------------- 1 file changed, 293 deletions(-) delete mode 100644 src/parse.scm (limited to 'src') diff --git a/src/parse.scm b/src/parse.scm deleted file mode 100644 index b11240df..00000000 --- a/src/parse.scm +++ /dev/null @@ -1,293 +0,0 @@ - -(define-module (parse) - :use-module (rnrs io ports) - :use-module (rnrs bytevectors) - :use-module (srfi srfi-9) - :use-module ((ice-9 textual-ports) :select (unget-char)) - :use-module ((ice-9 ftw) :select (scandir ftw))) - - - -(define-record-type - (make-vcomponent% type children parent attributes) - vcomponent? - (type component-type) - (children get-component-children set-component-children!) - (parent get-component-parent set-component-parent!) - (attributes get-component-attributes)) - -(define* (make-vcomponent #:optional (type 'VIRTUAL)) - (make-vcomponent% type '() #f (make-hash-table #x10))) - -(define (add-child! parent child) - (set-component-children! parent (cons child (get-component-children parent))) - (set-component-parent! child parent)) - -(define* (get-attribute-value component key #:optional default) - (cond [(hashq-ref (get-component-attributes component) - key #f) - => cdr] - [else default])) - -(define (get-attribute component key) - (hashq-ref (get-component-attributes component) - key)) - -(define (set-attribute! component key value) - (let ((ht (get-component-attributes component))) - (cond [(hashq-ref ht key #f) - => (lambda (pair) (set-cdr! pair value))] - [else (hashq-set! ht key (cons (make-hash-table) value))]))) - - - -(define-record-type - (make-parse-ctx% filename row col ctx line-key param-key param-table) - parse-ctx? - (filename get-filename) ; string - (row get-row set-row!) ; [0, ] - (col get-col set-col!) ; [1, ) - (ctx get-ctx set-ctx!) ; '(key value param-name param-value escape) - (line-key get-line-key set-line-key!) ; string - (param-key get-param-key set-param-key!) ; string - (param-table get-param-table set-param-table!) ; hash-map - ) - -(define (make-parse-ctx filename) - (make-parse-ctx% filename 1 0 'key - #f #f (make-hash-table))) - -(define (increment-column! ctx) - (set-col! ctx (1+ (get-col ctx)))) - -(define (increment-row! ctx) - (set-col! ctx 0) - (set-row! ctx (1+ (get-row ctx)))) - - - -(define-record-type - (make-strbuf% len bytes) - strbuf? - (len get-length set-length!) - (bytes get-bytes set-bytes!)) - -(define (make-strbuf) - (make-strbuf% 0 (make-u8vector #x1000))) - -(define (strbuf-realloc! strbuf) - (let* ((len (u8vector-length (get-bytes strbuf))) - (nv (make-u8vector (ash len 1)))) - (bytevector-copy! (get-bytes strbuf) 0 - nv 0 len) - (set-bytes! strbuf nv))) - -(define (strbuf->string strbuf) - (let ((bv (make-u8vector (get-length strbuf)))) - (bytevector-copy! (get-bytes strbuf) 0 - bv 0 - (get-length strbuf)) - (bytevector->string bv (native-transcoder)))) ; TODO charset - -(define (strbuf-reset! strbuf) - (set-length! strbuf 0)) - -(define (strbuf-append! strbuf u8) - (catch 'out-of-range - (lambda () - (u8vector-set! (get-bytes strbuf) - (get-length strbuf) - u8)) - (lambda (err . args) - (strbuf-realloc! strbuf) - (strbuf-append! strbuf u8))) - (set-length! strbuf (1+ (get-length strbuf)))) - - - -(define (fold-proc ctx c) - ;; First extra character optionall read is to get the \n if our line - ;; ended with \r\n. Secound read is to get the first character of the - ;; next line. The initial \r which might recide in @var{c} is discarded. - (let ((pair (cons (if (char=? #\newline (integer->char c)) - c (get-u8 (current-input-port))) - (get-u8 (current-input-port))))) - (increment-row! ctx) - (cond [(not (char=? #\newline (integer->char (car pair)))) - (error "Expected newline after CR")] - - ;; The standard (3.4, l. 2675) says that each icalobject must - ;; end with CRLF. My files however does not. This means that - ;; an EOF can immideately follow a \n\r pair. But this case is the - ;; same as that we are at the end of line, so we spoof it and let - ;; the regular parser loop handle it. - [(eof-object? (cdr pair)) - 'end-of-line] - - ;; Following line begins with a whitespace character, - ;; meaning that we don't break the logical line here. - [(memv (integer->char (cdr pair)) '(#\space #\tab)) - (increment-column! ctx) ; since we just read the space - 'fold] - - [else - ;; TODO check if this failed, and signal a writeback error - (unget-char (current-input-port) - (integer->char (cdr pair))) - - 'end-of-line]))) - -(define (parse-calendar port) - (with-input-from-port port - (lambda () - (let ((component (make-vcomponent)) - (ctx (make-parse-ctx (port-filename port))) - (strbuf (make-strbuf))) - (with-throw-handler #t - (lambda () - (while #t - (let ((c (get-u8 (current-input-port)))) - (cond - - ;; End of file - [(eof-object? c) - ;; == NOTE == - ;; We never check the final line here. But since it - ;; ALWAYS should be "END:VCOMPONENT", and we do all - ;; the setup at creation this shouldn't be a problem. - (break (case (get-ctx ctx) - [(key) ; line ended - (let ((root-component - (car (get-component-children component)))) - (set-component-parent! root-component #f) - root-component)] - [(value) ; still ending line - (set-component-parent! component #f) - component] - [else => (lambda (a) - (scm-error 'wrong-type-arg "parse-break" - (string-append - "Bad context at end of file. " - "Expected `key' or `value', got ~a") - (list a) #f))]))] - - ;; End of line - [(memv (integer->char c) '(#\return #\newline)) - (case (fold-proc ctx c) - [(end-of-line) - (let ((str (strbuf->string strbuf))) - (cond [(string=? (get-line-key ctx) "BEGIN") - (let ((child (make-vcomponent (string->symbol str)))) - (add-child! component child) - (set! component child))] - - [(string=? (get-line-key ctx) "END") - (set! component (get-component-parent component))] - - [else - (let ((ht (get-component-attributes component))) - ;; TODO repeated keys - (hashq-set! ht (string->symbol (get-line-key ctx)) - (cons (get-param-table ctx) - str)) - (set-param-table! ctx (make-hash-table)))]) - - (strbuf-reset! strbuf) - (set-ctx! ctx 'key))] - [(fold) 'noop] ; Good case, here to catch errors in else - [else => (lambda (a) (error "Bad return from fold, unexpected" a))])] - - ;; Escaped characters - [(char=? #\\ (integer->char c)) - (case (integer->char (get-u8 (current-input-port))) - ;; Escape character '\' and escaped token sepparated by a newline - ;; (since the standard for some reason allows that (!!!)) - ;; We are at least guaranteed that it's a folded line, so just - ;; unfold it and continue trying to find a token to escape. - [(#\return #\newline) - => (lambda (c) - (case (fold-proc ctx (char->integer c)) - [(end-of-line) - (throw 'escape-error "ESC before not folded line")] - [(fold) - (increment-column! ctx) - (strbuf-append! strbuf (get-u8 (current-input-port)))]))] - - [(#\n #\N) (strbuf-append! strbuf (char->integer #\newline))] - [(#\; #\, #\\) => (lambda (c) (strbuf-append! strbuf (char->integer c)))] - [else => (lambda (c) (throw 'escape-error "Non-escapable character" c))]) - (increment-column! ctx)] - - ;; Delimiter between param key and param value - [(and (eq? (get-ctx ctx) 'panam-name) (char=? #\= (integer->char c))) - (set-param-key! ctx (strbuf->string strbuf)) - (strbuf-reset! strbuf) - (set-ctx! ctx 'param-value)] - - ;; Delimiter between parameters (;), or between - ;; "something" and attribute value (:) - [(memv (integer->char c) '(#\: #\;)) - (case (get-ctx ctx) - [(param-value) - (hashq-set! (get-param-table ctx) - (get-param-key ctx) - (strbuf->string strbuf)) - (strbuf-reset! strbuf)] - [(key) - (set-line-key! ctx (strbuf->string strbuf)) - (strbuf-reset! strbuf)]) - - (set-ctx! ctx (case (integer->char c) - [(#\:) 'value] - [(#\;) 'param-name]))] - - ;; Regular character - [else - (strbuf-append! strbuf c) - (increment-column! ctx)])))) - - (lambda _ - (format (current-error-port) - "== PARSE ERROR == -filename = ~a -row ~a column ~a ctx = ~a -~a ; ~a = ... : ...~%~%" - (get-filename ctx) - (get-row ctx) (get-col ctx) (get-ctx ctx) - (get-line-key ctx) (get-param-key ctx)))))))) - - - -(define-public (read-vcalendar path) - (define st (stat path)) - (case (stat:type st) - [(regular) (list (call-with-input-file path parse-calendar))] - [(directory) - (map (lambda (fname) - (call-with-input-file - (string-append path file-name-separator-string fname) - parse-calendar)) - (scandir path (lambda (s) (and (not (string= "." (string-take s 1))) - (string= "ics" (string-take-right s 3))))))] - [(block-special char-special fifo socket unknown symlink) - => (lambda (t) (error "Can't parse file of type " t))])) - - -(define-public (read-tree path) - (define list '()) - (ftw path - (lambda (filename statinfo flag) - (case flag - [(regular) - (case (stat:type statinfo) - [(regular) - (when (and (not (string= "." (string-take filename 1))) - (string= "ics" (string-take-right filename 3))) - (set! list (cons filename list))) - #t] - [else #t])] - [(directory) #t] - [else #f]))) - ((@ (ice-9 threads) n-par-map) 12 - (lambda (fname) (call-with-input-file fname parse-calendar)) - list)) -- cgit v1.2.3