(define-module (parse) #:export (csv-ref csv-length load-csv*)) (use-modules (srfi srfi-1) (srfi srfi-8) (srfi srfi-9) (srfi srfi-9 gnu) (srfi srfi-26) (ice-9 rdelim) (ice-9 peg) (ice-9 popen) (patterns) (macros arrow)) (define (md5-file file) (-> (string-append "md5sum " file) open-input-pipe read)) ;; var is the field to test ;; symb is the expected value (define-syntax-rule (ensure-symbol var symb) (unless (eqv? var symb) (throw 'symbol-not-equal "Expected ~s, got ~s~%" symb var))) (define (read-csv port) "Read from port into a peg:tree." (let* ((rawstr (read-delimited "" port)) (match (match-pattern file rawstr)) (tree (keyword-flatten '(line) (peg:tree match)))) tree)) ;;; TODO sometimes part is a symbol instead of a list, ;;; somehow an earlier part got consed on? or something? (define (get-part-data part) "Get data from field-part cells" (if (symbol? part) #f (begin (ensure-symbol (car part) 'field-part) (cadr part)))) (define (flatten-1 lst) "If list is of length one return the first element, otherwise return the full list" (if (= 1 (length lst)) (car lst) lst)) ;; ('line ('field ('field-part data) ...) ...) ;; -> ({(data ...) | data} ...) (define (clean-peg-tree tree) "Clean up input from read-csv, returns a more bare bones tree" (map (lambda (line) ; for every line (ensure-symbol (car line) 'line) (map (lambda (field) ; for every field in line (and (not (symbol? field)) ; if a symbol no data is present ;; else extract the data (begin (ensure-symbol (car field) 'field) (flatten-1 (map get-part-data (cdr field)))))) (cdr line))) tree)) (define-record-type csv (make-csv data input-file input-hash cache-hash) csv? (data get-data) (input-file get-file) (input-hash get-hash) (cache-hash get-cache-hash)) (define (csv-printer record port) (format port "#" (get-file record) (symbol->string (get-hash record)))) (set-record-type-printer! csv csv-printer) (define (tree->vector tree) (let ((fields (map string->symbol (car tree))) (data (cdr tree))) (list->vector ;; TODO this assoc list ought to be a hash map (map (cut map cons fields <>) data)))) ;;; Primitive load csv (define (load-csv port) (-> port read-csv clean-peg-tree tree->vector)) (define (get-cache-file csv-file) (let ((path-word (string-join (string-split csv-file #\/) "%"))) (string-append (getenv "HOME") "/.cache/csv-cache/" path-word))) (define (write-cache csv-file csv-hash cache-hash) (with-output-to-file (string-append (get-cache-file csv-file) ".status") (lambda () (write (list csv-hash cache-hash))))) ;;; TODO rewrite this (define (load-csv* filename) "Loads the csv file from disk, or from cache" (let* ((cache-file (get-cache-file filename)) (cache-status-file (string-append cache-file ".status")) (cache-status (if (file-exists? cache-status-file) (call-with-input-file cache-status-file read) (list '#{0}# '#{0}#))) (csv-sum-expected (list-ref cache-status 0)) (cache-sum-expected (list-ref cache-status 1)) (csv-sum (md5-file filename)) (cache-sum (md5-file cache-file))) (cond ((not (eqv? csv-sum csv-sum-expected)) ;; file changed, rebuild cache (display "file changed, rebuild cache\n") (let ((data (call-with-input-file filename load-csv))) (write data (open-output-file cache-file)) (write-cache filename csv-sum cache-sum) (make-csv data filename csv-sum cache-sum)) ) ((eqv? cache-sum cache-sum-expected) ;; use cache (display "use cache\n") (make-csv (call-with-input-file cache-file read) filename csv-sum cache-sum) ) (else ; rebuild cache (display "rebuild cache\n") (let ((data (call-with-input-file filename load-csv))) (write data (open-output-file cache-file)) (write-cache filename csv-sum cache-sum) (make-csv data filename csv-sum cache-sum)) )))) (define-syntax-rule (ensure-csv csv) (unless (csv? csv) (throw 'type-error "Expected csv, got something else~%"))) ;;; TODO (csv? ) sometimes returns false. (define (csv-ref csv key row) #; (ensure-csv csv) (assoc-ref (vector-ref (get-data csv) row) key)) (define (csv-length csv) #; (ensure-csv csv) (vector-length (get-data csv)))