summaryrefslogtreecommitdiff
path: root/parse.scm
blob: 034e1ffd6cd977b8343864acfcc50a6297ad8ff4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
(define-module (parse)
  #:export (csv-ref csv-length load-csv*))

(use-modules (srfi srfi-1)
             (srfi srfi-8)
             (srfi srfi-9)
             (srfi srfi-9 gnu)
             (srfi srfi-26)

             (ice-9 rdelim)
             (ice-9 peg)
             (ice-9 popen)

             (patterns)
             (macros arrow))

(define (md5-file file)
  (-> (string-append "md5sum " file)
      open-input-pipe
      read))

;; var is the field to test
;; symb is the expected value
(define-syntax-rule (ensure-symbol var symb)
  (unless (eqv? var symb)
    (throw 'symbol-not-equal "Expected ~s, got ~s~%" symb var)))

(define (read-csv port)
  "Read from port into a peg:tree."
  (let* ((rawstr (read-delimited "" port))
         (match (match-pattern file rawstr))
         (tree (keyword-flatten '(line)
                                (peg:tree match))))
    tree))

;;; TODO sometimes part is a symbol instead of a list,
;;; somehow an earlier part got consed on? or something?
(define (get-part-data part)
  "Get data from field-part cells"
  (if (symbol? part)
      #f
      (begin
        (ensure-symbol (car part) 'field-part)
        (cadr part))))



(define (flatten-1 lst)
  "If list is of length one return the first element,
otherwise return the full list"
  (if (= 1 (length lst))
      (car lst)
      lst))

;; ('line ('field ('field-part data) ...) ...)
;; -> ({(data ...) | data} ...)
(define (clean-peg-tree tree)
  "Clean up input from read-csv, returns a more bare bones tree"
  (map (lambda (line) ; for every line
         (ensure-symbol (car line) 'line)
         (map (lambda (field) ; for every field in line
                (and (not (symbol? field)) ; if a symbol no data is present
                     ;; else extract the data
                     (begin (ensure-symbol (car field) 'field)
                            (flatten-1 (map get-part-data (cdr field))))))
              (cdr line)))
       tree))

(define-record-type csv
  (make-csv data
            input-file input-hash
            cache-hash)
  csv?
  (data get-data)
  (input-file get-file)
  (input-hash get-hash)
  (cache-hash get-cache-hash))

(define (csv-printer record port)
  (format port "#<csv file: ~s, hash ~s, data ...>"
          (get-file record)
          (symbol->string (get-hash record))))
(set-record-type-printer! csv csv-printer)

(define (tree->vector tree)
  (let ((fields (map string->symbol (car tree)))
        (data (cdr tree)))
    (list->vector
     ;; TODO this assoc list ought to be a hash map
     (map (cut map cons fields <>) data))))

;;; Primitive load csv
(define (load-csv port)
  (-> port
      read-csv
      clean-peg-tree
      tree->vector))

(define (get-cache-file csv-file)
  (let ((path-word (string-join (string-split csv-file #\/)
                                "%")))
    (string-append (getenv "HOME")
                   "/.cache/csv-cache/" path-word)))

(define (write-cache csv-file csv-hash cache-hash)
  (with-output-to-file (string-append (get-cache-file csv-file) ".status")
    (lambda ()
      (write (list csv-hash cache-hash)))))

;;; TODO rewrite this 
(define (load-csv* filename)
  "Loads the csv file from disk, or from cache"
  (let* ((cache-file (get-cache-file filename))
         (cache-status-file (string-append cache-file ".status"))
         (cache-status (if (file-exists? cache-status-file)
                           (call-with-input-file cache-status-file read)
                           (list '#{0}# '#{0}#)))
         (csv-sum-expected (list-ref cache-status 0))
         (cache-sum-expected (list-ref cache-status 1))
         (csv-sum (md5-file filename))
         (cache-sum (md5-file cache-file)))
    (cond ((not (eqv? csv-sum csv-sum-expected))
           ;; file changed, rebuild cache
           (display "file changed, rebuild cache\n")
           (let ((data (call-with-input-file filename load-csv)))
             (write data (open-output-file cache-file))
             (write-cache filename csv-sum cache-sum)
             (make-csv data filename csv-sum cache-sum))
           )
          ((eqv? cache-sum cache-sum-expected)
           ;; use cache
           (display "use cache\n")
           (make-csv (call-with-input-file cache-file read)
                     filename
                     csv-sum cache-sum)
           )
          (else ; rebuild cache
           (display "rebuild cache\n")
           (let ((data (call-with-input-file filename load-csv)))
             (write data (open-output-file cache-file))
             (write-cache filename csv-sum cache-sum)
             (make-csv data filename csv-sum cache-sum))
           ))))

(define-syntax-rule (ensure-csv csv)
 (unless (csv? csv)
    (throw 'type-error "Expected csv, got something else~%")))

;;; TODO (csv? <csv-object>) sometimes returns false.

(define (csv-ref csv key row)
  #; (ensure-csv csv)
  (assoc-ref (vector-ref (get-data csv) row)
             key))

(define (csv-length csv)
  #; (ensure-csv csv)
  (vector-length (get-data csv)))