summaryrefslogtreecommitdiff
path: root/main.scm
blob: 4094ebda2ea6dacf06a07cf28ef57d6a726707d7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
#!/usr/bin/guile \
-e main -s
!#

(add-to-load-path "/home/hugo/code/calp/module")

(use-modules
 (web client)
 (web response)
 (sxml simple)
 (sxml xpath)
 (sxml transform)
 (ice-9 regex)
 (ice-9 getopt-long)
 (ice-9 curried-definitions)
 (ice-9 format)
 (rnrs records syntactic)

 ;; following are from calp
 (sxml namespace)
 ((calp util) :select (path-append))
 ((datetime) :select (datetime datetime->string))
 )


(define (parse-month str)
  "Get month number from (a shortened) monthname.
Returns -1 on failure"
  (let loop ((i 1)
             (months
              (list "january" "february" "march" "april" "may" "june" "july"
                    "august" "september" "october" "november" "december")))


    (if (null? months)
        -1
        (let ((len (min (string-length (car months))
                        (string-length str))))
          (if (string=?
               (string-take (string-downcase str) len)
               (string-take (car months) len))
              i
              (loop (1+ i) (cdr months)))))))


(define-record-type feed
  (fields url transformer))

(define (parse-date str)
  (and=> (string-match "([a-zA-Z]*)\\. ([0-9]+), ([0-9]+), ([0-9]+):([0-9]+) ([ap])\\.m\\." str)
         (lambda (m)
           (let ((month (match:substring m 1))
                 (day (string->number (match:substring m 2)))
                 (year (string->number (match:substring m 3)))
                 (hour (string->number (match:substring m 4)))
                 (minute (string->number (match:substring m 5)))
                 (am-pm (match:substring m 6)))
             (datetime #:year year
                       #:month (parse-month month)
                       #:day day
                       #:hour (+ hour (if (string=? am-pm "p") 12 0))
                       #:minute minute)))))


(define feeds
  (list
   (make-feed
    "https://lwn.net/headlines/Features"
    `((rss:item
       . ,(lambda (key . children)
            (define tag (cons key children))
            (call-with-values (lambda () (apply values ((sxpath '(rss:title *text*)) tag)))
              (case-lambda (() tag)
                           ((title . _)
                            (if (string-match "^\\[\\$\\]" title)
                                '() tag))))))))

   (make-feed
    "https://swordscomic.com/comic/feed/"
    `((rss:pubDate
       . ,(lambda (key . children)
            (cond ((parse-date (car children))
                   => (lambda (dt)
                        (list key (datetime->string dt "~Y-~m-~dT~H:~M:~S"))))
                  (else (cons key children)))))

      (rss:description
       . ,(lambda (key . children)
            (cons key
                  ;; Each entry has a <style/> tag at the
                  ;; beggining, which brakes the short preview on
                  ;; NetNewsWire. This removes it
                  (cond ((string-match "</style>" (car children))
                         => (lambda (m)
                              (list (string-drop (car children) (match:end m)))))
                        (else children)))))
      ))
   ))

(define rss-namespaces
  '((rdf . "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
    (dc . "http://purl.org/dc/elements/1.1/")
    (syn . "http://purl.org/rss/1.0/modules/syndication/")
    (rss . "http://purl.org/rss/1.0/")
    (atom . "http://www.w3.org/2005/Atom")
    (#f . "http://purl.org/rss/1.0/")
    ))

(define (parse-rss port)
  (xml->sxml port #:namespaces rss-namespaces))

(define (namespaces->sxml namespaces)
  (map (lambda (namespace)
         (list (string->symbol
                (if (not (car namespace))
                    "xmlns"
                    (string-append "xmlns:" (symbol->string (car namespace)))))
               (cdr namespace)))
       namespaces))


(define (filter-tree transformers tree)
  (pre-post-order
   tree
   `(,@transformers
     (rdf:RDF . ,(lambda (tag . children)
                   (cond (((sxpath '(@)) (cons tag children))
                          (negate null?)
                          => (lambda (attributes)
                               (write attributes (current-error-port)) (newline (current-error-port))
                               `(,tag
                                 (@ ,@(namespaces->sxml rss-namespaces)
                                    ,@(cdar attributes))
                                 ,@(cdr children))))
                         (else
                          `(,tag (@ ,@(namespaces->sxml rss-namespaces))
                                 ,@(cdr children))))))
     (*text* . ,(lambda (_ x . xs) x))
     (*default* . ,(lambda (item  . children)
                     (cons item children))))) )

(define option-spec
  `((output (single-char #\o)
            (required? #f)
            (value #t))))

(define ((handle-feed output-directory) feed)
  (define response
    ;; TODO follow redirects
    (http-get (feed-url feed)
              #:streaming? #t))

  (unless (= 200 (response-code response))
    (format (current-error-port) "HTTP error ~a"
            (response-code response))
    (exit 1))

  (let* ((feed-content (move-to-namespace (parse-rss (response-body-port response))
                                          '((#f . rss))))
         (feed-title (car ((sxpath '(// rss:channel rss:title *text*)) feed-content)))
         (safe-title (string-map (lambda (c) (if (char-set-contains? char-set:letter+digit c)
                                            c #\_))
                                 feed-title)))


    (let ((output (filter-tree (feed-transformer feed)
                               feed-content))
          (filename (string-append
                     (path-append output-directory safe-title)
                     ".rss")))
      (with-output-to-file filename
        (lambda ()
          (sxml->xml
           ;; Removing the rss prefix should be fine, but at least NewNewsWire
           ;; (for iPad) expects all rss elements to be un-namespaced.
           (move-to-namespace output '((rss . #f)))))))))

(define (main args)

  (define opts (getopt-long args option-spec))
  (define output-directory (option-ref opts 'output "."))

  (for-each (handle-feed output-directory) feeds))