summaryrefslogtreecommitdiff
path: root/main.scm
blob: 7ea117275744b6f389dd13449d982628685740f6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#!/usr/bin/guile \
-e main -s
!#

(add-to-load-path "/home/hugo/code/calp/module")

(use-modules
 (web client)
 (web response)
 (sxml simple)
 (sxml xpath)
 (sxml transform)
 (ice-9 regex)
 (ice-9 getopt-long)

 ;; following are from calp
 (sxml namespace)
 )



(define transformer
  `((rss:item
     . ,(lambda (key . children)
          (define tag (cons key children))
          (call-with-values (lambda () (apply values ((sxpath '(rss:title *text*)) tag)))
            (case-lambda (() tag)
                         ((title . _)
                          (if (string-match "^\\[\\$\\]" title)
                              '() tag))))))))

(define rss-namespaces
  '((rdf . "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
    (dc . "http://purl.org/dc/elements/1.1/")
    (syn . "http://purl.org/rss/1.0/modules/syndication/")
    (rss . "http://purl.org/rss/1.0/")
    (#f . "http://purl.org/rss/1.0/")
    ))

(define (parse-rss port)
  (xml->sxml port #:namespaces rss-namespaces))

(define (namespaces->sxml namespaces)
  (map (lambda (namespace)
         (list (string->symbol
                 (if (not (car namespace))
                   "xmlns"
                   (string-append "xmlns:" (symbol->string (car namespace)))))
               (cdr namespace)))
       namespaces))


(define (filter-tree transformers tree)
  (pre-post-order
   tree
   `(,@transformer
      (rdf:RDF . ,(lambda (tag . children)
                    (cond (((sxpath '(@)) (cons tag children))
                           (negate null?)
                           => (lambda (attributes)
                                (write attributes (current-error-port)) (newline (current-error-port))
                                `(,tag
                                   (@ ,@(namespaces->sxml rss-namespaces)
                                      ,@(cdar attributes))
                                   ,@(cdr children))))
                          (else
                            `(,tag (@ ,@(namespaces->sxml rss-namespaces))
                                   ,@(cdr children))))))
     (*text* . ,(lambda (_ x . xs) x))
     (*default* . ,(lambda (item  . children)
                     (cons item children))))) )

(define option-spec
  `((output (single-char #\o)
            (required? #t)
            (value #t))))

(define (main args)

  (define opts (getopt-long args option-spec))

  (define response
    (http-get "https://lwn.net/headlines/Features"
              #:streaming? #t))

  (unless (= 200 (response-code response))
    (format (current-error-port) "HTTP error ~a"
            (response-code response))
    (exit 1))

  (with-output-to-file (option-ref opts 'output #f)
    (lambda ()
      (sxml->xml
       ;; Removing the rss prefix should be fine, but at least NewNewsWire
       ;; (for iPad) expects all rss elements to be un-namespaced.
       (move-to-namespace
        (filter-tree
         transformer
         (parse-rss (response-body-port response)))
        '((rss . #f))
        )))))