1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
|
#!/usr/bin/guile \
-e main -s
!#
(add-to-load-path "/home/hugo/code/calp/module")
(use-modules
(web client)
(web response)
(sxml simple)
(sxml xpath)
(sxml transform)
(ice-9 regex)
(ice-9 getopt-long)
;; following are from calp
(sxml namespace)
)
(define transformer
`((rss:item
. ,(lambda (key . children)
(define tag (cons key children))
(call-with-values (lambda () (apply values ((sxpath '(rss:title *text*)) tag)))
(case-lambda (() tag)
((title . _)
(if (string-match "^\\[\\$\\]" title)
'() tag))))))))
(define rss-namespaces
'((rdf . "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
(dc . "http://purl.org/dc/elements/1.1/")
(syn . "http://purl.org/rss/1.0/modules/syndication/")
(rss . "http://purl.org/rss/1.0/")
(#f . "http://purl.org/rss/1.0/")
))
(define (parse-rss port)
(xml->sxml port #:namespaces rss-namespaces))
(define (namespaces->sxml namespaces)
(map (lambda (namespace)
(list (string->symbol
(if (not (car namespace))
"xmlns"
(string-append "xmlns:" (symbol->string (car namespace)))))
(cdr namespace)))
namespaces))
(define (filter-tree transformers tree)
(pre-post-order
tree
`(,@transformer
(rdf:RDF . ,(lambda (tag . children)
(cond (((sxpath '(@)) (cons tag children))
(negate null?)
=> (lambda (attributes)
(write attributes (current-error-port)) (newline (current-error-port))
`(,tag
(@ ,@(namespaces->sxml rss-namespaces)
,@(cdar attributes))
,@(cdr children))))
(else
`(,tag (@ ,@(namespaces->sxml rss-namespaces))
,@(cdr children))))))
(*text* . ,(lambda (_ x . xs) x))
(*default* . ,(lambda (item . children)
(cons item children))))) )
(define option-spec
`((output (single-char #\o)
(required? #t)
(value #t))))
(define (main args)
(define opts (getopt-long args option-spec))
(define response
(http-get "https://lwn.net/headlines/Features"
#:streaming? #t))
(unless (= 200 (response-code response))
(format (current-error-port) "HTTP error ~a"
(response-code response))
(exit 1))
(with-output-to-file (option-ref opts 'output #f)
(lambda ()
(sxml->xml
;; Removing the rss prefix should be fine, but at least NewNewsWire
;; (for iPad) expects all rss elements to be un-namespaced.
(move-to-namespace
(filter-tree
transformer
(parse-rss (response-body-port response)))
'((rss . #f))
)))))
|