blob: d0459b2944ab3403f79808947ad2e7ee55c16dd8 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
|
#!/usr/bin/guile \
-e main -s
!#
(use-modules
(web client)
(web response)
(sxml simple)
(sxml xpath)
(sxml transform)
(ice-9 regex)
)
;; Alist of pre-post-order handlers used to filter the feed.
;;
;; The single entry handles `rss:item' nodes: the node is rebuilt from its
;; key and (already processed) children, the text of its `rss:title' child
;; is looked up, and the node is replaced by the empty list when the first
;; title text starts with the literal marker "[$]".  Items without any
;; title text, or whose title does not carry the marker, are kept as-is.
(define transformer
  (list
   (cons 'rss:item
         (lambda (key . children)
           (let* ((node (cons key children))
                  (titles ((sxpath '(rss:title *text*)) node)))
             (cond
              ;; No title text at all: nothing to test, keep the item.
              ((null? titles) node)
              ;; Only the first title text is inspected.
              ((string-match "^\\[\\$\\]" (car titles)) '())
              (else node)))))))
;; Read an XML document from PORT and return it as SXML.
;;
;; The namespace URIs listed below are mapped to the short prefixes
;; `rdf', `dc', `syn' and `rss', so element names in the result look like
;; `rss:item' rather than carrying the full URI.
(define (parse-rss port)
  (let ((prefix->uri
         '((rdf . "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
           (dc . "http://purl.org/dc/elements/1.1/")
           (syn . "http://purl.org/rss/1.0/modules/syndication/")
           (rss . "http://purl.org/rss/1.0/"))))
    (xml->sxml port #:namespaces prefix->uri)))
;; Walk the SXML TREE with `pre-post-order', applying the handlers in
;; TRANSFORMERS and leaving every other node unchanged.
;;
;; TRANSFORMERS is an alist of (tag . handler) bindings in the format
;; expected by `pre-post-order'; it is spliced in front of two fallback
;; bindings:
;;   *text*     returns the text node itself,
;;   *default*  rebuilds the element from its tag and processed children.
;;
;; Returns the transformed tree.
(define (filter-tree transformers tree)
  (pre-post-order
   tree
   ;; Use the TRANSFORMERS argument here, not a global binding, so that
   ;; callers actually control which handlers are applied.
   `(,@transformers
     (*text* . ,(lambda (_ x . xs) x))
     (*default* . ,(lambda (item . children)
                     (cons item children))))))
;; Script entry point (invoked through the `-e main' switch in the header).
;;
;; Fetches https://lwn.net/headlines/Features, and on HTTP 200 parses the
;; body as RSS, runs it through `filter-tree' with `transformer', and
;; writes the resulting XML to "lwn-unlocked-features.xml" in the current
;; directory.  On any other status code an error line is printed to the
;; current error port and the process exits with status 1.
;;
;; ARGS is the command line; it is not used.
(define (main args)
  ;; With #:streaming? #t, `http-get' returns two values: the response
  ;; object and a port from which the body can be read.  Receive both
  ;; explicitly instead of relying on the second one being dropped.
  (call-with-values
      (lambda ()
        (http-get "https://lwn.net/headlines/Features"
                  #:streaming? #t))
    (lambda (response body-port)
      (unless (= 200 (response-code response))
        ;; Terminate the message with a newline so it does not run into
        ;; the shell prompt or any following output.
        (format (current-error-port) "HTTP error ~a~%"
                (response-code response))
        (exit 1))
      (with-output-to-file "lwn-unlocked-features.xml"
        (lambda ()
          (sxml->xml
           (filter-tree
            transformer
            (parse-rss body-port)))))
      ;; Release the connection once the feed has been consumed.
      (close-port body-port))))
|