summaryrefslogtreecommitdiff
path: root/module
diff options
context:
space:
mode:
authorHugo Hörnquist <hugo@lysator.liu.se>2021-12-30 17:35:30 +0100
committerHugo Hörnquist <hugo@lysator.liu.se>2021-12-30 17:36:07 +0100
commitbeb3d107fc9d922a6c4e78a11e5be1f1ef1c46c0 (patch)
tree5ecf8e90002b6edc51580aabd514812c71249ccb /module
parentImprove --help output. (diff)
downloadrss-filter-beb3d107fc9d922a6c4e78a11e5be1f1ef1c46c0.tar.gz
rss-filter-beb3d107fc9d922a6c4e78a11e5be1f1ef1c46c0.tar.xz
Move stuff to module dir.
Diffstat (limited to 'module')
-rw-r--r--module/rss-filter.scm45
-rw-r--r--module/rss-filter/feed-handler.scm53
2 files changed, 98 insertions, 0 deletions
diff --git a/module/rss-filter.scm b/module/rss-filter.scm
new file mode 100644
index 0000000..748fa77
--- /dev/null
+++ b/module/rss-filter.scm
@@ -0,0 +1,45 @@
+(define-module (rss-filter)
+ :export (handle-feed)
+ :use-module (ice-9 curried-definitions)
+ :use-module (rss-filter feed-handler)
+ :use-module (web client)
+ :use-module (web response)
+ :use-module (sxml simple)
+ :use-module (sxml xpath)
+ :use-module (sxml namespace)
+ :use-module ((calp util) :select (path-append))
+ )
+
+(define ((handle-feed output-directory quiet?) feed)
+ (define response
+ (begin
+ (when (not quiet?)
+ (format (current-error-port) "Fetching ~a~%" (feed-url feed)))
+ ;; TODO follow redirects
+ (http-get (feed-url feed)
+ #:streaming? #t)))
+
+ (unless (= 200 (response-code response))
+ (format (current-error-port) "HTTP error ~a"
+ (response-code response))
+ (exit 1))
+
+ (let* ((feed-content (move-to-namespace (parse-rss (response-body-port response))
+ '((#f . rss))))
+ (feed-title (car ((sxpath '(// rss:channel rss:title *text*)) feed-content)))
+ (safe-title (string-map (lambda (c) (if (char-set-contains? char-set:letter+digit c)
+ c #\_))
+ feed-title)))
+
+
+ (let ((output (filter-tree (feed-transformer feed)
+ feed-content))
+ (filename (string-append
+ (path-append output-directory safe-title)
+ ".rss")))
+ (with-output-to-file filename
+ (lambda ()
+ (sxml->xml
+ ;; Removing the rss prefix should be fine, but at least NewNewsWire
+ ;; (for iPad) expects all rss elements to be un-namespaced.
+ (move-to-namespace output '((rss . #f)))))))))
diff --git a/module/rss-filter/feed-handler.scm b/module/rss-filter/feed-handler.scm
new file mode 100644
index 0000000..eef1ca7
--- /dev/null
+++ b/module/rss-filter/feed-handler.scm
@@ -0,0 +1,53 @@
+(define-module (rss-filter feed-handler)
+ :use-module (rnrs records syntactic)
+ :use-module (sxml simple)
+ :use-module (sxml transform)
+ :use-module (sxml xpath)
+ :export (make-feed feed-url feed-transformer
+ parse-rss filter-tree))
+
+(define-record-type feed
+ (fields url transformer))
+
+
+(define rss-namespaces
+ '((rdf . "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
+ (dc . "http://purl.org/dc/elements/1.1/")
+ (syn . "http://purl.org/rss/1.0/modules/syndication/")
+ (rss . "http://purl.org/rss/1.0/")
+ (atom . "http://www.w3.org/2005/Atom")
+ (#f . "http://purl.org/rss/1.0/")
+ ))
+
+(define (parse-rss port)
+ (xml->sxml port #:namespaces rss-namespaces))
+
+(define (namespaces->sxml namespaces)
+ (map (lambda (namespace)
+ (list (string->symbol
+ (if (not (car namespace))
+ "xmlns"
+ (string-append "xmlns:" (symbol->string (car namespace)))))
+ (cdr namespace)))
+ namespaces))
+
+
+(define (filter-tree transformers tree)
+ (pre-post-order
+ tree
+ `(,@transformers
+ (rdf:RDF . ,(lambda (tag . children)
+ (cond (((sxpath '(@)) (cons tag children))
+ (negate null?)
+ => (lambda (attributes)
+ (write attributes (current-error-port)) (newline (current-error-port))
+ `(,tag
+ (@ ,@(namespaces->sxml rss-namespaces)
+ ,@(cdar attributes))
+ ,@(cdr children))))
+ (else
+ `(,tag (@ ,@(namespaces->sxml rss-namespaces))
+ ,@(cdr children))))))
+ (*text* . ,(lambda (_ x . xs) x))
+ (*default* . ,(lambda (item . children)
+ (cons item children))))) )