From beb3d107fc9d922a6c4e78a11e5be1f1ef1c46c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20H=C3=B6rnquist?= Date: Thu, 30 Dec 2021 17:35:30 +0100 Subject: Move stuff to module dir. --- module/rss-filter.scm | 45 ++++++++++++++++++++++++++++++++ module/rss-filter/feed-handler.scm | 53 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 98 insertions(+) create mode 100644 module/rss-filter.scm create mode 100644 module/rss-filter/feed-handler.scm (limited to 'module') diff --git a/module/rss-filter.scm b/module/rss-filter.scm new file mode 100644 index 0000000..748fa77 --- /dev/null +++ b/module/rss-filter.scm @@ -0,0 +1,45 @@ +(define-module (rss-filter) + :export (handle-feed) + :use-module (ice-9 curried-definitions) + :use-module (rss-filter feed-handler) + :use-module (web client) + :use-module (web response) + :use-module (sxml simple) + :use-module (sxml xpath) + :use-module (sxml namespace) + :use-module ((calp util) :select (path-append)) + ) + +(define ((handle-feed output-directory quiet?) feed) + (define response + (begin + (when (not quiet?) + (format (current-error-port) "Fetching ~a~%" (feed-url feed))) + ;; TODO follow redirects + (http-get (feed-url feed) + #:streaming? #t))) + + (unless (= 200 (response-code response)) + (format (current-error-port) "HTTP error ~a" + (response-code response)) + (exit 1)) + + (let* ((feed-content (move-to-namespace (parse-rss (response-body-port response)) + '((#f . rss)))) + (feed-title (car ((sxpath '(// rss:channel rss:title *text*)) feed-content))) + (safe-title (string-map (lambda (c) (if (char-set-contains? char-set:letter+digit c) + c #\_)) + feed-title))) + + + (let ((output (filter-tree (feed-transformer feed) + feed-content)) + (filename (string-append + (path-append output-directory safe-title) + ".rss"))) + (with-output-to-file filename + (lambda () + (sxml->xml + ;; Removing the rss prefix should be fine, but at least NewNewsWire + ;; (for iPad) expects all rss elements to be un-namespaced. + (move-to-namespace output '((rss . #f))))))))) diff --git a/module/rss-filter/feed-handler.scm b/module/rss-filter/feed-handler.scm new file mode 100644 index 0000000..eef1ca7 --- /dev/null +++ b/module/rss-filter/feed-handler.scm @@ -0,0 +1,53 @@ +(define-module (rss-filter feed-handler) + :use-module (rnrs records syntactic) + :use-module (sxml simple) + :use-module (sxml transform) + :use-module (sxml xpath) + :export (make-feed feed-url feed-transformer + parse-rss filter-tree)) + +(define-record-type feed + (fields url transformer)) + + +(define rss-namespaces + '((rdf . "http://www.w3.org/1999/02/22-rdf-syntax-ns#") + (dc . "http://purl.org/dc/elements/1.1/") + (syn . "http://purl.org/rss/1.0/modules/syndication/") + (rss . "http://purl.org/rss/1.0/") + (atom . "http://www.w3.org/2005/Atom") + (#f . "http://purl.org/rss/1.0/") + )) + +(define (parse-rss port) + (xml->sxml port #:namespaces rss-namespaces)) + +(define (namespaces->sxml namespaces) + (map (lambda (namespace) + (list (string->symbol + (if (not (car namespace)) + "xmlns" + (string-append "xmlns:" (symbol->string (car namespace))))) + (cdr namespace))) + namespaces)) + + +(define (filter-tree transformers tree) + (pre-post-order + tree + `(,@transformers + (rdf:RDF . ,(lambda (tag . children) + (cond (((sxpath '(@)) (cons tag children)) + (negate null?) + => (lambda (attributes) + (write attributes (current-error-port)) (newline (current-error-port)) + `(,tag + (@ ,@(namespaces->sxml rss-namespaces) + ,@(cdar attributes)) + ,@(cdr children)))) + (else + `(,tag (@ ,@(namespaces->sxml rss-namespaces)) + ,@(cdr children)))))) + (*text* . ,(lambda (_ x . xs) x)) + (*default* . ,(lambda (item . children) + (cons item children))))) ) -- cgit v1.2.3