summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHugo Hörnquist <hugo@lysator.liu.se>2021-11-27 19:25:10 +0100
committerHugo Hörnquist <hugo@lysator.liu.se>2021-11-27 19:25:10 +0100
commit8f98107b3d9d834c16393af795a38d5d7b571859 (patch)
treede5d8d322f7d1109cae31c4c04f8e2e04697e6a5
parentWorking. (diff)
downloadrss-filter-8f98107b3d9d834c16393af795a38d5d7b571859.tar.gz
rss-filter-8f98107b3d9d834c16393af795a38d5d7b571859.tar.xz
Handle multiple feeds.
-rwxr-xr-xmain.scm168
1 files changed, 125 insertions, 43 deletions
diff --git a/main.scm b/main.scm
index 7ea1172..4094ebd 100755
--- a/main.scm
+++ b/main.scm
@@ -12,28 +12,97 @@
(sxml transform)
(ice-9 regex)
(ice-9 getopt-long)
+ (ice-9 curried-definitions)
+ (ice-9 format)
+ (rnrs records syntactic)
;; following are from calp
(sxml namespace)
+ ((calp util) :select (path-append))
+ ((datetime) :select (datetime datetime->string))
)
-
-(define transformer
- `((rss:item
- . ,(lambda (key . children)
- (define tag (cons key children))
- (call-with-values (lambda () (apply values ((sxpath '(rss:title *text*)) tag)))
- (case-lambda (() tag)
- ((title . _)
- (if (string-match "^\\[\\$\\]" title)
- '() tag))))))))
+(define (parse-month str)
+ "Get month number from (a shortened) monthname.
+Returns -1 on failure"
+ (let loop ((i 1)
+ (months
+ (list "january" "february" "march" "april" "may" "june" "july"
+ "august" "september" "october" "november" "december")))
+
+
+ (if (null? months)
+ -1
+ (let ((len (min (string-length (car months))
+ (string-length str))))
+ (if (string=?
+ (string-take (string-downcase str) len)
+ (string-take (car months) len))
+ i
+ (loop (1+ i) (cdr months)))))))
+
+
+(define-record-type feed
+ (fields url transformer))
+
+(define (parse-date str)
+ (and=> (string-match "([a-zA-Z]*)\\. ([0-9]+), ([0-9]+), ([0-9]+):([0-9]+) ([ap])\\.m\\." str)
+ (lambda (m)
+ (let ((month (match:substring m 1))
+ (day (string->number (match:substring m 2)))
+ (year (string->number (match:substring m 3)))
+ (hour (string->number (match:substring m 4)))
+ (minute (string->number (match:substring m 5)))
+ (am-pm (match:substring m 6)))
+ (datetime #:year year
+ #:month (parse-month month)
+ #:day day
+ #:hour (+ hour (if (string=? am-pm "p") 12 0))
+ #:minute minute)))))
+
+
+(define feeds
+ (list
+ (make-feed
+ "https://lwn.net/headlines/Features"
+ `((rss:item
+ . ,(lambda (key . children)
+ (define tag (cons key children))
+ (call-with-values (lambda () (apply values ((sxpath '(rss:title *text*)) tag)))
+ (case-lambda (() tag)
+ ((title . _)
+ (if (string-match "^\\[\\$\\]" title)
+ '() tag))))))))
+
+ (make-feed
+ "https://swordscomic.com/comic/feed/"
+ `((rss:pubDate
+ . ,(lambda (key . children)
+ (cond ((parse-date (car children))
+ => (lambda (dt)
+ (list key (datetime->string dt "~Y-~m-~dT~H:~M:~S"))))
+ (else (cons key children)))))
+
+ (rss:description
+ . ,(lambda (key . children)
+ (cons key
+ ;; Each entry has a <style/> tag at the
+ ;; beggining, which brakes the short preview on
+ ;; NetNewsWire. This removes it
+ (cond ((string-match "</style>" (car children))
+ => (lambda (m)
+ (list (string-drop (car children) (match:end m)))))
+ (else children)))))
+ ))
+ ))
(define rss-namespaces
'((rdf . "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
(dc . "http://purl.org/dc/elements/1.1/")
(syn . "http://purl.org/rss/1.0/modules/syndication/")
(rss . "http://purl.org/rss/1.0/")
+ (atom . "http://www.w3.org/2005/Atom")
(#f . "http://purl.org/rss/1.0/")
))
@@ -43,9 +112,9 @@
(define (namespaces->sxml namespaces)
(map (lambda (namespace)
(list (string->symbol
- (if (not (car namespace))
- "xmlns"
- (string-append "xmlns:" (symbol->string (car namespace)))))
+ (if (not (car namespace))
+ "xmlns"
+ (string-append "xmlns:" (symbol->string (car namespace)))))
(cdr namespace)))
namespaces))
@@ -53,34 +122,32 @@
(define (filter-tree transformers tree)
(pre-post-order
tree
- `(,@transformer
- (rdf:RDF . ,(lambda (tag . children)
- (cond (((sxpath '(@)) (cons tag children))
- (negate null?)
- => (lambda (attributes)
- (write attributes (current-error-port)) (newline (current-error-port))
- `(,tag
- (@ ,@(namespaces->sxml rss-namespaces)
- ,@(cdar attributes))
- ,@(cdr children))))
- (else
- `(,tag (@ ,@(namespaces->sxml rss-namespaces))
- ,@(cdr children))))))
+ `(,@transformers
+ (rdf:RDF . ,(lambda (tag . children)
+ (cond (((sxpath '(@)) (cons tag children))
+ (negate null?)
+ => (lambda (attributes)
+ (write attributes (current-error-port)) (newline (current-error-port))
+ `(,tag
+ (@ ,@(namespaces->sxml rss-namespaces)
+ ,@(cdar attributes))
+ ,@(cdr children))))
+ (else
+ `(,tag (@ ,@(namespaces->sxml rss-namespaces))
+ ,@(cdr children))))))
(*text* . ,(lambda (_ x . xs) x))
(*default* . ,(lambda (item . children)
(cons item children))))) )
(define option-spec
`((output (single-char #\o)
- (required? #t)
+ (required? #f)
(value #t))))
-(define (main args)
-
- (define opts (getopt-long args option-spec))
-
+(define ((handle-feed output-directory) feed)
(define response
- (http-get "https://lwn.net/headlines/Features"
+ ;; TODO follow redirects
+ (http-get (feed-url feed)
#:streaming? #t))
(unless (= 200 (response-code response))
@@ -88,14 +155,29 @@
(response-code response))
(exit 1))
- (with-output-to-file (option-ref opts 'output #f)
- (lambda ()
- (sxml->xml
- ;; Removing the rss prefix should be fine, but at least NewNewsWire
- ;; (for iPad) expects all rss elements to be un-namespaced.
- (move-to-namespace
- (filter-tree
- transformer
- (parse-rss (response-body-port response)))
- '((rss . #f))
- )))))
+ (let* ((feed-content (move-to-namespace (parse-rss (response-body-port response))
+ '((#f . rss))))
+ (feed-title (car ((sxpath '(// rss:channel rss:title *text*)) feed-content)))
+ (safe-title (string-map (lambda (c) (if (char-set-contains? char-set:letter+digit c)
+ c #\_))
+ feed-title)))
+
+
+ (let ((output (filter-tree (feed-transformer feed)
+ feed-content))
+ (filename (string-append
+ (path-append output-directory safe-title)
+ ".rss")))
+ (with-output-to-file filename
+ (lambda ()
+ (sxml->xml
+ ;; Removing the rss prefix should be fine, but at least NewNewsWire
+ ;; (for iPad) expects all rss elements to be un-namespaced.
+ (move-to-namespace output '((rss . #f)))))))))
+
+(define (main args)
+
+ (define opts (getopt-long args option-spec))
+ (define output-directory (option-ref opts 'output "."))
+
+ (for-each (handle-feed output-directory) feeds))