From 8f98107b3d9d834c16393af795a38d5d7b571859 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20H=C3=B6rnquist?= Date: Sat, 27 Nov 2021 19:25:10 +0100 Subject: Handle multiple feeds. --- main.scm | 168 +++++++++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 125 insertions(+), 43 deletions(-) (limited to 'main.scm') diff --git a/main.scm b/main.scm index 7ea1172..4094ebd 100755 --- a/main.scm +++ b/main.scm @@ -12,28 +12,97 @@ (sxml transform) (ice-9 regex) (ice-9 getopt-long) + (ice-9 curried-definitions) + (ice-9 format) + (rnrs records syntactic) ;; following are from calp (sxml namespace) + ((calp util) :select (path-append)) + ((datetime) :select (datetime datetime->string)) ) - -(define transformer - `((rss:item - . ,(lambda (key . children) - (define tag (cons key children)) - (call-with-values (lambda () (apply values ((sxpath '(rss:title *text*)) tag))) - (case-lambda (() tag) - ((title . _) - (if (string-match "^\\[\\$\\]" title) - '() tag)))))))) +(define (parse-month str) + "Get month number from (a shortened) monthname. +Returns -1 on failure" + (let loop ((i 1) + (months + (list "january" "february" "march" "april" "may" "june" "july" + "august" "september" "october" "november" "december"))) + + + (if (null? months) + -1 + (let ((len (min (string-length (car months)) + (string-length str)))) + (if (string=? + (string-take (string-downcase str) len) + (string-take (car months) len)) + i + (loop (1+ i) (cdr months))))))) + + +(define-record-type feed + (fields url transformer)) + +(define (parse-date str) + (and=> (string-match "([a-zA-Z]*)\\. ([0-9]+), ([0-9]+), ([0-9]+):([0-9]+) ([ap])\\.m\\." str) + (lambda (m) + (let ((month (match:substring m 1)) + (day (string->number (match:substring m 2))) + (year (string->number (match:substring m 3))) + (hour (string->number (match:substring m 4))) + (minute (string->number (match:substring m 5))) + (am-pm (match:substring m 6))) + (datetime #:year year + #:month (parse-month month) + #:day day + #:hour (+ hour (if (string=? am-pm "p") 12 0)) + #:minute minute))))) + + +(define feeds + (list + (make-feed + "https://lwn.net/headlines/Features" + `((rss:item + . ,(lambda (key . children) + (define tag (cons key children)) + (call-with-values (lambda () (apply values ((sxpath '(rss:title *text*)) tag))) + (case-lambda (() tag) + ((title . _) + (if (string-match "^\\[\\$\\]" title) + '() tag)))))))) + + (make-feed + "https://swordscomic.com/comic/feed/" + `((rss:pubDate + . ,(lambda (key . children) + (cond ((parse-date (car children)) + => (lambda (dt) + (list key (datetime->string dt "~Y-~m-~dT~H:~M:~S")))) + (else (cons key children))))) + + (rss:description + . ,(lambda (key . children) + (cons key + ;; Each entry has a " (car children)) + => (lambda (m) + (list (string-drop (car children) (match:end m))))) + (else children))))) + )) + )) (define rss-namespaces '((rdf . "http://www.w3.org/1999/02/22-rdf-syntax-ns#") (dc . "http://purl.org/dc/elements/1.1/") (syn . "http://purl.org/rss/1.0/modules/syndication/") (rss . "http://purl.org/rss/1.0/") + (atom . "http://www.w3.org/2005/Atom") (#f . "http://purl.org/rss/1.0/") )) @@ -43,9 +112,9 @@ (define (namespaces->sxml namespaces) (map (lambda (namespace) (list (string->symbol - (if (not (car namespace)) - "xmlns" - (string-append "xmlns:" (symbol->string (car namespace))))) + (if (not (car namespace)) + "xmlns" + (string-append "xmlns:" (symbol->string (car namespace))))) (cdr namespace))) namespaces)) @@ -53,34 +122,32 @@ (define (filter-tree transformers tree) (pre-post-order tree - `(,@transformer - (rdf:RDF . ,(lambda (tag . children) - (cond (((sxpath '(@)) (cons tag children)) - (negate null?) - => (lambda (attributes) - (write attributes (current-error-port)) (newline (current-error-port)) - `(,tag - (@ ,@(namespaces->sxml rss-namespaces) - ,@(cdar attributes)) - ,@(cdr children)))) - (else - `(,tag (@ ,@(namespaces->sxml rss-namespaces)) - ,@(cdr children)))))) + `(,@transformers + (rdf:RDF . ,(lambda (tag . children) + (cond (((sxpath '(@)) (cons tag children)) + (negate null?) + => (lambda (attributes) + (write attributes (current-error-port)) (newline (current-error-port)) + `(,tag + (@ ,@(namespaces->sxml rss-namespaces) + ,@(cdar attributes)) + ,@(cdr children)))) + (else + `(,tag (@ ,@(namespaces->sxml rss-namespaces)) + ,@(cdr children)))))) (*text* . ,(lambda (_ x . xs) x)) (*default* . ,(lambda (item . children) (cons item children))))) ) (define option-spec `((output (single-char #\o) - (required? #t) + (required? #f) (value #t)))) -(define (main args) - - (define opts (getopt-long args option-spec)) - +(define ((handle-feed output-directory) feed) (define response - (http-get "https://lwn.net/headlines/Features" + ;; TODO follow redirects + (http-get (feed-url feed) #:streaming? #t)) (unless (= 200 (response-code response)) @@ -88,14 +155,29 @@ (response-code response)) (exit 1)) - (with-output-to-file (option-ref opts 'output #f) - (lambda () - (sxml->xml - ;; Removing the rss prefix should be fine, but at least NewNewsWire - ;; (for iPad) expects all rss elements to be un-namespaced. - (move-to-namespace - (filter-tree - transformer - (parse-rss (response-body-port response))) - '((rss . #f)) - ))))) + (let* ((feed-content (move-to-namespace (parse-rss (response-body-port response)) + '((#f . rss)))) + (feed-title (car ((sxpath '(// rss:channel rss:title *text*)) feed-content))) + (safe-title (string-map (lambda (c) (if (char-set-contains? char-set:letter+digit c) + c #\_)) + feed-title))) + + + (let ((output (filter-tree (feed-transformer feed) + feed-content)) + (filename (string-append + (path-append output-directory safe-title) + ".rss"))) + (with-output-to-file filename + (lambda () + (sxml->xml + ;; Removing the rss prefix should be fine, but at least NewNewsWire + ;; (for iPad) expects all rss elements to be un-namespaced. + (move-to-namespace output '((rss . #f))))))))) + +(define (main args) + + (define opts (getopt-long args option-spec)) + (define output-directory (option-ref opts 'output ".")) + + (for-each (handle-feed output-directory) feeds)) -- cgit v1.2.3