summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHugo Hörnquist <hugo@lysator.liu.se>2021-11-27 19:34:22 +0100
committerHugo Hörnquist <hugo@lysator.liu.se>2021-11-27 19:34:22 +0100
commit2a60e50138c0ef868d32cfeb1aca07c523819589 (patch)
treeb0f81212f288d5af0374f391c5ef2c5de1bd37c6
parentHandle multiple feeds. (diff)
downloadrss-filter-2a60e50138c0ef868d32cfeb1aca07c523819589.tar.gz
rss-filter-2a60e50138c0ef868d32cfeb1aca07c523819589.tar.xz
Moved stuff to modules.
-rw-r--r--config.scm81
-rw-r--r--feed-handler.scm53
-rwxr-xr-xmain.scm125
3 files changed, 139 insertions, 120 deletions
diff --git a/config.scm b/config.scm
new file mode 100644
index 0000000..33a06d1
--- /dev/null
+++ b/config.scm
@@ -0,0 +1,81 @@
+(define-module (config)
+ :export (feeds))
+
+(use-modules
+ (sxml xpath)
+ (ice-9 regex)
+
+ (feed-handler)
+
+ ((datetime) :select (datetime datetime->string)))
+
+
+(define (parse-month str)
+ "Get month number from (a shortened) monthname.
+Returns -1 on failure"
+ (let loop ((i 1)
+ (months
+ (list "january" "february" "march" "april" "may" "june" "july"
+ "august" "september" "october" "november" "december")))
+
+
+ (if (null? months)
+ -1
+ (let ((len (min (string-length (car months))
+ (string-length str))))
+ (if (string=?
+ (string-take (string-downcase str) len)
+ (string-take (car months) len))
+ i
+ (loop (1+ i) (cdr months)))))))
+
+
+(define (parse-date str)
+ (and=> (string-match "([a-zA-Z]*)\\. ([0-9]+), ([0-9]+), ([0-9]+):([0-9]+) ([ap])\\.m\\." str)
+ (lambda (m)
+ (let ((month (match:substring m 1))
+ (day (string->number (match:substring m 2)))
+ (year (string->number (match:substring m 3)))
+ (hour (string->number (match:substring m 4)))
+ (minute (string->number (match:substring m 5)))
+ (am-pm (match:substring m 6)))
+ (datetime #:year year
+ #:month (parse-month month)
+ #:day day
+ #:hour (+ hour (if (string=? am-pm "p") 12 0))
+ #:minute minute)))))
+
+(define feeds
+ (list
+ (make-feed
+ "https://lwn.net/headlines/Features"
+ `((rss:item
+ . ,(lambda (key . children)
+ (define tag (cons key children))
+ (call-with-values (lambda () (apply values ((sxpath '(rss:title *text*)) tag)))
+ (case-lambda (() tag)
+ ((title . _)
+ (if (string-match "^\\[\\$\\]" title)
+ '() tag))))))))
+
+ (make-feed
+ "https://swordscomic.com/comic/feed/"
+ `((rss:pubDate
+ . ,(lambda (key . children)
+ (cond ((parse-date (car children))
+ => (lambda (dt)
+ (list key (datetime->string dt "~Y-~m-~dT~H:~M:~S"))))
+ (else (cons key children)))))
+
+ (rss:description
+ . ,(lambda (key . children)
+ (cons key
+ ;; Each entry has a <style/> tag at the
+ ;; beggining, which brakes the short preview on
+ ;; NetNewsWire. This removes it
+ (cond ((string-match "</style>" (car children))
+ => (lambda (m)
+ (list (string-drop (car children) (match:end m)))))
+ (else children)))))
+ ))
+ ))
diff --git a/feed-handler.scm b/feed-handler.scm
new file mode 100644
index 0000000..1096fd4
--- /dev/null
+++ b/feed-handler.scm
@@ -0,0 +1,53 @@
+(define-module (feed-handler)
+ :use-module (rnrs records syntactic)
+ :use-module (sxml simple)
+ :use-module (sxml transform)
+ :use-module (sxml xpath)
+ :export (make-feed feed-url feed-transformer
+ parse-rss filter-tree))
+
+(define-record-type feed
+ (fields url transformer))
+
+
+(define rss-namespaces
+ '((rdf . "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
+ (dc . "http://purl.org/dc/elements/1.1/")
+ (syn . "http://purl.org/rss/1.0/modules/syndication/")
+ (rss . "http://purl.org/rss/1.0/")
+ (atom . "http://www.w3.org/2005/Atom")
+ (#f . "http://purl.org/rss/1.0/")
+ ))
+
+(define (parse-rss port)
+ (xml->sxml port #:namespaces rss-namespaces))
+
+(define (namespaces->sxml namespaces)
+ (map (lambda (namespace)
+ (list (string->symbol
+ (if (not (car namespace))
+ "xmlns"
+ (string-append "xmlns:" (symbol->string (car namespace)))))
+ (cdr namespace)))
+ namespaces))
+
+
+(define (filter-tree transformers tree)
+ (pre-post-order
+ tree
+ `(,@transformers
+ (rdf:RDF . ,(lambda (tag . children)
+ (cond (((sxpath '(@)) (cons tag children))
+ (negate null?)
+ => (lambda (attributes)
+ (write attributes (current-error-port)) (newline (current-error-port))
+ `(,tag
+ (@ ,@(namespaces->sxml rss-namespaces)
+ ,@(cdar attributes))
+ ,@(cdr children))))
+ (else
+ `(,tag (@ ,@(namespaces->sxml rss-namespaces))
+ ,@(cdr children))))))
+ (*text* . ,(lambda (_ x . xs) x))
+ (*default* . ,(lambda (item . children)
+ (cons item children))))) )
diff --git a/main.scm b/main.scm
index 4094ebd..5782c04 100755
--- a/main.scm
+++ b/main.scm
@@ -2,6 +2,7 @@
-e main -s
!#
+(add-to-load-path ".")
(add-to-load-path "/home/hugo/code/calp/module")
(use-modules
@@ -9,135 +10,19 @@
(web response)
(sxml simple)
(sxml xpath)
- (sxml transform)
- (ice-9 regex)
+
(ice-9 getopt-long)
(ice-9 curried-definitions)
(ice-9 format)
- (rnrs records syntactic)
;; following are from calp
(sxml namespace)
((calp util) :select (path-append))
- ((datetime) :select (datetime datetime->string))
+
+ (feed-handler)
)
-(define (parse-month str)
- "Get month number from (a shortened) monthname.
-Returns -1 on failure"
- (let loop ((i 1)
- (months
- (list "january" "february" "march" "april" "may" "june" "july"
- "august" "september" "october" "november" "december")))
-
-
- (if (null? months)
- -1
- (let ((len (min (string-length (car months))
- (string-length str))))
- (if (string=?
- (string-take (string-downcase str) len)
- (string-take (car months) len))
- i
- (loop (1+ i) (cdr months)))))))
-
-
-(define-record-type feed
- (fields url transformer))
-
-(define (parse-date str)
- (and=> (string-match "([a-zA-Z]*)\\. ([0-9]+), ([0-9]+), ([0-9]+):([0-9]+) ([ap])\\.m\\." str)
- (lambda (m)
- (let ((month (match:substring m 1))
- (day (string->number (match:substring m 2)))
- (year (string->number (match:substring m 3)))
- (hour (string->number (match:substring m 4)))
- (minute (string->number (match:substring m 5)))
- (am-pm (match:substring m 6)))
- (datetime #:year year
- #:month (parse-month month)
- #:day day
- #:hour (+ hour (if (string=? am-pm "p") 12 0))
- #:minute minute)))))
-
-
-(define feeds
- (list
- (make-feed
- "https://lwn.net/headlines/Features"
- `((rss:item
- . ,(lambda (key . children)
- (define tag (cons key children))
- (call-with-values (lambda () (apply values ((sxpath '(rss:title *text*)) tag)))
- (case-lambda (() tag)
- ((title . _)
- (if (string-match "^\\[\\$\\]" title)
- '() tag))))))))
-
- (make-feed
- "https://swordscomic.com/comic/feed/"
- `((rss:pubDate
- . ,(lambda (key . children)
- (cond ((parse-date (car children))
- => (lambda (dt)
- (list key (datetime->string dt "~Y-~m-~dT~H:~M:~S"))))
- (else (cons key children)))))
-
- (rss:description
- . ,(lambda (key . children)
- (cons key
- ;; Each entry has a <style/> tag at the
- ;; beggining, which brakes the short preview on
- ;; NetNewsWire. This removes it
- (cond ((string-match "</style>" (car children))
- => (lambda (m)
- (list (string-drop (car children) (match:end m)))))
- (else children)))))
- ))
- ))
-
-(define rss-namespaces
- '((rdf . "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
- (dc . "http://purl.org/dc/elements/1.1/")
- (syn . "http://purl.org/rss/1.0/modules/syndication/")
- (rss . "http://purl.org/rss/1.0/")
- (atom . "http://www.w3.org/2005/Atom")
- (#f . "http://purl.org/rss/1.0/")
- ))
-
-(define (parse-rss port)
- (xml->sxml port #:namespaces rss-namespaces))
-
-(define (namespaces->sxml namespaces)
- (map (lambda (namespace)
- (list (string->symbol
- (if (not (car namespace))
- "xmlns"
- (string-append "xmlns:" (symbol->string (car namespace)))))
- (cdr namespace)))
- namespaces))
-
-
-(define (filter-tree transformers tree)
- (pre-post-order
- tree
- `(,@transformers
- (rdf:RDF . ,(lambda (tag . children)
- (cond (((sxpath '(@)) (cons tag children))
- (negate null?)
- => (lambda (attributes)
- (write attributes (current-error-port)) (newline (current-error-port))
- `(,tag
- (@ ,@(namespaces->sxml rss-namespaces)
- ,@(cdar attributes))
- ,@(cdr children))))
- (else
- `(,tag (@ ,@(namespaces->sxml rss-namespaces))
- ,@(cdr children))))))
- (*text* . ,(lambda (_ x . xs) x))
- (*default* . ,(lambda (item . children)
- (cons item children))))) )
(define option-spec
`((output (single-char #\o)
@@ -180,4 +65,4 @@ Returns -1 on failure"
(define opts (getopt-long args option-spec))
(define output-directory (option-ref opts 'output "."))
- (for-each (handle-feed output-directory) feeds))
+ (for-each (handle-feed output-directory) (@ (config) feeds)))