From 2a60e50138c0ef868d32cfeb1aca07c523819589 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20H=C3=B6rnquist?= Date: Sat, 27 Nov 2021 19:34:22 +0100 Subject: Moved stuff to modules. --- config.scm | 81 +++++++++++++++++++++++++++++++++++ feed-handler.scm | 53 +++++++++++++++++++++++ main.scm | 125 +++---------------------------------------------------- 3 files changed, 139 insertions(+), 120 deletions(-) create mode 100644 config.scm create mode 100644 feed-handler.scm diff --git a/config.scm b/config.scm new file mode 100644 index 0000000..33a06d1 --- /dev/null +++ b/config.scm @@ -0,0 +1,81 @@ +(define-module (config) + :export (feeds)) + +(use-modules + (sxml xpath) + (ice-9 regex) + + (feed-handler) + + ((datetime) :select (datetime datetime->string))) + + +(define (parse-month str) + "Get month number from (a shortened) monthname. +Returns -1 on failure" + (let loop ((i 1) + (months + (list "january" "february" "march" "april" "may" "june" "july" + "august" "september" "october" "november" "december"))) + + + (if (null? months) + -1 + (let ((len (min (string-length (car months)) + (string-length str)))) + (if (string=? + (string-take (string-downcase str) len) + (string-take (car months) len)) + i + (loop (1+ i) (cdr months))))))) + + +(define (parse-date str) + (and=> (string-match "([a-zA-Z]*)\\. ([0-9]+), ([0-9]+), ([0-9]+):([0-9]+) ([ap])\\.m\\." str) + (lambda (m) + (let ((month (match:substring m 1)) + (day (string->number (match:substring m 2))) + (year (string->number (match:substring m 3))) + (hour (string->number (match:substring m 4))) + (minute (string->number (match:substring m 5))) + (am-pm (match:substring m 6))) + (datetime #:year year + #:month (parse-month month) + #:day day + #:hour (+ hour (if (string=? am-pm "p") 12 0)) + #:minute minute))))) + +(define feeds + (list + (make-feed + "https://lwn.net/headlines/Features" + `((rss:item + . ,(lambda (key . children) + (define tag (cons key children)) + (call-with-values (lambda () (apply values ((sxpath '(rss:title *text*)) tag))) + (case-lambda (() tag) + ((title . _) + (if (string-match "^\\[\\$\\]" title) + '() tag)))))))) + + (make-feed + "https://swordscomic.com/comic/feed/" + `((rss:pubDate + . ,(lambda (key . children) + (cond ((parse-date (car children)) + => (lambda (dt) + (list key (datetime->string dt "~Y-~m-~dT~H:~M:~S")))) + (else (cons key children))))) + + (rss:description + . ,(lambda (key . children) + (cons key + ;; Each entry has a " (car children)) + => (lambda (m) + (list (string-drop (car children) (match:end m))))) + (else children))))) + )) + )) diff --git a/feed-handler.scm b/feed-handler.scm new file mode 100644 index 0000000..1096fd4 --- /dev/null +++ b/feed-handler.scm @@ -0,0 +1,53 @@ +(define-module (feed-handler) + :use-module (rnrs records syntactic) + :use-module (sxml simple) + :use-module (sxml transform) + :use-module (sxml xpath) + :export (make-feed feed-url feed-transformer + parse-rss filter-tree)) + +(define-record-type feed + (fields url transformer)) + + +(define rss-namespaces + '((rdf . "http://www.w3.org/1999/02/22-rdf-syntax-ns#") + (dc . "http://purl.org/dc/elements/1.1/") + (syn . "http://purl.org/rss/1.0/modules/syndication/") + (rss . "http://purl.org/rss/1.0/") + (atom . "http://www.w3.org/2005/Atom") + (#f . "http://purl.org/rss/1.0/") + )) + +(define (parse-rss port) + (xml->sxml port #:namespaces rss-namespaces)) + +(define (namespaces->sxml namespaces) + (map (lambda (namespace) + (list (string->symbol + (if (not (car namespace)) + "xmlns" + (string-append "xmlns:" (symbol->string (car namespace))))) + (cdr namespace))) + namespaces)) + + +(define (filter-tree transformers tree) + (pre-post-order + tree + `(,@transformers + (rdf:RDF . ,(lambda (tag . children) + (cond (((sxpath '(@)) (cons tag children)) + (negate null?) + => (lambda (attributes) + (write attributes (current-error-port)) (newline (current-error-port)) + `(,tag + (@ ,@(namespaces->sxml rss-namespaces) + ,@(cdar attributes)) + ,@(cdr children)))) + (else + `(,tag (@ ,@(namespaces->sxml rss-namespaces)) + ,@(cdr children)))))) + (*text* . ,(lambda (_ x . xs) x)) + (*default* . ,(lambda (item . children) + (cons item children))))) ) diff --git a/main.scm b/main.scm index 4094ebd..5782c04 100755 --- a/main.scm +++ b/main.scm @@ -2,6 +2,7 @@ -e main -s !# +(add-to-load-path ".") (add-to-load-path "/home/hugo/code/calp/module") (use-modules @@ -9,135 +10,19 @@ (web response) (sxml simple) (sxml xpath) - (sxml transform) - (ice-9 regex) + (ice-9 getopt-long) (ice-9 curried-definitions) (ice-9 format) - (rnrs records syntactic) ;; following are from calp (sxml namespace) ((calp util) :select (path-append)) - ((datetime) :select (datetime datetime->string)) + + (feed-handler) ) -(define (parse-month str) - "Get month number from (a shortened) monthname. -Returns -1 on failure" - (let loop ((i 1) - (months - (list "january" "february" "march" "april" "may" "june" "july" - "august" "september" "october" "november" "december"))) - - - (if (null? months) - -1 - (let ((len (min (string-length (car months)) - (string-length str)))) - (if (string=? - (string-take (string-downcase str) len) - (string-take (car months) len)) - i - (loop (1+ i) (cdr months))))))) - - -(define-record-type feed - (fields url transformer)) - -(define (parse-date str) - (and=> (string-match "([a-zA-Z]*)\\. ([0-9]+), ([0-9]+), ([0-9]+):([0-9]+) ([ap])\\.m\\." str) - (lambda (m) - (let ((month (match:substring m 1)) - (day (string->number (match:substring m 2))) - (year (string->number (match:substring m 3))) - (hour (string->number (match:substring m 4))) - (minute (string->number (match:substring m 5))) - (am-pm (match:substring m 6))) - (datetime #:year year - #:month (parse-month month) - #:day day - #:hour (+ hour (if (string=? am-pm "p") 12 0)) - #:minute minute))))) - - -(define feeds - (list - (make-feed - "https://lwn.net/headlines/Features" - `((rss:item - . ,(lambda (key . children) - (define tag (cons key children)) - (call-with-values (lambda () (apply values ((sxpath '(rss:title *text*)) tag))) - (case-lambda (() tag) - ((title . _) - (if (string-match "^\\[\\$\\]" title) - '() tag)))))))) - - (make-feed - "https://swordscomic.com/comic/feed/" - `((rss:pubDate - . ,(lambda (key . children) - (cond ((parse-date (car children)) - => (lambda (dt) - (list key (datetime->string dt "~Y-~m-~dT~H:~M:~S")))) - (else (cons key children))))) - - (rss:description - . ,(lambda (key . children) - (cons key - ;; Each entry has a " (car children)) - => (lambda (m) - (list (string-drop (car children) (match:end m))))) - (else children))))) - )) - )) - -(define rss-namespaces - '((rdf . "http://www.w3.org/1999/02/22-rdf-syntax-ns#") - (dc . "http://purl.org/dc/elements/1.1/") - (syn . "http://purl.org/rss/1.0/modules/syndication/") - (rss . "http://purl.org/rss/1.0/") - (atom . "http://www.w3.org/2005/Atom") - (#f . "http://purl.org/rss/1.0/") - )) - -(define (parse-rss port) - (xml->sxml port #:namespaces rss-namespaces)) - -(define (namespaces->sxml namespaces) - (map (lambda (namespace) - (list (string->symbol - (if (not (car namespace)) - "xmlns" - (string-append "xmlns:" (symbol->string (car namespace))))) - (cdr namespace))) - namespaces)) - - -(define (filter-tree transformers tree) - (pre-post-order - tree - `(,@transformers - (rdf:RDF . ,(lambda (tag . children) - (cond (((sxpath '(@)) (cons tag children)) - (negate null?) - => (lambda (attributes) - (write attributes (current-error-port)) (newline (current-error-port)) - `(,tag - (@ ,@(namespaces->sxml rss-namespaces) - ,@(cdar attributes)) - ,@(cdr children)))) - (else - `(,tag (@ ,@(namespaces->sxml rss-namespaces)) - ,@(cdr children)))))) - (*text* . ,(lambda (_ x . xs) x)) - (*default* . ,(lambda (item . children) - (cons item children))))) ) (define option-spec `((output (single-char #\o) @@ -180,4 +65,4 @@ Returns -1 on failure" (define opts (getopt-long args option-spec)) (define output-directory (option-ref opts 'output ".")) - (for-each (handle-feed output-directory) feeds)) + (for-each (handle-feed output-directory) (@ (config) feeds))) -- cgit v1.2.3