aboutsummaryrefslogtreecommitdiff
path: root/scripts/fetch-liu-map-index.scm
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/fetch-liu-map-index.scm')
-rwxr-xr-xscripts/fetch-liu-map-index.scm65
1 files changed, 42 insertions, 23 deletions
diff --git a/scripts/fetch-liu-map-index.scm b/scripts/fetch-liu-map-index.scm
index 31db3844..2ac300e1 100755
--- a/scripts/fetch-liu-map-index.scm
+++ b/scripts/fetch-liu-map-index.scm
@@ -19,35 +19,54 @@
(ice-9 getopt-long)
(sxml gumbo)
(sxml match)
- ((hnh util) :select (->))
+ ((sxml xpath) :select (sxpath))
+ ((hnh util) :select (-> ->>))
(json))
+;; Fallback to ensure we have HTTPS
+(define http-get
+ (catch 'gnutls-not-available
+ (lambda ()
+ ((@@ (web client) ensure-gnutls))
+ (@ (web client) http-get))
+ (lambda _
+ (use-modules (ice-9 popen)
+ (web http))
+
+ (lambda (url . _)
+ (let ((pipe (open-pipe* OPEN_READ "curl" "--include" "--http1.1" url)))
+ (let ((intro (string-split (read-line pipe) #\space)) ; HTTP/1.1 200 OK\r
+ (headers (read-headers pipe)))
+ (let ((response
+ (build-response
+ version: (parse-http-version (list-ref intro 0))
+ code: (string->number (list-ref intro 1))
+ reason-phrase: (string-trim-right
+ (string-join (drop intro 2) " " 'infix)
+ char-whitespace?)
+ headers: headers
+ port: pipe
+ validate-headers?: #t)))
+ (values response pipe))))))))
;; Parse string as HTML, find all links which are "map links",
;; and return them as an association list from name to url-fragments.
-(define (get-data string)
- (define data (html->sxml string))
-
+(define (extract-data string)
(define rx (make-regexp "^karta\\?"))
- (define links
- (map (lambda (node)
- (sxml-match node
- [(a (@ (href ,href)) ,b0 ,body ...)
- (cons href b0)]))
- (((@ (sxml xpath) sxpath) '(// a)) data)))
-
- (define map-links (filter (lambda (pair) (regexp-exec rx (car pair)))
- links))
-
- (define link-table (make-hash-table))
- (for-each (lambda (pair) (hash-set! link-table (string-upcase (string-trim-both (cdr pair)))
- (car pair)))
- map-links)
+ ;; for (let el of document.querySelectorAll('a[href*="karta?"]')) {
+ ;; ret[el.textContent.trim().toUpperCase()] = el.href
+ ;; }
- (hash-map->list (lambda (name frag)
- `(,name . ,frag))
- link-table))
+ (->> (html->sxml string)
+ ((sxpath '(// a)))
+ (map (lambda (node)
+ (sxml-match node
+ [(a (@ (href ,href)) ,b0 ,body ...)
+ (cons href b0)])))
+ (filter (lambda (pair) (regexp-exec rx (car pair))))
+ (map (lambda (pair) (cons (string-upcase (string-trim-both (cdr pair)))
+ (car pair))))))
;; Open a HTTP request to the given URL, and return the
;; response body as a port.
@@ -85,9 +104,9 @@
(let ((port
(cond ((option-ref options 'url #f) => open-input-url)
- ((and=> (option-ref options 'file #f) (lambda (s) (string=? s "-")))
+ ((string=? "-" (option-ref options 'file ""))
(current-input-port))
((option-ref options 'file #f) => open-input-file)
(else (open-input-url "https://old.liu.se/karta/list?l=sv")))))
- (-> port read-string get-data scm->json)
+ (-> port read-string extract-data scm->json)
(newline)))