diff options
Diffstat (limited to 'scripts/fetch-liu-map-index.scm')
-rwxr-xr-x | scripts/fetch-liu-map-index.scm | 65 |
1 files changed, 42 insertions, 23 deletions
diff --git a/scripts/fetch-liu-map-index.scm b/scripts/fetch-liu-map-index.scm index 31db3844..2ac300e1 100755 --- a/scripts/fetch-liu-map-index.scm +++ b/scripts/fetch-liu-map-index.scm @@ -19,35 +19,54 @@ (ice-9 getopt-long) (sxml gumbo) (sxml match) - ((hnh util) :select (->)) + ((sxml xpath) :select (sxpath)) + ((hnh util) :select (-> ->>)) (json)) +;; Fallback to ensure we have HTTPS +(define http-get + (catch 'gnutls-not-available + (lambda () + ((@@ (web client) ensure-gnutls)) + (@ (web client) http-get)) + (lambda _ + (use-modules (ice-9 popen) + (web http)) + + (lambda (url . _) + (let ((pipe (open-pipe* OPEN_READ "curl" "--include" "--http1.1" url))) + (let ((intro (string-split (read-line pipe) #\space)) ; HTTP/1.1 200 OK\r + (headers (read-headers pipe))) + (let ((response + (build-response + version: (parse-http-version (list-ref intro 0)) + code: (string->number (list-ref intro 1)) + reason-phrase: (string-trim-right + (string-join (drop intro 2) " " 'infix) + char-whitespace?) + headers: headers + port: pipe + validate-headers?: #t))) + (values response pipe)))))))) ;; Parse string as HTML, find all links which are "map links", ;; and return them as an association list from name to url-fragments. -(define (get-data string) - (define data (html->sxml string)) - +(define (extract-data string) (define rx (make-regexp "^karta\\?")) - (define links - (map (lambda (node) - (sxml-match node - [(a (@ (href ,href)) ,b0 ,body ...) - (cons href b0)])) - (((@ (sxml xpath) sxpath) '(// a)) data))) - - (define map-links (filter (lambda (pair) (regexp-exec rx (car pair))) - links)) - - (define link-table (make-hash-table)) - (for-each (lambda (pair) (hash-set! link-table (string-upcase (string-trim-both (cdr pair))) - (car pair))) - map-links) + ;; for (let el of document.querySelectorAll('a[href*="karta?"]')) { + ;; ret[el.textContent.trim().toUpperCase()] = el.href + ;; } - (hash-map->list (lambda (name frag) - `(,name . ,frag)) - link-table)) + (->> (html->sxml string) + ((sxpath '(// a))) + (map (lambda (node) + (sxml-match node + [(a (@ (href ,href)) ,b0 ,body ...) + (cons href b0)]))) + (filter (lambda (pair) (regexp-exec rx (car pair)))) + (map (lambda (pair) (cons (string-upcase (string-trim-both (cdr pair))) + (car pair)))))) ;; Open a HTTP request to the given URL, and return the ;; response body as a port. @@ -85,9 +104,9 @@ (let ((port (cond ((option-ref options 'url #f) => open-input-url) - ((and=> (option-ref options 'file #f) (lambda (s) (string=? s "-"))) + ((string=? "-" (option-ref options 'file "")) (current-input-port)) ((option-ref options 'file #f) => open-input-file) (else (open-input-url "https://old.liu.se/karta/list?l=sv"))))) - (-> port read-string get-data scm->json) + (-> port read-string extract-data scm->json) (newline))) |