diff options
Diffstat (limited to '')
-rw-r--r-- | .gitignore | 5 | ||||
-rw-r--r-- | 100-2.html | 25 | ||||
-rw-r--r-- | 100.html | 46 | ||||
-rw-r--r-- | Main.hs | 8 | ||||
-rw-r--r-- | Makefile | 27 | ||||
-rw-r--r-- | README | 4 | ||||
-rw-r--r-- | fmt-stack.scm | 55 | ||||
-rw-r--r-- | html.c | 101 | ||||
-rw-r--r-- | html.scm | 19 | ||||
-rw-r--r-- | json.c | 112 | ||||
-rw-r--r-- | json.scm | 8 | ||||
-rwxr-xr-x | main.scm | 147 | ||||
m--------- | monad | 0 | ||||
-rwxr-xr-x | script.scm | 136 | ||||
-rwxr-xr-x | texttv.py | 24 | ||||
-rwxr-xr-x | ttv | 10 | ||||
-rwxr-xr-x | wrapper.sh | 2 |
17 files changed, 488 insertions, 241 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5abbd84 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +.last-updated +texttv.json +cache/ +html +json diff --git a/100-2.html b/100-2.html deleted file mode 100644 index 8871c2d..0000000 --- a/100-2.html +++ /dev/null @@ -1,25 +0,0 @@ -<div class="root"><span class="toprow"> 100 SVT Text Fredag 09 nov 2018 - </span><span class="B bgB"> </span><span class="B bgB"> </span> - <span class="B bgB"> </span><span class="B bgB"> </span> - <span class="B bgB"> </span><span class="B bgB"> </span> - <span class="B bgB"> </span><span class="B bgB"> </span> - <span class="Y"> </span> - <h1 class="Y DH"> Ätstörningar bland ungdomar fördubblas</h1> - <span class="Y"> Allt fler i åldern 10-14 år söker vård</span> - <span class="Y"> <a href="/106">106</a></span><span class="Y"> </span> - <span class="C"> </span> - <span class="C"> Man död efter skjutning </span> - <span class="C"> i Uppsala i går kväll </span> - <span class="C"> <a href="/108">108</a> </span> - <span class="Y"> </span><span class="Y"> </span> - <h1 class="Y DH"> Kalifornien-stad förstörd i skogsbrand</h1> - <span class="Y"> Räddningstjänsten: Staden helt ödelagd</span> - <span class="Y"> <a href="/130">130</a></span><span class="Y"> </span> - <span class="C"> </span> - <span class="C"> Österrikisk överste spionmisstänkt <a href="/136">136</a></span> - - <span class='added-line'> </span> - <span class='added-line'> </span> - <span class='added-line'> </span> - <span class="B bgB DH"> </span><span class="B bgB DH"> </span><h1 class="Y bgB DH"> Inrikes <a href="/101">101</a> Utrikes <a href="/104">104</a> Innehåll <a href="/700">700</a></h1> - </div> diff --git a/100.html b/100.html deleted file mode 100644 index defc465..0000000 --- a/100.html +++ /dev/null @@ -1,46 +0,0 @@ -<style> -.B { color: blue; } -.bgB { background-color: blue; } -.Y { color: yellow; } -.DH { font-weight: bold } -.C { color: cyan; } -.root { - white-space: pre; - font-family: monospace; - font-size: 12pt !important; - background-color: black; -} -h1 { - font-size: 12pt; - display: inline; -} -.toprow { - color: white; -} -</style> - -<div class="root"><span class="toprow"> 100 SVT Text Fredag 09 nov 2018 - </span><span class="B bgB"> </span><span class="B bgB"> </span> - <span class="B bgB"> </span><span class="B bgB"> </span> - <span class="B bgB"> </span><span class="B bgB"> </span> - <span class="B bgB"> </span><span class="B bgB"> </span> - <span class="Y"> </span> - <h1 class="Y DH"> Ätstörningar bland ungdomar fördubblas</h1> - <span class="Y"> Allt fler i åldern 10-14 år söker vård</span> - <span class="Y"> <a href="/106">106</a></span><span class="Y"> </span> - <span class="C"> </span> - <span class="C"> Man död efter skjutning </span> - <span class="C"> i Uppsala i går kväll </span> - <span class="C"> <a href="/108">108</a> </span> - <span class="Y"> </span><span class="Y"> </span> - <h1 class="Y DH"> Kalifornien-stad förstörd i skogsbrand</h1> - <span class="Y"> Räddningstjänsten: Staden helt ödelagd</span> - <span class="Y"> <a href="/130">130</a></span><span class="Y"> </span> - <span class="C"> </span> - <span class="C"> Österrikisk överste spionmisstänkt <a href="/136">136</a></span> - - <span class='added-line'> </span> - <span class='added-line'> </span> - <span class='added-line'> </span> - <span class="B bgB DH"> </span><span class="B bgB DH"> </span><h1 class="Y bgB DH"> Inrikes <a href="/101">101</a> Utrikes <a href="/104">104</a> Innehåll <a href="/700">700</a></h1> -</div> diff --git a/Main.hs b/Main.hs deleted file mode 100644 index c0e4ed0..0000000 --- a/Main.hs +++ /dev/null @@ -1,8 +0,0 @@ -import Text.ParserCombinators.Parsec - -docParser :: GenParser Char () Doc - -parseLine :: - -main :: IO () -main = putStrLn "HaHa" diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..8f867c9 --- /dev/null +++ b/Makefile @@ -0,0 +1,27 @@ +CFLAGS := -fPIC $(shell guile-config compile) \ + $(shell pkg-config --cflags gumbo) \ + -I/usr/local/include/ -Wall +LFLAGS := $(shell guile-config link) \ + $(shell pkg-config --libs gumbo) \ + -L/usr/local/lib -lcjson +CC := gcc + +.PHONY: all clean + +all: libguile-json.so libguile-html.so + +%: %.oo + $(CC) -o $@ $< $(LFLAGS) + +%.x: %.c + guile-snarf -o $@ $< $(CFLAGS) + +libguile-%.so: %.oo + $(CC) -shared -o $@ $< $(LFLAGS) + +%.oo: %.c %.x + $(CC) -c -o $@ $(CFLAGS) $< + +clean: + -rm *.oo + -rm libguile-*.so @@ -0,0 +1,4 @@ +Dependencies +------------ +- cJSON +- Gumbo diff --git a/fmt-stack.scm b/fmt-stack.scm new file mode 100644 index 0000000..ced6be3 --- /dev/null +++ b/fmt-stack.scm @@ -0,0 +1,55 @@ +(define-module (fmt-stack) + #:export (get-attr + set-fg set-bg set-style + make-fmt-frame empty-fmt-frame + fmt-frame->ansi-esc) + + #:use-module (control monad) + #:use-module (control monad state) + + #:use-module (data optional) + + #:use-module (srfi srfi-1) + #:use-module (srfi srfi-9 gnu)) + +(define-immutable-record-type fmt-frame + (make-fmt-frame style fg bg) + fmt-frame? + (style get-style set-style) + (fg get-fg set-fg) + (bg get-bg set-bg)) + +(define (empty-fmt-frame) + (make-fmt-frame #f #f #f)) + +(define (fmt-frame->ansi-esc frame) + (string-append + "\x1b[m" + (case (get-fg frame) + ((B) "\x1b[0;34m") + ((Y) "\x1b[0;33m") + ((C) "\x1b[0;36m") + (else "")) + + (case (get-bg frame) + ((B) "\x1b[44m") + ((Y) "\x1b[43m") + ((C) "\x1b[46m") + (else "")) + + (case (get-style frame) + ((underline) "\x1b[4m") + ((italic) "\x1b[3m") + ((bold) "\x1b[1m") + (else "")))) + +(define (get-attr) + (do stack <- (get) + (return-state + (fmt-frame->ansi-esc + (make-fmt-frame + (get-style (find get-style stack)) + (get-fg (find get-fg stack)) + (get-bg (find get-bg stack))))))) + + @@ -0,0 +1,101 @@ +#include <gumbo.h> +#include <libguile.h> + +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> + +SCM handle_node (GumboNode* n) { + + SCM dummytagname, scm_children = SCM_EOL; + GumboNode* ch; + switch (n->type) { + case GUMBO_NODE_DOCUMENT: + ; + GumboDocument doc = n->v.document; + for (int i = 0; i < doc.children.length; i++) { + ch = doc.children.data[i]; + scm_children = scm_cons (handle_node(ch), scm_children); + } + scm_children = scm_reverse_x(scm_children, SCM_EOL); + + return scm_cons ( scm_from_utf8_symbol("*TOP*"), + scm_cons ( SCM_BOOL_F, scm_children)); + + case GUMBO_NODE_TEMPLATE: + case GUMBO_NODE_ELEMENT: + ; + SCM attributes; + + GumboElement el = n->v.element; + GumboVector attr = el.attributes; + + attributes = scm_c_eval_string("(make-hash-table)"); + + GumboAttribute* a; + for (int i = 0; i < attr.length; i++) { + a = attr.data[i]; + scm_hashq_set_x (attributes, + scm_from_utf8_symbol(a->name), + scm_from_utf8_stringn(a->value, strlen(a->value))); + } + + for (int i = 0; i < el.children.length; i++) { + ch = el.children.data[i]; + scm_children = scm_cons (handle_node(ch), scm_children); + } + scm_children = scm_reverse_x(scm_children, SCM_EOL); + + return scm_cons (scm_from_utf8_symbol(gumbo_normalized_tagname(el.tag)), + scm_cons (attributes, scm_children)); + + case GUMBO_NODE_TEXT: + case GUMBO_NODE_WHITESPACE: + return scm_from_utf8_stringn(n->v.text.text, strlen(n->v.text.text)); + + case GUMBO_NODE_CDATA: + dummytagname = scm_from_utf8_symbol("cdata"); + goto rettext; + case GUMBO_NODE_COMMENT: + dummytagname = scm_from_utf8_symbol("comment"); + +rettext: + return scm_cons (dummytagname, + scm_cons ( SCM_BOOL_F, + scm_from_utf8_stringn(n->v.text.text, strlen(n->v.text.text)))); + } + return SCM_BOOL_F; +} + +SCM_DEFINE (parse_html, "parse-html", 1, 0, 0, + (SCM filename), + "") +{ + char* fname = scm_to_utf8_string(filename); + int fd = open(fname, O_RDONLY); + struct stat sb; + fstat(fd, &sb); + const char* buf = mmap(NULL, sb.st_size, PROT_READ, MAP_SHARED, fd, 0); + + + if (buf == NULL) { + fprintf(stderr, "Error MMAPing file\n"); + return scm_from_utf8_symbol("mmap-err"); + } + + GumboOutput* output = gumbo_parse(buf); + + SCM ret = handle_node (output->root); + // GumboNode* p = n->parent; + + gumbo_destroy_output(&kGumboDefaultOptions, output); + + return ret; +} + +void init_html (void) { +#ifndef SCM_MAGIC_SNARFER +#include "html.x" +#endif +} diff --git a/html.scm b/html.scm new file mode 100644 index 0000000..90dd039 --- /dev/null +++ b/html.scm @@ -0,0 +1,19 @@ +(define-module (html) + #:export (parse-html) + #:re-export (write) + #:use-module (oop goops) + ) + +;; (setenv "LD_LIBRARY_PATH" (dirname (current-filename))) + +;; exposes the parse-html symbol +(load-extension "libguile-html" "init_html") + +(define (fmt-hash-table t) + `(@ ,@(hash-map->list cons t))) + +(define-method (write (t <hashtable>) port) + (display (fmt-hash-table t) port)) + +(define-method (display (t <hashtable>) port) + (display (fmt-hash-table t) port)) @@ -0,0 +1,112 @@ +#include <cjson/cJSON.h> +#include <libguile.h> + +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> + +static SCM json_type; + +static inline void assert_json (SCM item) + { scm_assert_foreign_object_type (json_type, item); } +static inline cJSON* to_json (SCM item) + { return scm_foreign_object_ref (item, 0); } +static inline SCM from_json (cJSON* item) + { return scm_make_foreign_object_1 (json_type, item); } + +SCM_DEFINE (parse_json, "parse-json", 1, 0, 0, + (SCM filename), + "") +{ + char* fname = scm_to_utf8_stringn (filename, NULL); + + int fd = open(fname, O_RDONLY); + struct stat sb; + fstat(fd, &sb); + const char* buf = mmap(NULL, sb.st_size, PROT_READ, MAP_SHARED, fd, 0); + + return from_json (cJSON_Parse(buf)); +} + +SCM json_to_scheme (cJSON* j) { + cJSON* node; + switch (j->type) { + case cJSON_Invalid: + return scm_from_utf8_symbol("invalid"); + + case cJSON_False: + return SCM_BOOL_F; + + case cJSON_True: + return SCM_BOOL_T; + + case cJSON_NULL: + return SCM_EOL; + + case cJSON_Number: + return scm_values(scm_list_2( + scm_from_int(j->valueint), + scm_from_double(j->valuedouble))); + + case cJSON_String: + return scm_from_utf8_string(j->valuestring); + + case cJSON_Array: + node = j->child; + + SCM list = SCM_EOL; + while (node != NULL) { + list = scm_cons(json_to_scheme(node), list); + node = node->next; + } + return scm_vector(scm_reverse_x (list, SCM_EOL)); + + case cJSON_Object: + node = j->child; + SCM table = scm_c_eval_string ("(make-hash-table)"); + while (node->next != NULL) { + scm_hashq_set_x (table, + scm_from_utf8_symbol(node->string), + json_to_scheme(node)); + node = node->next; + } + return table; + + + case cJSON_Raw: + return scm_from_utf8_symbol("raw"); + } + + return scm_from_utf8_symbol("nocase"); +} + +SCM_DEFINE(json_to_scheme_, "json->scm", 1, 0, 0, + (SCM json), + "") +{ + assert_json(json); + cJSON* j = to_json(json); + + return json_to_scheme (j); +} + +static void finilize_json (SCM json) { + cJSON* j = to_json(json); + cJSON_Delete (j); +} + +static void init_json_type (void) { + SCM name = scm_from_utf8_symbol("json"); + SCM slot = scm_list_1(scm_from_utf8_symbol("data")); + scm_t_struct_finalize finilizer = finilize_json;; + json_type = scm_make_foreign_object_type (name, slot, finilizer); +} + +void init_json (void) { + init_json_type (); + +#ifndef SCM_MAGIC_SNARFER +#include "json.x" +#endif +} diff --git a/json.scm b/json.scm new file mode 100644 index 0000000..bb33e5d --- /dev/null +++ b/json.scm @@ -0,0 +1,8 @@ +(define-module (json) + #:export (get-json parse-json json->scm)) +;; (setenv "LD_LIBRARY_PATH" (string-append "/usr/local/lib:" (getcwd))) + +(load-extension "libguile-json" "init_json") + +(define (get-json filename) + (json->scm (parse-json filename))) diff --git a/main.scm b/main.scm new file mode 100755 index 0000000..fd4489e --- /dev/null +++ b/main.scm @@ -0,0 +1,147 @@ +#!/usr/bin/guile \ +-q -e main -s +!# + +(add-to-load-path (dirname (current-filename))) +(add-to-load-path (string-append (dirname (current-filename)) "/monad/")) +(add-to-load-path "/home/hugo/lib/guile") + +(add-to-load-path "/home/hugo/code/calparse") ; For (util), move that to a library + +(setenv "LD_LIBRARY_PATH" (dirname (current-filename))) + +(use-modules #; (sxml simple) + ;; (sxml match) + + (srfi srfi-1) + (srfi srfi-26) + (srfi srfi-43) ; Vector iteration + + ;; (ice-9 rdelim) + (ice-9 regex) + (ice-9 popen) + + (macros arrow) + (util) + + (control monad) + (control monad state) + (data stack) + + (fmt-stack) + (html) + (json)) + +(define-macro (regex-case str . cases) + `(cond + ,@(map (lambda (case) + (let ((pattern (car case)) + (rest (cdr case))) + (if (eq? pattern 'else) + `(else ,@rest) + `((string-match ,pattern ,str) ,@rest)))) + cases))) + +(define (substr-1 match) + (match:substring match 1)) + +(define (class-handlers class) + (fold (lambda (cl obj) + (regex-case + cl + ("^DH$" (set-style obj 'bold)) + ("^bg([BYC])$" => (compose (cut set-bg obj <>) string->symbol substr-1)) + ("^([BYC])$" => (compose (cut set-fg obj <>) string->symbol substr-1)) + (else obj))) + (empty-fmt-frame) + (string-split class #\space))) + +;;; TODO every push and pop should emit current ANSI-escape after it has run. + +(use-modules (ice-9 match)) + +;;; TODO Every clause should return a string, in the state context of a state. + +;; +-- State storage, "mutable" +;; | +- Return value, static +;; V V +;; sxml → State <fmt-stack> <string> +(define (fmt-tag tag) + (match tag + + [('a _ body ...) + (do fmt-before <- (get-attr) + (push (make-fmt-frame 'underline 'B #f)) + fmt-with <- (get-attr) + (pop) + (return-state + (string-append fmt-with (car body) fmt-before)))] + + [((or 'h1 'span) attrs nodes ...) + (do let class = (hashq-ref attrs 'class) + fmt-before <- (get-attr) + (push (class-handlers class)) + fmt-with <- (get-attr) + ret <- (fmap (cut string-append fmt-with <> fmt-before) + (fmap string-concatenate (sequence (map fmt-tag nodes)))) + (pop) + (return-state ret))] + + ;; Default rule, since the above case requires a class list + [(tag _ node nodes ...) + (fmap string-concatenate (sequence (map fmt-tag (cons node nodes))))] + + ;; Just ignore tags without children + [(tag _) (return-state "")] + + [(? string? str) (return-state str)] + + [default (return-state (format #f "[|~a|]" default))])) + +(define (parse-html-string str) + (let ((fname (tmpnam))) + (with-output-to-file fname + (lambda () (display str))) + (parse-html fname))) + +(define (text-for-num j int) + (let* ((table (vector-ref j (- int 100))) + (arr (hashq-ref table 'content)) + (text (array-ref arr 0))) + text)) + +(define (max-date vect) + (vector-fold (lambda (i accum el) (max accum (hashq-ref el 'date_updated_unix))) + 0 vect)) + +(define (main args) + (let* (((self filename pagestr) args) + (page (string->number pagestr))) + + (let ((last-updated (call-with-input-file ".last-updated" read))) + (when (> (- (current-time) last-updated) 10000) + (display "Downloading new data, please stand by...") + (system* "curl" + "-s" "http://api.texttv.nu/api/get/100-999?app=hugonikanor" + "-o" filename) + + (let* ((json (get-json filename)) + (update (max-date json))) + + (with-output-to-file ".last-updated" (lambda () (display update))) + + (vector-for-each + (lambda (i el) + (let ((num (hashq-ref el 'num)) + (text (vector-ref (hashq-ref el 'content) 0))) + (with-output-to-file (format #f "cache/~a.ansi" num) + (lambda () (-> (parse-html-string text) + fmt-tag + (run-state (list (make-fmt-frame "" "" ""))) + car display))))) + json))) + + (let ((p1 (format #f "cache/~a.ansi" page)) + (p2 (format #f "cache/~a.ansi" (1+ page)))) + (system* "paste" p1 p2)) + ))) diff --git a/monad b/monad -Subproject f1225201c9ded1078ef1f98fbf4969a8480d3b3 +Subproject a4a7edd487d4d2207829462d06c40578dc75733 diff --git a/script.scm b/script.scm deleted file mode 100755 index b43428d..0000000 --- a/script.scm +++ /dev/null @@ -1,136 +0,0 @@ -#!/usr/bin/guile \ --e main -s -!# - -(use-modules (sxml simple) - (sxml match) - (ice-9 match) - (srfi srfi-1)) - -(define (setmode . args) - "Sets display mode" - (with-output-to-string - (lambda () - (for-each display - `(#\escape #\[ ,@args #\m))))) - - -(define* (deffunc name idx - #:optional (func-pre '#{}#) (mode-pre "")) - (let ((str (gensym))) - `(define (,(symbol-append func-pre name) . ,str) - (string-append - (setmode ,mode-pre ,idx) - #; (string #\escape #\[ #\K) - (string-concatenate ,str) - (setmode 0))))) - -(define-macro (create-modes modes) - `(begin - ,@(map deffunc - modes - (iota (length modes))))) - -(define-macro (create-colors modes) - `(begin - ,@(apply append - (map (lambda (itm idx) - (list (deffunc itm idx 'fg- "0;3") - (deffunc itm idx 'bg- "0;4"))) - modes - (iota (length modes)))))) - -(create-modes (off bold dim slant underline)) -(create-colors (black red green yellow blue purple cyan white)) - -(define classmap - (match-lambda ("B" fg-blue) - ("bgB" bg-blue) - ("Y" fg-yellow) - ("C" fg-cyan) - ("DH" bold) - (_ identity))) - -(define (push item stack) (cons item stack)) -(define (pop stack) (unless (null? stack) - (car+cdr stack))) -(define (peek stack) (if (null? stack) '() (car stack))) - -(define (class-handlers class-str) - (fold compose identity - (map classmap (string-split class-str #\space)))) - -(define (fmt-sub nodes) - (string-concatenate (map fmt-tag nodes))) - -(define (fmt-tag tag) - (sxml-match tag - [(a #; (@ (class ,class)) ,text) - (underline text)] - - [(h1 (@ (class ,class)) ,nodes ...) - ((class-handlers class) - (bold (fmt-sub nodes)))] - - [(span (@ (class ,class)) ,nodes ...) - ((class-handlers class) - (fmt-sub nodes))] - - [,str (guard (string? str)) str] - - [,default (format #f "[|~a|]" default)] - )) - -;;; -;;; -;;; - -(define (fmt-tag tag) - (sxml-match tag - [(a #; (@ (class ,class)) ,text) - (underline text)] - - [(h1 (@ (class ,class)) ,nodes ...) - ((class-handlers class) - (bold (fmt-sub nodes)))] - - [(span (@ (class ,class)) ,nodes ...) - ((class-handlers class) - (fmt-sub nodes))] - - [,str (guard (string? str)) str] - - [,default (format #f "[|~a|]" default)] - )) - -(define (parse-doc sxml) - (sxml-match sxml - [(*TOP* (div #; (span (@ (class "toprow")) ,top-row ...) - ,spans ...)) - spans - ])) - -'(span (@ (class "B")) - "A" - (span (@ (class "G")) - "B") - "C") - -;; => (blue A (green B) C) -;; => set-blue A set-green B unset-green/set-blue C unset-blue - -;; push blue : print-esc -;; print A -;; push green : print-esc -;; print B -;; pop : print-esc -;; print C -;; pop : print-esc - - - - -(define (main args) - (define d (call-with-input-file "100-2.html" xml->sxml)) - (display (string-concatenate (map fmt-tag (parse-doc d)))) - (newline)) diff --git a/texttv.py b/texttv.py deleted file mode 100755 index a6cf2b1..0000000 --- a/texttv.py +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env python2 - -from httplib2 import Http -from urllib import urlencode -import json -import re -import sys - -def cleanhtml(raw_html): - cleanr = re.compile('<.*?>') - cleantext = re.sub(cleanr, '', raw_html) - return cleantext - -if len(sys.argv) > 1: - page = sys.argv[1] -else: - page = '100' - -http = Http() -url = 'http://api.texttv.nu/api/get/' + page + '?app=python' -headers = {'Accept': 'application/json', 'Accept-Language': 'en-US,en'} -response, content = http.request(url, 'GET', headers=headers) -data = json.loads(content) -print cleanhtml(data[0]['content'][0]) @@ -0,0 +1,10 @@ +#!/bin/bash + +pushd $(dirname $(realpath $0)) > /dev/null + +LD_LIBRARY_PATH=$PWD + +file=texttv.json +page=${1:- 100} + +./main.scm $file $page diff --git a/wrapper.sh b/wrapper.sh deleted file mode 100755 index 11d9236..0000000 --- a/wrapper.sh +++ /dev/null @@ -1,2 +0,0 @@ -curl http://api.texttv.nu/api/get/100?app=hugonikanor \ - | jq -j '.[0].content[0]' \ |