about summary refs log tree commit diff
path: root/html.c
diff options
context:
space:
mode:
Diffstat (limited to 'html.c')
-rw-r--r-- html.c 101
1 files changed, 0 insertions, 101 deletions
diff --git a/html.c b/html.c
deleted file mode 100644
index 79d8615..0000000
--- a/html.c
+++ /dev/null
@@ -1,101 +0,0 @@
-#include <gumbo.h>
-#include <libguile.h>
-
-#include <sys/mman.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-
-SCM handle_node (GumboNode* n) {
-
- SCM dummytagname, scm_children = SCM_EOL;
- GumboNode* ch;
- switch (n->type) {
- case GUMBO_NODE_DOCUMENT:
- ;
- GumboDocument doc = n->v.document;
- for (int i = 0; i < doc.children.length; i++) {
- ch = doc.children.data[i];
- scm_children = scm_cons (handle_node(ch), scm_children);
- }
- scm_children = scm_reverse_x(scm_children, SCM_EOL);
-
- return scm_cons ( scm_from_utf8_symbol("*TOP*"),
- scm_cons ( SCM_BOOL_F, scm_children));
-
- case GUMBO_NODE_TEMPLATE:
- case GUMBO_NODE_ELEMENT:
- ;
- SCM attributes;
-
- GumboElement el = n->v.element;
- GumboVector attr = el.attributes;
-
- attributes = scm_c_eval_string("(make-hash-table)");
-
- GumboAttribute* a;
- for (int i = 0; i < attr.length; i++) {
- a = attr.data[i];
- scm_hashq_set_x (attributes,
- scm_from_utf8_symbol(a->name),
- scm_from_utf8_stringn(a->value, strlen(a->value)));
- }
-
- for (int i = 0; i < el.children.length; i++) {
- ch = el.children.data[i];
- scm_children = scm_cons (handle_node(ch), scm_children);
- }
- scm_children = scm_reverse_x(scm_children, SCM_EOL);
-
- return scm_cons (scm_from_utf8_symbol(gumbo_normalized_tagname(el.tag)),
- scm_cons (attributes, scm_children));
-
- case GUMBO_NODE_TEXT:
- case GUMBO_NODE_WHITESPACE:
- return scm_from_utf8_stringn(n->v.text.text, strlen(n->v.text.text));
-
- case GUMBO_NODE_CDATA:
- dummytagname = scm_from_utf8_symbol("cdata");
- goto rettext;
- case GUMBO_NODE_COMMENT:
- dummytagname = scm_from_utf8_symbol("comment");
-
-rettext:
- return scm_cons (dummytagname,
- scm_cons ( SCM_BOOL_F,
- scm_from_utf8_stringn(n->v.text.text, strlen(n->v.text.text))));
- }
- return SCM_BOOL_F;
-}
-
-SCM_DEFINE (parse_html, "parse-html", 1, 0, 0,
- (SCM filename),
- "")
-{
- char* fname = scm_to_utf8_string(filename);
- int fd = open(fname, O_RDONLY);
- struct stat sb;
- fstat(fd, &sb);
- const char* buf = mmap(NULL, sb.st_size, PROT_READ, MAP_SHARED, fd, 0);
-
-
- if (buf == NULL) {
- fprintf(stderr, "Error MMAPing file\n");
- return scm_from_utf8_symbol("mmap-err");
- }
-
- GumboOutput* output = gumbo_parse(buf);
-
- SCM ret = handle_node (output->root);
- // GumboNode* p = n->parent;
-
- gumbo_destroy_output(&kGumboDefaultOptions, output);
-
- return ret;
-}
-
-void init_html (void) {
-#ifndef SCM_MAGIC_SNARFER
-#include "html.x"
-#endif
-}