#include #include #include #include #include #include SCM handle_node (GumboNode* n) { SCM dummytagname, scm_children = SCM_EOL; GumboNode* ch; switch (n->type) { case GUMBO_NODE_DOCUMENT: ; GumboDocument doc = n->v.document; for (int i = 0; i < doc.children.length; i++) { ch = doc.children.data[i]; scm_children = scm_cons (handle_node(ch), scm_children); } scm_children = scm_reverse_x(scm_children, SCM_EOL); return scm_cons ( scm_from_utf8_symbol("*TOP*"), scm_cons ( SCM_BOOL_F, scm_children)); case GUMBO_NODE_TEMPLATE: case GUMBO_NODE_ELEMENT: ; SCM attributes; GumboElement el = n->v.element; GumboVector attr = el.attributes; attributes = scm_c_eval_string("(make-hash-table)"); GumboAttribute* a; for (int i = 0; i < attr.length; i++) { a = attr.data[i]; scm_hashq_set_x (attributes, scm_from_utf8_symbol(a->name), scm_from_utf8_stringn(a->value, strlen(a->value))); } for (int i = 0; i < el.children.length; i++) { ch = el.children.data[i]; scm_children = scm_cons (handle_node(ch), scm_children); } scm_children = scm_reverse_x(scm_children, SCM_EOL); return scm_cons (scm_from_utf8_symbol(gumbo_normalized_tagname(el.tag)), scm_cons (attributes, scm_children)); case GUMBO_NODE_TEXT: case GUMBO_NODE_WHITESPACE: return scm_from_utf8_stringn(n->v.text.text, strlen(n->v.text.text)); case GUMBO_NODE_CDATA: dummytagname = scm_from_utf8_symbol("cdata"); goto rettext; case GUMBO_NODE_COMMENT: dummytagname = scm_from_utf8_symbol("comment"); rettext: return scm_cons (dummytagname, scm_cons ( SCM_BOOL_F, scm_from_utf8_stringn(n->v.text.text, strlen(n->v.text.text)))); } return SCM_BOOL_F; } SCM_DEFINE (parse_html, "parse-html", 1, 0, 0, (SCM filename), "") { char* fname = scm_to_utf8_string(filename); int fd = open(fname, O_RDONLY); struct stat sb; fstat(fd, &sb); const char* buf = mmap(NULL, sb.st_size, PROT_READ, MAP_SHARED, fd, 0); if (buf == NULL) { fprintf(stderr, "Error MMAPing file\n"); return scm_from_utf8_symbol("mmap-err"); } GumboOutput* output = gumbo_parse(buf); SCM ret = handle_node (output->root); // GumboNode* p = n->parent; gumbo_destroy_output(&kGumboDefaultOptions, output); return ret; } void init_html (void) { #ifndef SCM_MAGIC_SNARFER #include "html.x" #endif }