Skip to content
This repository has been archived by the owner on Aug 26, 2023. It is now read-only.

Commit

Permalink
Merge pull request #46 from stevecheckoway/report-parse-errors
Browse files Browse the repository at this point in the history
Expose Gumbo parse errors to Nokogiri document.
  • Loading branch information
rubys authored Aug 5, 2016
2 parents 1f8c668 + e361b49 commit e9f5fe4
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 7 deletions.
51 changes: 44 additions & 7 deletions ext/nokogumboc/nokogumbo.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,12 @@

#include <ruby.h>
#include <gumbo.h>
#include <error.h>
#include <parser.h>

// class constants
static VALUE Document;
static VALUE XMLSyntaxError;

#ifdef NGLIB
#include <nokogiri.h>
Expand Down Expand Up @@ -182,10 +185,10 @@ static xmlNodePtr walk_tree(xmlDocPtr document, GumboNode *node) {

// Parse a string using gumbo_parse into a Nokogiri document
static VALUE parse(VALUE self, VALUE string) {
GumboOutput *output = gumbo_parse_with_options(
&kGumboDefaultOptions, RSTRING_PTR(string),
(size_t) RSTRING_LEN(string)
);
const GumboOptions *options = &kGumboDefaultOptions;
const char *input = RSTRING_PTR(string);
size_t input_len = RSTRING_LEN(string);
GumboOutput *output = gumbo_parse_with_options(options, input, input_len);
xmlDocPtr doc = xmlNewDoc(CONST_CAST "1.0");
#ifdef NGLIB
doc->type = XML_HTML_DOCUMENT_NODE;
Expand All @@ -210,9 +213,42 @@ static VALUE parse(VALUE self, VALUE string) {
xmlAddChild((xmlNodePtr)doc, node);
}
}
gumbo_destroy_output(&kGumboDefaultOptions, output);

return Nokogiri_wrap_xml_document(Document, doc);
VALUE rdoc = Nokogiri_wrap_xml_document(Document, doc);

// Add parse errors to rdoc.
if (output->errors.length) {
GumboVector *errors = &output->errors;
GumboParser parser = { ._options = options };
GumboStringBuffer msg;
VALUE rerrors = rb_ary_new2(errors->length);

gumbo_string_buffer_init(&parser, &msg);
for (int i=0; i < errors->length; i++) {
GumboError *err = errors->data[i];
gumbo_string_buffer_clear(&parser, &msg);
gumbo_caret_diagnostic_to_string(&parser, err, input, &msg);
VALUE err_str = rb_str_new(msg.data, msg.length);
VALUE syntax_error = rb_class_new_instance(1, &err_str, XMLSyntaxError);
rb_iv_set(syntax_error, "@domain", INT2NUM(1)); // XML_FROM_PARSER
rb_iv_set(syntax_error, "@code", INT2NUM(1)); // XML_ERR_INTERNAL_ERROR
rb_iv_set(syntax_error, "@level", INT2NUM(2)); // XML_ERR_ERROR
rb_iv_set(syntax_error, "@file", Qnil);
rb_iv_set(syntax_error, "@line", INT2NUM(err->position.line));
rb_iv_set(syntax_error, "@str1", Qnil);
rb_iv_set(syntax_error, "@str2", Qnil);
rb_iv_set(syntax_error, "@str3", Qnil);
rb_iv_set(syntax_error, "@int1", INT2NUM(err->type));
rb_iv_set(syntax_error, "@column", INT2NUM(err->position.column));
rb_ary_push(rerrors, syntax_error);
}
rb_iv_set(rdoc, "@errors", rerrors);
gumbo_string_buffer_destroy(&parser, &msg);
}

gumbo_destroy_output(options, output);

return rdoc;
}

// Initialize the Nokogumbo class and fetch constants we will use later
Expand All @@ -224,10 +260,11 @@ void Init_nokogumboc() {
VALUE Nokogiri = rb_const_get(rb_cObject, rb_intern("Nokogiri"));
VALUE HTML = rb_const_get(Nokogiri, rb_intern("HTML"));
Document = rb_const_get(HTML, rb_intern("Document"));
VALUE XML = rb_const_get(Nokogiri, rb_intern("XML"));
XMLSyntaxError = rb_const_get(XML, rb_intern("SyntaxError"));

#ifndef NGLIB
// more class constants
VALUE XML = rb_const_get(Nokogiri, rb_intern("XML"));
Element = rb_const_get(XML, rb_intern("Element"));
Text = rb_const_get(XML, rb_intern("Text"));
CDATA = rb_const_get(XML, rb_intern("CDATA"));
Expand Down
7 changes: 7 additions & 0 deletions test-nokogumbo.rb
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,13 @@ def test_root_comments
assert_equal ["html", "comment", "html", "comment"], doc.children.map(&:name)
end

def test_parse_errors
doc = Nokogiri::HTML5("<!DOCTYPE html><html><!-- -- --></a>")
assert_equal doc.errors.length, 2
doc = Nokogiri::HTML5("<!DOCTYPE html><html>")
assert_empty doc.errors
end

private

def buffer
Expand Down

0 comments on commit e9f5fe4

Please sign in to comment.