123456789_123456789_123456789_123456789_123456789_

Class: Nokogiri::XML::SAX::PushParser

Relationships & Source Files
Extension / Inclusion / Inheritance Descendants
Subclasses:
Inherits: Object
Defined in: lib/nokogiri/xml/sax/push_parser.rb,
ext/nokogiri/xml_sax_push_parser.c

Overview

PushParser can parse a document that is fed to it manually. It must be given a SAX::Document object, whose callback methods are invoked with SAX events as the document is being parsed.

Calling #<< writes XML to the parser, invoking any SAX callbacks it can.

#finish tells the parser that the document is finished and calls the end_document SAX callback.

Example:

# Build a handler class that reports when parsing starts.
handler_class = Class.new(XML::SAX::Document) do
  def start_document
    puts "start document called"
  end
end

# Feed the document in two pieces, then signal the end of input.
parser = PushParser.new(handler_class.new)
parser << "<div>hello<"
parser << "/div>"
parser.finish

Class Method Summary

Instance Attribute Summary

Instance Method Summary

Constructor Details

.new(doc = XML::SAX::Document.new, file_name = nil, encoding = "UTF-8") ⇒ PushParser

Create a new PushParser with doc as the SAX Document, optionally providing a file_name and an encoding.

[ GitHub ]

  
# File 'lib/nokogiri/xml/sax/push_parser.rb', line 35

# Builds a PushParser around a SAX document handler.
#
# doc       - handler object that is wrapped in an XML::SAX::Parser
#             (defaults to a fresh XML::SAX::Document).
# file_name - optional name handed to the native initializer (default nil).
# encoding  - stored in @encoding (default "UTF-8").
#
# NOTE(review): within this method +encoding+ is only stored; it is not
# passed to XML::SAX::Parser.new or initialize_native — confirm that is
# intended.
def initialize(doc = XML::SAX::Document.new, file_name = nil, encoding = "UTF-8")
  @encoding = encoding
  @document = doc
  @sax_parser = XML::SAX::Parser.new(@document)

  # The native push parser context is created last, once the SAX parser exists.
  initialize_native(@sax_parser, file_name)
end

Instance Attribute Details

#document (rw)

The Document on which the PushParser will be operating

[ GitHub ]

  
# File 'lib/nokogiri/xml/sax/push_parser.rb', line 30

# Reader and writer for the SAX document handler held by this PushParser.
attr_accessor :document

#options (rw)

[ GitHub ]

  
# File 'ext/nokogiri/xml_sax_push_parser.c', line 103

/*
 * Getter for the parser context options.
 *
 * Unwraps the native parser context from +self+ and returns the value of
 * xmlCtxtGetOptions() converted to a Ruby Integer.
 */
static VALUE
noko_xml_sax_push_parser__options_get(VALUE self)
{
  xmlParserCtxtPtr ctx = noko_xml_sax_push_parser_unwrap(self);
  int option_bits = xmlCtxtGetOptions(ctx);

  return INT2NUM(option_bits);
}

#options=(options) (rw)

[ GitHub ]

  
# File 'ext/nokogiri/xml_sax_push_parser.c', line 113

/*
 * Setter for the parser context options.
 *
 * Converts +options+ to a C int and applies it to the native parser context
 * with xmlCtxtSetOptions(). A nonzero result from that call is reported as a
 * RuntimeError carrying the result in hexadecimal. Returns nil on success.
 */
static VALUE
noko_xml_sax_push_parser__options_set(VALUE self, VALUE options)
{
  xmlParserCtxtPtr ctx = noko_xml_sax_push_parser_unwrap(self);
  int status = xmlCtxtSetOptions(ctx, (int)NUM2INT(options));

  if (status != 0) {
    rb_raise(rb_eRuntimeError, "Cannot set XML parser context options (%x)", status);
  }

  return Qnil;
}

#replace_entities (rw)

See Document@Entity+Handling for an explanation of the behavior controlled by this flag.

Returns

(Boolean) Value of the parse option. (Default false)

This option is perhaps misnamed by the libxml2 author, since it controls resolution and not replacement.

[ GitHub ]

  
# File 'ext/nokogiri/xml_sax_push_parser.c', line 140

/*
 * Getter for the replace_entities flag.
 *
 * Returns true when the XML_PARSE_NOENT bit is set in the parser context's
 * options, false otherwise.
 */
static VALUE
noko_xml_sax_push_parser__replace_entities_get(VALUE self)
{
  xmlParserCtxtPtr ctxt = noko_xml_sax_push_parser_unwrap(self);
  int option_bits = xmlCtxtGetOptions(ctxt);

  return (option_bits & XML_PARSE_NOENT) ? Qtrue : Qfalse;
}

#replace_entities=(value) (rw)

See Document@Entity+Handling for an explanation of the behavior controlled by this flag.

Parameters
  • value (Boolean) Whether external parsed entities will be resolved.

It is UNSAFE to set this option to true when parsing untrusted documents. The option defaults to false for this reason.

This option is perhaps misnamed by the libxml2 author, since it controls resolution and not replacement.

[ GitHub ]

  
# File 'ext/nokogiri/xml_sax_push_parser.c', line 167

/*
 * Setter for the replace_entities flag.
 *
 * A truthy +value+ turns the XML_PARSE_NOENT bit on in the parser context's
 * options; a falsy +value+ turns it off. All other option bits are carried
 * over unchanged. A nonzero result from xmlCtxtSetOptions() is reported as a
 * RuntimeError. Returns +value+.
 */
static VALUE
noko_xml_sax_push_parser__replace_entities_set(VALUE self, VALUE value)
{
  int status;
  int option_bits;
  xmlParserCtxtPtr ctxt = noko_xml_sax_push_parser_unwrap(self);

  /* Start from the current options and flip only the NOENT bit. */
  option_bits = xmlCtxtGetOptions(ctxt);
  if (RB_TEST(value)) {
    option_bits |= XML_PARSE_NOENT;
  } else {
    option_bits &= ~XML_PARSE_NOENT;
  }

  status = xmlCtxtSetOptions(ctxt, option_bits);
  if (status) {
    rb_raise(rb_eRuntimeError, "failed to set parser context options (%x)", status);
  }

  return value;
}

Instance Method Details

#<<(chunk, last_chunk = false)

Alias for #write.

[ GitHub ]

  
# File 'lib/nokogiri/xml/sax/push_parser.rb', line 50

# Lets callers feed chunks with the shovel operator (parser << chunk),
# which dispatches to #write.
alias_method :<<, :write

#finish

Finish the parsing. This method is only necessary for Document#end_document to be called.

⚠ Note that empty documents are treated as an error when using the libxml2-based implementation (CRuby), but are fine when using the Xerces-based implementation (JRuby).

[ GitHub ]

  
# File 'lib/nokogiri/xml/sax/push_parser.rb', line 58

# Signals the end of input by writing an empty chunk with last_chunk set
# to true. Returns whatever #write returns.
def finish
  write("", true)
end

#initialize_native(xml_sax, filename) (private)

Initialize the push parser with xml_sax using filename

[ GitHub ]

  
# File 'ext/nokogiri/xml_sax_push_parser.c', line 74

/*
 * Private native initializer: creates the libxml2 push parser context for
 * this object and stores it as the object's data pointer.
 *
 * _xml_sax  - Ruby SAX parser object; its SAX handler struct is obtained via
 *             noko_xml_sax_parser_unwrap().
 * _filename - nil, or a Ruby String converted with StringValueCStr() and
 *             passed to xmlCreatePushParserCtxt() as the filename.
 *
 * Returns self. Raises RuntimeError when the context cannot be created.
 */
static VALUE
noko_xml_sax_push_parser__initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename)
{
  xmlSAXHandlerPtr sax;
  const char *filename = NULL;
  xmlParserCtxtPtr ctx;

  sax = noko_xml_sax_parser_unwrap(_xml_sax);

  /* filename stays NULL when no file name was given */
  if (_filename != Qnil) { filename = StringValueCStr(_filename); }

  /* No user data and no initial chunk are supplied here; input is fed
   * later through native_write. */
  ctx = xmlCreatePushParserCtxt(
          sax,
          NULL,
          NULL,
          0,
          filename
        );
  if (ctx == NULL) {
    rb_raise(rb_eRuntimeError, "Could not create a parser context");
  }

  /* The context is its own user data, and the Ruby SAX parser object is
   * stashed in _private — presumably so the SAX callbacks can recover both;
   * verify against the callback implementations. */
  ctx->userData = ctx;
  ctx->_private = (void *)_xml_sax;

  /* NOTE(review): any previous DATA_PTR value is overwritten without being
   * freed — assumes this runs once per object; confirm. */
  DATA_PTR(self) = ctx;
  return self;
}

#native_write(_chunk, _last_chunk) (private)

Write chunk to PushParser. last_chunk triggers the end_document handler.

[ GitHub ]

  
# File 'ext/nokogiri/xml_sax_push_parser.c', line 42

/*
 * Private native writer: feeds one chunk of input to the push parser
 * context via xmlParseChunk().
 *
 * _chunk      - nil (nothing is fed: NULL pointer, size 0) or a Ruby String.
 * _last_chunk - only the exact value true marks the chunk as terminal.
 *
 * Returns self. When xmlParseChunk() reports a nonzero result and the
 * XML_PARSE_RECOVER option is not set, the context's last error is handed to
 * noko__error_raise().
 */
static VALUE
noko_xml_sax_push_parser__native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
{
  xmlParserCtxtPtr ctx;
  const char *chunk  = NULL;
  int size            = 0;

  ctx = noko_xml_sax_push_parser_unwrap(self);

  if (Qnil != _chunk) {
    chunk = StringValuePtr(_chunk);
    /* NOTE(review): the string length is narrowed to int; a chunk larger
     * than INT_MAX bytes would be truncated — confirm whether callers can
     * reach that size. */
    size = (int)RSTRING_LEN(_chunk);
  }

  /* Clears the structured error handler before parsing — presumably so that
   * errors are collected from the context below instead; confirm. */
  xmlSetStructuredErrorFunc(NULL, NULL);

  /* NOTE(review): _last_chunk is compared against Qtrue exactly, so other
   * truthy Ruby values are treated as "not last" — confirm this is intended. */
  if (xmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0)) {
    /* With the RECOVER option set, parse errors are not raised. */
    if (!(xmlCtxtGetOptions(ctx) & XML_PARSE_RECOVER)) {
      xmlErrorConstPtr e = xmlCtxtGetLastError(ctx);
      noko__error_raise(NULL, e);
    }
  }

  return self;
}

#write(chunk, last_chunk = false) Also known as: #<<

Write a chunk of XML to the PushParser. Any callback methods that can be called will be called immediately.

[ GitHub ]

  
# File 'lib/nokogiri/xml/sax/push_parser.rb', line 47

# Feeds +chunk+ to the parser by delegating to native_write.
#
# chunk      - the piece of input to hand to the parser.
# last_chunk - forwarded unchanged to native_write (default false).
#
# Returns whatever native_write returns.
def write(chunk, last_chunk = false)
  native_write(chunk, last_chunk)
end