123456789_123456789_123456789_123456789_123456789_

Class: Nokogiri::XML::SAX::PushParser

Relationships & Source Files
Extension / Inclusion / Inheritance Descendants
Subclasses:
Inherits: Object
Defined in: lib/nokogiri/xml/sax/push_parser.rb,
ext/nokogiri/xml_sax_push_parser.c

Overview

PushParser can parse a document that is fed to it manually. It must be given a Document object which will be called with ::Nokogiri::XML::SAX events as the document is being parsed.

Calling #<< writes a chunk of XML to the parser, invoking any SAX callbacks that can already be fired for the data received so far.

#finish tells the parser that the document is finished and calls the end_document SAX callback.

Example:

# Build a push parser around an anonymous SAX document subclass that
# reports when the start_document callback fires.
parser = PushParser.new(Class.new(XML::SAX::Document) {
  def start_document
    puts "start document called"
  end
}.new)
# The markup may be split at arbitrary points; here the closing tag is
# divided across two chunks.
parser << "<div>hello<"
parser << "/div>"
# Signal that no more input is coming so end_document gets called.
parser.finish

Class Method Summary

Instance Attribute Summary

Instance Method Summary

Constructor Details

.new(doc = XML::SAX::Document.new, file_name = nil, encoding = "UTF-8") ⇒ PushParser

Create a new PushParser with doc as the ::Nokogiri::XML::SAX Document, providing an optional file_name and encoding

[ GitHub ]

  
# File 'lib/nokogiri/xml/sax/push_parser.rb', line 35

# Build a push parser that reports SAX events to +doc+.
#
# doc       - SAX document object that receives the callbacks
#             (defaults to a fresh XML::SAX::Document).
# file_name - optional file name forwarded to the native parser context.
# encoding  - encoding name remembered on the instance (default "UTF-8").
def initialize(doc = XML::SAX::Document.new, file_name = nil, encoding = "UTF-8")
  @encoding = encoding
  @document = doc
  @sax_parser = XML::SAX::Parser.new(@document)

  # The native layer creates the underlying push parser context from the
  # SAX parser wrapper and the optional file name.
  initialize_native(@sax_parser, file_name)
end

Instance Attribute Details

#document (rw)

The Document on which the PushParser will be operating

[ GitHub ]

  
# File 'lib/nokogiri/xml/sax/push_parser.rb', line 30

# Reader and writer for the SAX document this push parser operates on.
attr_accessor :document

#options (rw)

[ GitHub ]

  
# File 'ext/nokogiri/xml_sax_push_parser.c', line 105

/*
 * Return the option bitmask currently stored on the underlying parser
 * context, converted to a Ruby Integer.
 */
static VALUE
get_options(VALUE self)
{
  xmlParserCtxtPtr parser_ctx = noko_xml_sax_push_parser_unwrap(self);

  return INT2NUM(parser_ctx->options);
}

#options=(options) (rw)

[ GitHub ]

  
# File 'ext/nokogiri/xml_sax_push_parser.c', line 115

/*
 * Apply the given option bitmask to the underlying parser context.
 *
 * Raises RuntimeError when xmlCtxtUseOptions reports a non-zero result.
 * Always returns nil on success.
 */
static VALUE
set_options(VALUE self, VALUE options)
{
  xmlParserCtxtPtr parser_ctx = noko_xml_sax_push_parser_unwrap(self);
  int requested = (int)NUM2INT(options);

  if (0 != xmlCtxtUseOptions(parser_ctx, requested)) {
    rb_raise(rb_eRuntimeError, "Cannot set XML parser context options");
  }

  return Qnil;
}

#replace_entities (rw)

Should this parser replace entities? &amp; will get converted to ‘&’ if set to true

[ GitHub ]

  
# File 'ext/nokogiri/xml_sax_push_parser.c', line 136

/*
 * Report whether the parser context is configured to replace entities.
 * Returns false when the context's replaceEntities flag is zero and true
 * for any other value.
 */
static VALUE
get_replace_entities(VALUE self)
{
  xmlParserCtxtPtr parser_ctx = noko_xml_sax_push_parser_unwrap(self);

  return (parser_ctx->replaceEntities == 0) ? Qfalse : Qtrue;
}

#replace_entities=(boolean) (rw)

Should this parser replace entities? &amp; will get converted to ‘&’ if set to true

[ GitHub ]

  
# File 'ext/nokogiri/xml_sax_push_parser.c', line 157

/*
 * Enable or disable entity replacement on the parser context.
 *
 * The argument is interpreted with Ruby truthiness: both false and nil
 * disable replacement, any other object enables it. (Comparing against
 * Qfalse alone would make `replace_entities = nil` turn replacement ON,
 * which contradicts how Ruby treats nil everywhere else.)
 *
 * Returns the value that was passed in, as Ruby setters conventionally do.
 */
static VALUE
set_replace_entities(VALUE self, VALUE value)
{
  xmlParserCtxtPtr ctx;

  ctx = noko_xml_sax_push_parser_unwrap(self);

  ctx->replaceEntities = RTEST(value) ? 1 : 0;

  return value;
}

Instance Method Details

#<<(chunk, last_chunk = false)

Alias for #write.

[ GitHub ]

  
# File 'lib/nokogiri/xml/sax/push_parser.rb', line 50

# Make `parser << chunk` behave exactly like `parser.write(chunk)`.
alias_method :<<, :write

#finish

Finish the parsing. This method is only necessary for Document#end_document to be called.

[ GitHub ]

  
# File 'lib/nokogiri/xml/sax/push_parser.rb', line 55

# Tell the parser that the input is complete by writing an empty chunk
# flagged as the last one. Returns whatever #write returns.
def finish
  final_chunk = ""
  write(final_chunk, true)
end

#initialize_native(xml_sax, filename) (private)

Initialize the push parser with xml_sax using filename

[ GitHub ]

  
# File 'ext/nokogiri/xml_sax_push_parser.c', line 76

/*
 * Create the libxml2 push parser context for this PushParser.
 *
 * _xml_sax  - Ruby object wrapping the SAX handler to use.
 * _filename - Ruby String with the file name, or nil for none.
 *
 * Raises RuntimeError if the context cannot be created. Returns self.
 */
static VALUE
initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename)
{
  xmlSAXHandlerPtr handler = noko_sax_handler_unwrap(_xml_sax);
  /* A nil file name is passed through to libxml2 as NULL. */
  const char *path = (Qnil == _filename) ? NULL : StringValueCStr(_filename);
  /* No initial chunk is supplied; data arrives later via native_write. */
  xmlParserCtxtPtr parser_ctx = xmlCreatePushParserCtxt(handler, NULL, NULL, 0, path);

  if (NULL == parser_ctx) {
    rb_raise(rb_eRuntimeError, "Could not create a parser context");
  }

  /* User data handed to the SAX callbacks, built from the context and self. */
  parser_ctx->userData = NOKOGIRI_SAX_TUPLE_NEW(parser_ctx, self);
  parser_ctx->sax2 = 1;

  /* Attach the context to the Ruby object so later calls can unwrap it. */
  DATA_PTR(self) = parser_ctx;

  return self;
}

#native_write(chunk, last_chunk) (private)

Write chunk to PushParser. last_chunk triggers the end_document handler

[ GitHub ]

  
# File 'ext/nokogiri/xml_sax_push_parser.c', line 43

/*
 * Feed one chunk of input to the push parser.
 *
 * _chunk      - Ruby String with the data, or nil to feed nothing.
 * _last_chunk - exactly `true` marks this as the final chunk.
 *
 * When xmlParseChunk reports a non-zero status and the context was not
 * configured with XML_PARSE_RECOVER, the context's last error is raised.
 * Returns self.
 */
static VALUE
native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
{
  xmlParserCtxtPtr parser_ctx = noko_xml_sax_push_parser_unwrap(self);
  const char *data = NULL;
  int length = 0;
  int terminate = (Qtrue == _last_chunk) ? 1 : 0;
  int status;

  if (_chunk != Qnil) {
    data = StringValuePtr(_chunk);
    length = (int)RSTRING_LEN(_chunk);
  }

  /* Clear any structured error handler before parsing. */
  xmlSetStructuredErrorFunc(NULL, NULL);

  status = xmlParseChunk(parser_ctx, data, length, terminate);
  if (status != 0 && !(parser_ctx->options & XML_PARSE_RECOVER)) {
    xmlErrorConstPtr last_error = xmlCtxtGetLastError(parser_ctx);
    Nokogiri_error_raise(NULL, last_error);
  }

  return self;
}

#write(chunk, last_chunk = false) Also known as: #<<

Write a chunk of XML to the PushParser. Any callback methods that can be called will be called immediately.

[ GitHub ]

  
# File 'lib/nokogiri/xml/sax/push_parser.rb', line 47

# Feed +chunk+ to the parser by delegating to the native implementation.
#
# chunk      - the piece of input to parse.
# last_chunk - pass true when this is the final piece of the document
#              (defaults to false).
#
# Returns the result of native_write.
def write(chunk, last_chunk = false)
  native_write(chunk, last_chunk)
end