123456789_123456789_123456789_123456789_123456789_

Class: Nokogiri::HTML4::SAX::PushParser

Relationships & Source Files
Super Chains via Extension / Inclusion / Inheritance
Class Chain:
Instance Chain:
Inherits: Nokogiri::XML::SAX::PushParser
Defined in: ext/nokogiri/html4_sax_push_parser.c,
lib/nokogiri/html4/sax/push_parser.rb

Class Method Summary

::Nokogiri::XML::SAX::PushParser - Inherited

.new

Create a new PushParser with doc as the Nokogiri::HTML4::SAX::Document, providing an optional file_name and encoding.

Instance Attribute Summary

  • #document rw

    The Nokogiri::HTML4::SAX::Document on which the PushParser will be operating.

::Nokogiri::XML::SAX::PushParser - Inherited

#document

The ::Nokogiri::XML::SAX::Document on which the PushParser will be operating.

#options, #options=,
#replace_entities

See the "Entity Handling" section of Nokogiri::XML::SAX::Document for an explanation of the behavior controlled by this flag.

#replace_entities=

See the "Entity Handling" section of Nokogiri::XML::SAX::Document for an explanation of the behavior controlled by this flag.

Instance Method Summary

::Nokogiri::XML::SAX::PushParser - Inherited

#<<
#finish

Finish the parsing.

#write

Write a chunk of XML to the PushParser.

#initialize_native

Initialize the push parser with xml_sax using filename

#native_write

Write chunk to PushParser.

Constructor Details

.new(doc = HTML4::SAX::Document.new, file_name = nil, encoding = "UTF-8") ⇒ PushParser

[ GitHub ]

  
# File 'lib/nokogiri/html4/sax/push_parser.rb', line 11

# Builds a push parser that reports SAX events to +doc+.
#
# doc::       SAX document handler; defaults to a fresh HTML4::SAX::Document.
# file_name:: optional file name forwarded to the native parser context.
# encoding::  encoding name given to both the SAX parser and the native context.
def initialize(doc = HTML4::SAX::Document.new, file_name = nil, encoding = "UTF-8")
  @encoding = encoding
  @document = doc
  @sax_parser = HTML4::SAX::Parser.new(@document, encoding)

  # Hand the SAX parser to the C extension, which creates the push parser context.
  initialize_native(@sax_parser, file_name, @encoding)
end

Instance Attribute Details

#document (rw)

The Nokogiri::HTML4::SAX::Document on which the PushParser will be operating

[ GitHub ]

  
# File 'lib/nokogiri/html4/sax/push_parser.rb', line 9

# Defines the #document reader and #document= writer for the @document instance variable.
attr_accessor :document

Instance Method Details

#<<(chunk, last_chunk = false)

Alias for #write.

[ GitHub ]

  
# File 'lib/nokogiri/html4/sax/push_parser.rb', line 26

# Makes #<< an alias of #write, so it accepts the same arguments.
alias_method :<<, :write

#finish

Finish the parsing. This method is only necessary for Nokogiri::HTML4::SAX::Document#end_document to be called.

[ GitHub ]

  
# File 'lib/nokogiri/html4/sax/push_parser.rb', line 31

# Signals the end of input by writing an empty chunk flagged as the last one.
def finish
  final_chunk = true
  write("", final_chunk)
end

#initialize_native(rb_xml_sax, rb_filename, encoding) (private)

Initialize the push parser with rb_xml_sax as the SAX handler, using rb_filename and encoding.

[ GitHub ]

  
# File 'ext/nokogiri/html4_sax_push_parser.c', line 42

/*
 * Creates the libxml2 HTML push parser context for this PushParser and
 * stores it in self.
 *
 * rb_xml_sax  - wrapped SAX parser whose handler is passed to libxml2
 * rb_filename - file name string for the context, or nil for none
 * encoding    - encoding name string, or nil to leave it unspecified
 *
 * Raises ArgumentError when the encoding name is not recognised and
 * RuntimeError when the context cannot be created. Returns self.
 */
static VALUE
noko_html4_sax_push_parser__initialize_native(
  VALUE self,
  VALUE rb_xml_sax,
  VALUE rb_filename,
  VALUE encoding
)
{
  /* Unwrap first, then convert the strings, so failures surface in the same order as before. */
  htmlSAXHandlerPtr sax_handler = noko_xml_sax_parser_unwrap(rb_xml_sax);
  const char *c_filename = NIL_P(rb_filename) ? NULL : StringValueCStr(rb_filename);
  xmlCharEncoding char_encoding = XML_CHAR_ENCODING_NONE;
  htmlParserCtxtPtr parser_ctx;

  if (!NIL_P(encoding)) {
    char_encoding = xmlParseCharEncoding(StringValueCStr(encoding));
  }
  /* A nil encoding leaves XML_CHAR_ENCODING_NONE in place, which never matches the error value. */
  if (char_encoding == XML_CHAR_ENCODING_ERROR) {
    rb_raise(rb_eArgError, "Unsupported Encoding");
  }

  /* No initial chunk is supplied: the data pointer is NULL and the size is 0. */
  parser_ctx = htmlCreatePushParserCtxt(sax_handler, NULL, NULL, 0, c_filename, char_encoding);
  if (!parser_ctx) {
    rb_raise(rb_eRuntimeError, "Could not create a parser context");
  }

  /* userData points back at the context; _private holds the Ruby SAX parser VALUE. */
  parser_ctx->_private = (void *)rb_xml_sax;
  parser_ctx->userData = parser_ctx;

  DATA_PTR(self) = parser_ctx;
  return self;
}

#native_write(rb_chunk, rb_last_chunk) (private)

Write chunk to PushParser. Passing true as last_chunk triggers the end_document handler.

[ GitHub ]

  
# File 'ext/nokogiri/html4_sax_push_parser.c', line 8

/*
 * Feeds rb_chunk to libxml2's HTML push parser.
 *
 * rb_chunk      - string with the data to parse, or nil for no data
 * rb_last_chunk - only Qtrue marks this call as the final chunk
 *
 * Raises via noko__error_raise when htmlParseChunk returns non-zero and the
 * context does not have XML_PARSE_RECOVER set. Returns self.
 */
static VALUE
noko_html4_sax_push_parser__native_write(VALUE self, VALUE rb_chunk, VALUE rb_last_chunk)
{
  libxmlStructuredErrorHandlerState saved_handler;
  xmlParserCtxtPtr parser_ctx = noko_xml_sax_push_parser_unwrap(self);
  const char *data = NULL;
  int data_len = 0;
  int terminate = (rb_last_chunk == Qtrue);
  int parse_status;

  if (!NIL_P(rb_chunk)) {
    data = StringValuePtr(rb_chunk);
    /* NOTE(review): the string length is narrowed to int here; confirm callers never pass chunks longer than INT_MAX. */
    data_len = (int)RSTRING_LEN(rb_chunk);
  }

  /* Save the structured error handler state and set (NULL, NULL) around the parse call, then restore it. */
  noko__structured_error_func_save_and_set(&saved_handler, NULL, NULL);
  parse_status = htmlParseChunk(parser_ctx, data, data_len, terminate);
  noko__structured_error_func_restore(&saved_handler);

  if (parse_status == 0) {
    return self;
  }

  if (!(xmlCtxtGetOptions(parser_ctx) & XML_PARSE_RECOVER)) {
    // TODO: there appear to be no tests for this block
    xmlErrorConstPtr last_error = xmlCtxtGetLastError(parser_ctx);
    noko__error_raise(NULL, last_error);
  }

  return self;
}

#write(chunk, last_chunk = false) Also known as: #<<

Write a chunk of HTML to the PushParser. Any callback methods that can be called will be called immediately.

[ GitHub ]

  
# File 'lib/nokogiri/html4/sax/push_parser.rb', line 23

# Passes +chunk+ to the native push parser.
#
# chunk::      data to feed to the parser.
# last_chunk:: flag forwarded unchanged to native_write; defaults to false.
def write(chunk, last_chunk = false)
  native_write(chunk, last_chunk)
end