Class: Nokogiri::XML::SAX::PushParser
Relationships & Source Files | |
Extension / Inclusion / Inheritance Descendants | |
Subclasses:
|
|
Inherits: | Object |
Defined in: | lib/nokogiri/xml/sax/push_parser.rb, ext/nokogiri/xml_sax_push_parser.c |
Overview
PushParser
can parse a document that is fed to it manually. It must be given a Document
object which will be called with ::Nokogiri::XML::SAX
events as the document is being parsed.
Calling #<< writes ::Nokogiri::XML
to the parser, calling any ::Nokogiri::XML::SAX
callbacks it can.
#finish tells the parser that the document is finished and calls the end_document ::Nokogiri::XML::SAX
method.
Example:
# Build a handler from an anonymous XML::SAX::Document subclass and give it to the parser.
parser = PushParser.new(Class.new(XML::SAX::Document) {
def start_document
puts "start document called"
end
}.new)
# Feed the document in pieces; a chunk may end in the middle of a tag.
parser << "<div>hello<"
parser << "/div>"
# Tell the parser the document is complete so that end_document is called.
parser.finish
Class Method Summary
-
.new(doc = XML::SAX::Document.new, file_name = nil, encoding = "UTF-8") ⇒ PushParser
constructor
Create a new PushParser with doc as the ::Nokogiri::XML::SAX Document, providing an optional file_name and encoding.
Instance Attribute Summary
-
#document
rw
The Document on which the PushParser will be operating.
- #options rw
- #options=(options) rw
-
#replace_entities
rw
See Document@Entity+Handling for an explanation of the behavior controlled by this flag.
-
#replace_entities=(value)
rw
See Document@Entity+Handling for an explanation of the behavior controlled by this flag.
Instance Method Summary
-
#<<(chunk, last_chunk = false)
Alias for #write.
-
#finish
Finish the parsing.
-
#write(chunk, last_chunk = false)
(also: #<<)
Write a chunk of ::Nokogiri::XML to the PushParser.
-
#initialize_native(xml_sax, filename)
private
Initialize the push parser with xml_sax using filename.
-
#native_write(_chunk, _last_chunk)
private
Write chunk to PushParser.
Constructor Details
.new(doc = XML::SAX::Document.new, file_name = nil, encoding = "UTF-8") ⇒ PushParser
Create a new PushParser
with doc
as the ::Nokogiri::XML::SAX
Document, providing an optional file_name
and encoding
Instance Attribute Details
#document (rw)
The Document
on which the PushParser
will be operating
# File 'lib/nokogiri/xml/sax/push_parser.rb', line 30
# The SAX Document handler this PushParser operates on (readable and writable).
attr_accessor :document
#options (rw)
[ GitHub ]# File 'ext/nokogiri/xml_sax_push_parser.c', line 103
/*
 * Read the option bitmask currently set on this push parser's libxml2
 * context and return it as a Ruby Integer.
 */
static VALUE
noko_xml_sax_push_parser__options_get(VALUE self)
{
  xmlParserCtxtPtr parser_context = noko_xml_sax_push_parser_unwrap(self);
  int current_options = xmlCtxtGetOptions(parser_context);

  return INT2NUM(current_options);
}
#options=(options) (rw)
[ GitHub ]# File 'ext/nokogiri/xml_sax_push_parser.c', line 113
/*
 * Replace the option bitmask on this push parser's libxml2 context with
 * +options+ (converted from a Ruby Integer).
 *
 * Raises RuntimeError when xmlCtxtSetOptions reports a nonzero status.
 * Always returns nil.
 */
static VALUE
noko_xml_sax_push_parser__options_set(VALUE self, VALUE options)
{
  xmlParserCtxtPtr parser_context = noko_xml_sax_push_parser_unwrap(self);
  int status = xmlCtxtSetOptions(parser_context, (int)NUM2INT(options));

  if (status != 0) {
    rb_raise(rb_eRuntimeError, "Cannot set XML parser context options (%x)", status);
  }

  return Qnil;
}
#replace_entities (rw)
See Document@Entity+Handling for an explanation of the behavior controlled by this flag.
- Returns
-
(Boolean) Value of the parse option. (Default false)
This option is perhaps misnamed by the libxml2 author, since it controls resolution and not replacement.
# File 'ext/nokogiri/xml_sax_push_parser.c', line 140
/*
 * Report whether the XML_PARSE_NOENT bit is set in this push parser's
 * context options. Returns true when it is set, false otherwise.
 */
static VALUE
noko_xml_sax_push_parser__replace_entities_get(VALUE self)
{
  xmlParserCtxtPtr parser_context = noko_xml_sax_push_parser_unwrap(self);
  int noent_enabled = xmlCtxtGetOptions(parser_context) & XML_PARSE_NOENT;

  return noent_enabled ? Qtrue : Qfalse;
}
#replace_entities=(value) (rw)
See Document@Entity+Handling for an explanation of the behavior controlled by this flag.
- Parameters
-
value
(Boolean) Whether external parsed entities will be resolved.
⚠ It is UNSAFE to set this option to true
when parsing untrusted documents. The option defaults to false
for this reason.
This option is perhaps misnamed by the libxml2 author, since it controls resolution and not replacement.
# File 'ext/nokogiri/xml_sax_push_parser.c', line 167
/*
 * Set or clear the XML_PARSE_NOENT bit in this push parser's context
 * options according to the truthiness of +value+, leaving every other
 * option bit as it was.
 *
 * Raises RuntimeError when xmlCtxtSetOptions reports a nonzero status.
 * Returns +value+ unchanged.
 */
static VALUE
noko_xml_sax_push_parser__replace_entities_set(VALUE self, VALUE value)
{
  xmlParserCtxtPtr parser_context = noko_xml_sax_push_parser_unwrap(self);
  int updated_options = xmlCtxtGetOptions(parser_context);
  int status;

  if (RB_TEST(value)) {
    updated_options |= XML_PARSE_NOENT;
  } else {
    updated_options &= ~XML_PARSE_NOENT;
  }

  status = xmlCtxtSetOptions(parser_context, updated_options);
  if (status) {
    rb_raise(rb_eRuntimeError, "failed to set parser context options (%x)", status);
  }

  return value;
}
Instance Method Details
#<<(chunk, last_chunk = false)
Alias for #write.
# File 'lib/nokogiri/xml/sax/push_parser.rb', line 50
# `<<` is a second name for #write, so `parser << chunk` feeds a chunk to the parser.
alias_method :<<, :write
#finish
Finish the parsing. This method is only necessary for Document#end_document to be called.
⚠ Note that empty documents are treated as an error when using the libxml2-based implementation (CRuby), but are fine when using the Xerces-based implementation (JRuby).
# File 'lib/nokogiri/xml/sax/push_parser.rb', line 58
# Finish parsing by writing an empty chunk flagged as the last one.
def finish
  write("", true)
end
#initialize_native(xml_sax, filename) (private)
Initialize the push parser with xml_sax
using filename
# File 'ext/nokogiri/xml_sax_push_parser.c', line 74
/*
 * Create the libxml2 push-parser context for +self+.
 *
 * _xml_sax  - Ruby object wrapping the SAX handler; unwrapped to obtain
 *             the xmlSAXHandlerPtr given to libxml2.
 * _filename - nil, or a String passed to libxml2 as the filename.
 *
 * Raises RuntimeError if libxml2 cannot create the context.
 * Returns +self+.
 */
static VALUE
noko_xml_sax_push_parser__initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename)
{
  xmlSAXHandlerPtr sax_handler = noko_xml_sax_parser_unwrap(_xml_sax);
  const char *source_name = (_filename == Qnil) ? NULL : StringValueCStr(_filename);
  xmlParserCtxtPtr parser_context =
    xmlCreatePushParserCtxt(sax_handler, NULL, NULL, 0, source_name);

  if (!parser_context) {
    rb_raise(rb_eRuntimeError, "Could not create a parser context");
  }

  /* The context serves as its own SAX user data; the Ruby handler object is kept in _private. */
  parser_context->userData = parser_context;
  parser_context->_private = (void *)_xml_sax;

  DATA_PTR(self) = parser_context;

  return self;
}
#native_write(_chunk, _last_chunk) (private)
Write chunk to PushParser. last_chunk triggers the end_document handler.
# File 'ext/nokogiri/xml_sax_push_parser.c', line 42
/*
 * Feed one chunk of input to the libxml2 push parser.
 *
 * _chunk      - nil (no data is passed) or a String whose bytes are parsed.
 * _last_chunk - only the exact value `true` marks this as the final chunk.
 *
 * If xmlParseChunk reports a nonzero status and XML_PARSE_RECOVER is not
 * set in the context options, the context's last error is raised through
 * noko__error_raise. Returns +self+.
 */
static VALUE
noko_xml_sax_push_parser__native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
{
  xmlParserCtxtPtr parser_context = noko_xml_sax_push_parser_unwrap(self);
  const char *bytes = NULL;
  int byte_count = 0;
  int terminate;
  int parse_status;

  if (_chunk != Qnil) {
    bytes = StringValuePtr(_chunk);
    byte_count = (int)RSTRING_LEN(_chunk);
  }

  terminate = (_last_chunk == Qtrue) ? 1 : 0;

  xmlSetStructuredErrorFunc(NULL, NULL);

  parse_status = xmlParseChunk(parser_context, bytes, byte_count, terminate);
  if (parse_status == 0) {
    return self;
  }

  /* In recovery mode a parse failure is not raised. */
  if (xmlCtxtGetOptions(parser_context) & XML_PARSE_RECOVER) {
    return self;
  }

  noko__error_raise(NULL, xmlCtxtGetLastError(parser_context));

  return self;
}
#write(chunk, last_chunk = false) Also known as: #<<
Write a chunk of ::Nokogiri::XML to the PushParser. Any callback methods that can be called will be called immediately.
# File 'lib/nokogiri/xml/sax/push_parser.rb', line 47
# Hand +chunk+ to the native parser; pass true as +last_chunk+ to mark the end of input.
def write(chunk, last_chunk = false)
  native_write(chunk, last_chunk)
end