Class: Psych::Parser
| Relationships & Source Files | |
| Namespace Children | |
|
Classes:
| |
| Inherits: | Object |
| Defined in: | ext/psych/lib/psych/parser.rb, ext/psych/psych_parser.c |
Overview
YAML event parser class. This class parses a YAML document and calls
events on the handler that is passed to the constructor. The events can
be used for things such as constructing a YAML AST or deserializing YAML
documents. It can even be fed back to Emitter to emit the same
document that was parsed.
See Handler for documentation on the events that Parser emits.
Here is an example that prints out every scalar found in a YAML document:
# Handler for detecting scalar values
class ScalarHandler < Psych::Handler
  # Invoked by the parser for each scalar event. Only the scalar's text is
  # used here; the remaining event arguments are accepted and ignored.
  def scalar(value, anchor, tag, plain, quoted, style)
    $stdout.puts(value)
  end
end
# Events emitted while parsing are called on the ScalarHandler instance.
parser = Psych::Parser.new(ScalarHandler.new)
# NOTE(review): yaml_document is not defined in this snippet; presumably a
# String or IO holding YAML.
parser.parse(yaml_document)
Here is an example that feeds the parser back in to Emitter. The
YAML document is read from STDIN and written back out to STDERR:
# The Emitter is used as the handler, so parsed events are written to $stderr.
parser = Psych::Parser.new(Psych::Emitter.new($stderr))
# The YAML input is read from standard input.
parser.parse($stdin)
::Psych uses Parser in combination with TreeBuilder to
construct an AST of the parsed YAML document.
Constant Summary
-
ANY =
# File 'ext/psych/psych_parser.c', line 544
Any encoding: let the parser choose the encoding
INT2NUM(YAML_ANY_ENCODING)
-
BOM =
private
# File 'ext/psych/lib/psych/parser.rb', line 67
{ Encoding::UTF_8 => "\u{FEFF}".freeze, Encoding::UTF_16LE => "\u{FEFF}".encode(Encoding::UTF_16LE).freeze, Encoding::UTF_16BE => "\u{FEFF}".encode(Encoding::UTF_16BE).freeze, Encoding::UTF_32LE => "\u{FEFF}".encode(Encoding::UTF_32LE).freeze, Encoding::UTF_32BE => "\u{FEFF}".encode(Encoding::UTF_32BE).freeze, }.freeze
-
UTF16BE =
# File 'ext/psych/psych_parser.c', line 553
UTF-16-BE Encoding with BOM
INT2NUM(YAML_UTF16BE_ENCODING)
-
UTF16LE =
# File 'ext/psych/psych_parser.c', line 550
UTF-16-LE Encoding with BOM
INT2NUM(YAML_UTF16LE_ENCODING)
-
UTF8 =
# File 'ext/psych/psych_parser.c', line 547
UTF-8 Encoding
INT2NUM(YAML_UTF8_ENCODING)
Class Method Summary
-
.new(handler = Handler.new) ⇒ Parser
constructor
Creates a new
Parser instance with #handler.
Instance Attribute Summary
-
#external_encoding=(value)
writeonly
Set the encoding for this parser to encoding
-
#handler
rw
The handler on which events will be called.
Instance Method Summary
-
#mark ⇒ Parser::Mark
Returns a
Mark object that contains line, column, and index information.
-
#parse(yaml)
Parse the YAML document contained in
yaml.
- #_native_parse(handler, yaml, path) private
- #skip_io_bom(io, bom) private
-
#strip_bom(yaml)
private
libyaml only skips a leading byte order mark when it detects the stream encoding by itself.
Constructor Details
.new(handler = Handler.new) ⇒ Parser
Instance Attribute Details
#external_encoding=(value) (writeonly)
Set the encoding for this parser to encoding
# File 'ext/psych/lib/psych/parser.rb', line 41
attr_writer :external_encoding
#handler (rw)
The handler on which events will be called
# File 'ext/psych/lib/psych/parser.rb', line 38
attr_accessor :handler
Instance Method Details
#_native_parse(handler, yaml, path) (private)
[ GitHub ]# File 'ext/psych/psych_parser.c', line 258
/*
 * Native implementation behind Psych::Parser#_native_parse.
 *
 * Feeds +yaml+ to libyaml and, for every event libyaml produces, calls the
 * matching callback on +handler+ through one of the protected_* helpers.
 *
 * self    - the Psych::Parser instance wrapping a yaml_parser_t
 * handler - object that receives the event callbacks
 * yaml    - an object responding to the id_read method (treated as an IO),
 *           or anything StringValue() accepts
 * path    - passed to make_exception() when libyaml reports an error
 *
 * Returns self. Raises the exception built by make_exception() on a parse
 * error, and re-raises (via rb_jump_tag) anything raised by a handler
 * callback.
 */
static VALUE parse(VALUE self, VALUE handler, VALUE yaml, VALUE path)
{
    yaml_parser_t * parser;
    yaml_event_t event;
    int done = 0;
    int state = 0;
    int parser_encoding = YAML_ANY_ENCODING;
    int encoding = rb_utf8_encindex();
    rb_encoding * internal_enc = rb_default_internal_encoding();

    TypedData_Get_Struct(self, yaml_parser_t, &psych_parser_type, parser);

    /* Reset the wrapped parser so each call starts from a fresh state. */
    yaml_parser_delete(parser);
    yaml_parser_initialize(parser);

    if (rb_respond_to(yaml, id_read)) {
        /* IO-like input: libyaml pulls data through io_reader. */
        yaml = transcode_io(yaml, &parser_encoding);
        yaml_parser_set_encoding(parser, parser_encoding);
        yaml_parser_set_input(parser, io_reader, (void *)yaml);
    } else {
        /* String input: hand libyaml the string's bytes directly. */
        StringValue(yaml);
        yaml = transcode_string(yaml, &parser_encoding);
        yaml_parser_set_encoding(parser, parser_encoding);
        yaml_parser_set_input_string(
            parser,
            (const unsigned char *)RSTRING_PTR(yaml),
            (size_t)RSTRING_LEN(yaml)
        );
    }

    while(!done) {
        VALUE event_args[5];
        VALUE start_line, start_column, end_line, end_column;

        if(parser->error || !yaml_parser_parse(parser, &event)) {
            VALUE exception;

            /* Build the exception first, then reset the parser before raising. */
            exception = make_exception(parser, path);
            yaml_parser_delete(parser);
            yaml_parser_initialize(parser);

            rb_exc_raise(exception);
        }

        start_line = SIZET2NUM(event.start_mark.line);
        start_column = SIZET2NUM(event.start_mark.column);
        end_line = SIZET2NUM(event.end_mark.line);
        end_column = SIZET2NUM(event.end_mark.column);

        event_args[0] = handler;
        event_args[1] = start_line;
        event_args[2] = start_column;
        event_args[3] = end_line;
        event_args[4] = end_column;
        rb_protect(protected_event_location, (VALUE)event_args, &state);

        /*
         * rb_protect stores a fresh status in `state` on every call, so a
         * failure in the location callback would be overwritten (and lost) by
         * the event callback below. Release the event and re-raise now.
         */
        if (state) {
            yaml_event_delete(&event);
            rb_jump_tag(state);
        }

        switch(event.type) {
            case YAML_STREAM_START_EVENT:
                {
                    VALUE args[2];

                    args[0] = handler;
                    args[1] = INT2NUM(event.data.stream_start.encoding);
                    rb_protect(protected_start_stream, (VALUE)args, &state);
                }
                break;
            case YAML_DOCUMENT_START_EVENT:
                {
                    VALUE args[4];
                    /* Get a list of tag directives (if any) */
                    VALUE tag_directives = rb_ary_new();
                    /* Grab the document version */
                    VALUE version = event.data.document_start.version_directive ?
                        rb_ary_new3(
                            (long)2,
                            INT2NUM(event.data.document_start.version_directive->major),
                            INT2NUM(event.data.document_start.version_directive->minor)
                        ) : rb_ary_new();

                    if(event.data.document_start.tag_directives.start) {
                        yaml_tag_directive_t *start =
                            event.data.document_start.tag_directives.start;
                        yaml_tag_directive_t *end =
                            event.data.document_start.tag_directives.end;
                        /* Each directive becomes a [handle, prefix] pair. */
                        for(; start != end; start++) {
                            VALUE handle = Qnil;
                            VALUE prefix = Qnil;
                            if(start->handle) {
                                handle = rb_str_new2((const char *)start->handle);
                                PSYCH_TRANSCODE(handle, encoding, internal_enc);
                            }

                            if(start->prefix) {
                                prefix = rb_str_new2((const char *)start->prefix);
                                PSYCH_TRANSCODE(prefix, encoding, internal_enc);
                            }

                            rb_ary_push(tag_directives, rb_ary_new3((long)2, handle, prefix));
                        }
                    }
                    args[0] = handler;
                    args[1] = version;
                    args[2] = tag_directives;
                    args[3] = event.data.document_start.implicit == 1 ? Qtrue : Qfalse;
                    rb_protect(protected_start_document, (VALUE)args, &state);
                }
                break;
            case YAML_DOCUMENT_END_EVENT:
                {
                    VALUE args[2];

                    args[0] = handler;
                    args[1] = event.data.document_end.implicit == 1 ? Qtrue : Qfalse;
                    rb_protect(protected_end_document, (VALUE)args, &state);
                }
                break;
            case YAML_ALIAS_EVENT:
                {
                    VALUE args[2];
                    VALUE alias = Qnil;
                    if(event.data.alias.anchor) {
                        alias = rb_str_new2((const char *)event.data.alias.anchor);
                        PSYCH_TRANSCODE(alias, encoding, internal_enc);
                    }

                    args[0] = handler;
                    args[1] = alias;
                    rb_protect(protected_alias, (VALUE)args, &state);
                }
                break;
            case YAML_SCALAR_EVENT:
                {
                    VALUE args[7];
                    VALUE anchor = Qnil;
                    VALUE tag = Qnil;
                    VALUE plain_implicit, quoted_implicit, style;
                    /* Length-based copy: the scalar value carries an explicit length. */
                    VALUE val = rb_str_new(
                        (const char *)event.data.scalar.value,
                        (long)event.data.scalar.length
                    );

                    PSYCH_TRANSCODE(val, encoding, internal_enc);

                    if(event.data.scalar.anchor) {
                        anchor = rb_str_new2((const char *)event.data.scalar.anchor);
                        PSYCH_TRANSCODE(anchor, encoding, internal_enc);
                    }

                    if(event.data.scalar.tag) {
                        tag = rb_str_new2((const char *)event.data.scalar.tag);
                        PSYCH_TRANSCODE(tag, encoding, internal_enc);
                    }

                    plain_implicit =
                        event.data.scalar.plain_implicit == 0 ? Qfalse : Qtrue;

                    quoted_implicit =
                        event.data.scalar.quoted_implicit == 0 ? Qfalse : Qtrue;

                    style = INT2NUM(event.data.scalar.style);

                    args[0] = handler;
                    args[1] = val;
                    args[2] = anchor;
                    args[3] = tag;
                    args[4] = plain_implicit;
                    args[5] = quoted_implicit;
                    args[6] = style;
                    rb_protect(protected_scalar, (VALUE)args, &state);
                }
                break;
            case YAML_SEQUENCE_START_EVENT:
                {
                    VALUE args[5];
                    VALUE anchor = Qnil;
                    VALUE tag = Qnil;
                    VALUE implicit, style;
                    if(event.data.sequence_start.anchor) {
                        anchor = rb_str_new2((const char *)event.data.sequence_start.anchor);
                        PSYCH_TRANSCODE(anchor, encoding, internal_enc);
                    }

                    if(event.data.sequence_start.tag) {
                        tag = rb_str_new2((const char *)event.data.sequence_start.tag);
                        PSYCH_TRANSCODE(tag, encoding, internal_enc);
                    }

                    implicit =
                        event.data.sequence_start.implicit == 0 ? Qfalse : Qtrue;

                    style = INT2NUM(event.data.sequence_start.style);

                    args[0] = handler;
                    args[1] = anchor;
                    args[2] = tag;
                    args[3] = implicit;
                    args[4] = style;

                    rb_protect(protected_start_sequence, (VALUE)args, &state);
                }
                break;
            case YAML_SEQUENCE_END_EVENT:
                rb_protect(protected_end_sequence, handler, &state);
                break;
            case YAML_MAPPING_START_EVENT:
                {
                    VALUE args[5];
                    VALUE anchor = Qnil;
                    VALUE tag = Qnil;
                    VALUE implicit, style;
                    if(event.data.mapping_start.anchor) {
                        anchor = rb_str_new2((const char *)event.data.mapping_start.anchor);
                        PSYCH_TRANSCODE(anchor, encoding, internal_enc);
                    }

                    if(event.data.mapping_start.tag) {
                        tag = rb_str_new2((const char *)event.data.mapping_start.tag);
                        PSYCH_TRANSCODE(tag, encoding, internal_enc);
                    }

                    implicit =
                        event.data.mapping_start.implicit == 0 ? Qfalse : Qtrue;

                    style = INT2NUM(event.data.mapping_start.style);

                    args[0] = handler;
                    args[1] = anchor;
                    args[2] = tag;
                    args[3] = implicit;
                    args[4] = style;

                    rb_protect(protected_start_mapping, (VALUE)args, &state);
                }
                break;
            case YAML_MAPPING_END_EVENT:
                rb_protect(protected_end_mapping, handler, &state);
                break;
            case YAML_NO_EVENT:
                rb_protect(protected_empty, handler, &state);
                break;
            case YAML_STREAM_END_EVENT:
                rb_protect(protected_end_stream, handler, &state);
                done = 1;
                break;
        }

        /* Free the event before propagating any exception from the callback. */
        yaml_event_delete(&event);
        if (state) rb_jump_tag(state);
    }

    return self;
}
#mark ⇒ Parser::Mark
Returns a Parser::Mark object that contains line, column, and index
information.
# File 'ext/psych/psych_parser.c', line 518
/*
 * Builds an instance of the "Mark" class found under cPsychParser from the
 * wrapped parser's current mark. Constructor arguments are passed in the
 * order index, line, column.
 */
static VALUE mark(VALUE self)
{
    yaml_parser_t * parser;
    VALUE klass;
    VALUE position[3];

    TypedData_Get_Struct(self, yaml_parser_t, &psych_parser_type, parser);

    klass = rb_const_get_at(cPsychParser, rb_intern("Mark"));

    position[0] = SIZET2NUM(parser->mark.index);
    position[1] = SIZET2NUM(parser->mark.line);
    position[2] = SIZET2NUM(parser->mark.column);

    return rb_class_new_instance(3, position, klass);
}
#parse(yaml)
Parse the YAML document contained in yaml. Events will be called on
the handler set on the parser instance.
See Parser and #handler
# File 'ext/psych/lib/psych/parser.rb', line 61
# Parses +yaml+ and calls the resulting events on the handler stored in
# @handler.
#
# yaml - the YAML source; it is passed through strip_bom before being handed
#        to the native parser.
# path - label forwarded to the native parser. Defaults to yaml.path when
#        +yaml+ responds to :path, otherwise "<unknown>".
#
# Returns whatever _native_parse returns.
def parse(yaml, path = yaml.respond_to?(:path) ? yaml.path : "<unknown>")
  _native_parse(@handler, strip_bom(yaml), path)
end
#skip_io_bom(io, bom) (private)
[ GitHub ]# File 'ext/psych/lib/psych/parser.rb', line 94
# Consumes a leading byte order mark from +io+ when one is present.
#
# io  - an object responding to pos, read and seek.
# bom - the expected BOM as a binary String.
#
# Reads bom.bytesize bytes. If they are not exactly +bom+, the position is
# restored so the caller sees the stream untouched. If +io+ cannot report its
# position, nothing is read at all.
def skip_io_bom(io, bom)
  begin
    pos = io.pos
  rescue SystemCallError, IOError
    return # Not seekable; nothing has been consumed yet.
  end

  head = io.read(bom.bytesize)
  # head is nil when nothing could be read; there is nothing to rewind then.
  io.seek(pos, IO::SEEK_SET) if head && head.b != bom
end
#strip_bom(yaml) (private)
libyaml only skips a leading byte order mark when it detects the stream
encoding by itself. ::Psych passes the encoding explicitly whenever it is
known, and on that path libyaml counts the BOM as a first-line character,
which shifts the column of every token on the first line and silently
terminates a block mapping at the second line [Bug #13615].
# File 'ext/psych/lib/psych/parser.rb', line 81
# Removes a leading byte order mark from +yaml+ before it reaches libyaml.
#
# For a String, a copy without the BOM is returned when the string starts
# with the BOM registered for its encoding in BOM; otherwise the same object
# is returned. For an IO-like object (responds to read, external_encoding,
# pos and seek), the BOM is skipped in place via skip_io_bom and the same
# object is returned. Anything else is returned untouched.
def strip_bom(yaml)
  if String === yaml
    bom = BOM[yaml.encoding]
    # delete_prefix copies even when there is no prefix, so keep the guard.
    return yaml.delete_prefix(bom) if bom && yaml.start_with?(bom)
  elsif yaml.respond_to?(:read) &&
        yaml.respond_to?(:external_encoding) &&
        yaml.respond_to?(:pos) &&
        yaml.respond_to?(:seek)
    bom = BOM[yaml.external_encoding]
    # skip_io_bom compares raw bytes, so pass the BOM as a binary string.
    skip_io_bom(yaml, bom.b) if bom
  end

  yaml
end