123456789_123456789_123456789_123456789_123456789_

Class: Psych::Parser

Relationships & Source Files
Namespace Children
Classes:
Inherits: Object
Defined in: ext/psych/lib/psych/parser.rb,
ext/psych/psych_parser.c

Overview

YAML event parser class. This class parses a YAML document and calls events on the handler that is passed to the constructor. The events can be used for things such as constructing a YAML AST or deserializing YAML documents. It can even be fed back to Emitter to emit the same document that was parsed.

See Handler for documentation on the events that Parser emits.

Here is an example that prints out ever scalar found in a YAML document:

# Handler for detecting scalar values
class ScalarHandler < Psych::Handler
  def scalar value, anchor, tag, plain, quoted, style
    puts value
  end
end

parser = Psych::Parser.new(ScalarHandler.new)
parser.parse(yaml_document)

Here is an example that feeds the parser back in to Emitter. The YAML document is read from STDIN and written back out to STDERR:

parser = Psych::Parser.new(Psych::Emitter.new($stderr))
parser.parse($stdin)

::Psych uses Parser in combination with TreeBuilder to construct an AST of the parsed YAML document.

Constant Summary

Class Method Summary

Instance Attribute Summary

Instance Method Summary

Constructor Details

.new(handler = Handler.new) ⇒ Parser

Creates a new Parser instance with #handler. YAML events will be called on #handler. See Parser for more details.

[ GitHub ]

  
# File 'ext/psych/lib/psych/parser.rb', line 47

def initialize handler = Handler.new
  @handler = handler
  @external_encoding = ANY
end

Instance Attribute Details

#external_encoding=(value) (writeonly)

Set the encoding for this parser to encoding

[ GitHub ]

  
# File 'ext/psych/lib/psych/parser.rb', line 41

attr_writer :external_encoding

#handler (rw)

The handler on which events will be called

[ GitHub ]

  
# File 'ext/psych/lib/psych/parser.rb', line 38

attr_accessor :handler

Instance Method Details

#mark(#) ⇒ Parser

Returns a Parser::Mark object that contains line, column, and index information.

[ GitHub ]

  
# File 'ext/psych/psych_parser.c', line 522

static VALUE mark(VALUE self)
{
    VALUE mark_klass;
    VALUE args[3];
    yaml_parser_t * parser;

    TypedData_Get_Struct(self, yaml_parser_t, &psych_parser_type, parser);
    mark_klass = rb_const_get_at(cPsychParser, rb_intern("Mark"));
    args[0] = INT2NUM(parser->mark.index);
    args[1] = INT2NUM(parser->mark.line);
    args[2] = INT2NUM(parser->mark.column);

    return rb_class_new_instance(3, args, mark_klass);
}

#parse(yaml)

Parse the YAML document contained in yaml. Events will be called on the handler set on the parser instance.

See Parser and #handler

[ GitHub ]

  
# File 'ext/psych/psych_parser.c', line 253

static VALUE parse(int argc, VALUE *argv, VALUE self)
{
    VALUE yaml, path;
    yaml_parser_t * parser;
    yaml_event_t event;
    int done = 0;
    int state = 0;
    int parser_encoding = YAML_ANY_ENCODING;
    int encoding = rb_utf8_encindex();
    rb_encoding * internal_enc = rb_default_internal_encoding();
    VALUE handler = rb_iv_get(self, "@handler");

    if (rb_scan_args(argc, argv, "11", &yaml, &path) == 1) {
	if(rb_respond_to(yaml, id_path))
	    path = rb_funcall(yaml, id_path, 0);
	else
	    path = rb_str_new2("<unknown>");
    }

    TypedData_Get_Struct(self, yaml_parser_t, &psych_parser_type, parser);

    yaml_parser_delete(parser);
    yaml_parser_initialize(parser);

    if (rb_respond_to(yaml, id_read)) {
	yaml = transcode_io(yaml, &parser_encoding);
	yaml_parser_set_encoding(parser, parser_encoding);
	yaml_parser_set_input(parser, io_reader, (void *)yaml);
    } else {
	StringValue(yaml);
	yaml = transcode_string(yaml, &parser_encoding);
	yaml_parser_set_encoding(parser, parser_encoding);
	yaml_parser_set_input_string(
		parser,
		(const unsigned char *)RSTRING_PTR(yaml),
		(size_t)RSTRING_LEN(yaml)
		);
    }

    while(!done) {
	VALUE event_args[5];
	VALUE start_line, start_column, end_line, end_column;

	if(!yaml_parser_parse(parser, &event)) {
	    VALUE exception;

	    exception = make_exception(parser, path);
	    yaml_parser_delete(parser);
	    yaml_parser_initialize(parser);

	    rb_exc_raise(exception);
	}

	start_line = INT2NUM((long)event.start_mark.line);
	start_column = INT2NUM((long)event.start_mark.column);
	end_line = INT2NUM((long)event.end_mark.line);
	end_column = INT2NUM((long)event.end_mark.column);

	event_args[0] = handler;
	event_args[1] = start_line;
	event_args[2] = start_column;
	event_args[3] = end_line;
	event_args[4] = end_column;
	rb_protect(protected_event_location, (VALUE)event_args, &state);

	switch(event.type) {
	    case YAML_STREAM_START_EVENT:
	      {
		  VALUE args[2];

		  args[0] = handler;
		  args[1] = INT2NUM((long)event.data.stream_start.encoding);
		  rb_protect(protected_start_stream, (VALUE)args, &state);
	      }
	      break;
	  case YAML_DOCUMENT_START_EVENT:
	    {
		VALUE args[4];
		/* Get a list of tag directives (if any) */
		VALUE tag_directives = rb_ary_new();
		/* Grab the document version */
		VALUE version = event.data.document_start.version_directive ?
		    rb_ary_new3(
			(long)2,
			INT2NUM((long)event.data.document_start.version_directive->major),
			INT2NUM((long)event.data.document_start.version_directive->minor)
			) : rb_ary_new();

		if(event.data.document_start.tag_directives.start) {
		    yaml_tag_directive_t *start =
			event.data.document_start.tag_directives.start;
		    yaml_tag_directive_t *end =
			event.data.document_start.tag_directives.end;
		    for(; start != end; start++) {
			VALUE handle = Qnil;
			VALUE prefix = Qnil;
			if(start->handle) {
			    handle = rb_str_new2((const char *)start->handle);
			    PSYCH_TRANSCODE(handle, encoding, internal_enc);
			}

			if(start->prefix) {
			    prefix = rb_str_new2((const char *)start->prefix);
			    PSYCH_TRANSCODE(prefix, encoding, internal_enc);
			}

			rb_ary_push(tag_directives, rb_ary_new3((long)2, handle, prefix));
		    }
		}
		args[0] = handler;
		args[1] = version;
		args[2] = tag_directives;
		args[3] = event.data.document_start.implicit == 1 ? Qtrue : Qfalse;
		rb_protect(protected_start_document, (VALUE)args, &state);
	    }
	    break;
	  case YAML_DOCUMENT_END_EVENT:
	    {
		VALUE args[2];

		args[0] = handler;
		args[1] = event.data.document_end.implicit == 1 ? Qtrue : Qfalse;
		rb_protect(protected_end_document, (VALUE)args, &state);
	    }
	    break;
	  case YAML_ALIAS_EVENT:
	    {
		VALUE args[2];
		VALUE alias = Qnil;
		if(event.data.alias.anchor) {
		    alias = rb_str_new2((const char *)event.data.alias.anchor);
		    PSYCH_TRANSCODE(alias, encoding, internal_enc);
		}

		args[0] = handler;
		args[1] = alias;
		rb_protect(protected_alias, (VALUE)args, &state);
	    }
	    break;
	  case YAML_SCALAR_EVENT:
	    {
		VALUE args[7];
		VALUE anchor = Qnil;
		VALUE tag = Qnil;
		VALUE plain_implicit, quoted_implicit, style;
		VALUE val = rb_str_new(
		    (const char *)event.data.scalar.value,
		    (long)event.data.scalar.length
		    );

		PSYCH_TRANSCODE(val, encoding, internal_enc);

		if(event.data.scalar.anchor) {
		    anchor = rb_str_new2((const char *)event.data.scalar.anchor);
		    PSYCH_TRANSCODE(anchor, encoding, internal_enc);
		}

		if(event.data.scalar.tag) {
		    tag = rb_str_new2((const char *)event.data.scalar.tag);
		    PSYCH_TRANSCODE(tag, encoding, internal_enc);
		}

		plain_implicit =
		    event.data.scalar.plain_implicit == 0 ? Qfalse : Qtrue;

		quoted_implicit =
		    event.data.scalar.quoted_implicit == 0 ? Qfalse : Qtrue;

		style = INT2NUM((long)event.data.scalar.style);

		args[0] = handler;
		args[1] = val;
		args[2] = anchor;
		args[3] = tag;
		args[4] = plain_implicit;
		args[5] = quoted_implicit;
		args[6] = style;
		rb_protect(protected_scalar, (VALUE)args, &state);
	    }
	    break;
	  case YAML_SEQUENCE_START_EVENT:
	    {
		VALUE args[5];
		VALUE anchor = Qnil;
		VALUE tag = Qnil;
		VALUE implicit, style;
		if(event.data.sequence_start.anchor) {
		    anchor = rb_str_new2((const char *)event.data.sequence_start.anchor);
		    PSYCH_TRANSCODE(anchor, encoding, internal_enc);
		}

		tag = Qnil;
		if(event.data.sequence_start.tag) {
		    tag = rb_str_new2((const char *)event.data.sequence_start.tag);
		    PSYCH_TRANSCODE(tag, encoding, internal_enc);
		}

		implicit =
		    event.data.sequence_start.implicit == 0 ? Qfalse : Qtrue;

		style = INT2NUM((long)event.data.sequence_start.style);

		args[0] = handler;
		args[1] = anchor;
		args[2] = tag;
		args[3] = implicit;
		args[4] = style;

		rb_protect(protected_start_sequence, (VALUE)args, &state);
	    }
	    break;
	  case YAML_SEQUENCE_END_EVENT:
	    rb_protect(protected_end_sequence, handler, &state);
	    break;
	  case YAML_MAPPING_START_EVENT:
	    {
		VALUE args[5];
		VALUE anchor = Qnil;
		VALUE tag = Qnil;
		VALUE implicit, style;
		if(event.data.mapping_start.anchor) {
		    anchor = rb_str_new2((const char *)event.data.mapping_start.anchor);
		    PSYCH_TRANSCODE(anchor, encoding, internal_enc);
		}

		if(event.data.mapping_start.tag) {
		    tag = rb_str_new2((const char *)event.data.mapping_start.tag);
		    PSYCH_TRANSCODE(tag, encoding, internal_enc);
		}

		implicit =
		    event.data.mapping_start.implicit == 0 ? Qfalse : Qtrue;

		style = INT2NUM((long)event.data.mapping_start.style);

		args[0] = handler;
		args[1] = anchor;
		args[2] = tag;
		args[3] = implicit;
		args[4] = style;

		rb_protect(protected_start_mapping, (VALUE)args, &state);
	    }
	    break;
	  case YAML_MAPPING_END_EVENT:
	    rb_protect(protected_end_mapping, handler, &state);
	    break;
	  case YAML_NO_EVENT:
	    rb_protect(protected_empty, handler, &state);
	    break;
	  case YAML_STREAM_END_EVENT:
	    rb_protect(protected_end_stream, handler, &state);
	    done = 1;
	    break;
	}
	yaml_event_delete(&event);
	if (state) rb_jump_tag(state);
    }

    return self;
}