123456789_123456789_123456789_123456789_123456789_

Class: JSON::ResumableParser

Relationships & Source Files
Inherits: Object
Defined in: ext/json/parser/parser.c

Class Method Summary

Instance Attribute Summary

Instance Method Summary

Constructor Details

.new(opts) ⇒ {} (private)

Creates a new ResumableParser instance.

Argument opts, if given, contains a Hash of options for the parsing. See Parsing Options.

A ResumableParser is able to parse partial documents and resume parsing later when more of the document is provided:

parser = JSON::ResumableParser.new
parser << '{"user": "george", "role": "ad'
parser.parse # => false
parser.eos? # => false
parser.partial_value # => { "user" => "george", "role" => nil }
parser.rest # => '"ad'

parser << 'min" }[1, 2, 3]'
parser.parse # => true
parser.value # => { "user" => "george", "role" => "admin" }

parser.parse # => true
parser.value # => [1, 2, 3]

Limitations

While ResumableParser is able to parse streams of documents without any explicit separators between them, it is highly recommended to separate documents by either spaces or newlines, as otherwise the JSON syntax for numbers may be ambiguous. When parsing a number, ResumableParser will not consider the number complete until something follows:

parser << '123'
parser.parse # => false
parser << ' '
parser.parse # => true
parser.value # => 123

Security

An incomplete document is buffered in full and there is no size limit, so when reading from an untrusted source the caller is responsible for bounding how much data is fed. For example:

loop do
  if parser.parsed_bytes > DOCUMENT_MAX_SIZE
    raise "document too large"
  end

  parser << read_chunk
  while parser.parse
    process(parser.value)
  end
end

[ GitHub ]

  
# File 'ext/json/parser/parser.c', line 2352

static VALUE cResumableParser_initialize(int argc, VALUE *argv, VALUE self)
{
    // Initializes a ResumableParser: validates the call shape, then builds
    // the parser configuration from the optional options Hash.
    VALUE opts = Qfalse;
    JSON_ResumableParser *parser;

    rb_check_frozen(self);

    // "0:" means no positional arguments, only an optional trailing
    // keyword/options Hash; anything else raises ArgumentError here.
    rb_scan_args_kw(RB_SCAN_ARGS_LAST_HASH_KEYWORDS, argc, argv, "0:", &opts);
    parser = cResumableParser_get(self);

    // NOTE(review): the value stored by rb_scan_args_kw is replaced by the
    // raw first argument (or nil when none was passed) — confirm the scan is
    // only meant to validate the arguments.
    if (argc > 0) {
        opts = argv[0];
    } else {
        opts = Qnil;
    }
    parser_config_init(&parser->config, opts, self, true);

    return self;
}

Instance Attribute Details

#eos?Boolean (readonly)

Returns whether the internal buffer has been entirely consumed.

[ GitHub ]

  
# File 'ext/json/parser/parser.c', line 2734

static VALUE cResumableParser_eos_p(VALUE self)
{
    // Exposes eos() on the parser state as a Ruby boolean.
    JSON_ResumableParser *parser = cResumableParser_get(self);

    if (eos(&parser->state)) {
        return Qtrue;
    }
    return Qfalse;
}

#valueObject (readonly)

Returns and consumes the last parsed value. Raises ArgumentError if there is no parsed value or if it was already retrieved:

parser << '[1][2]'
parser.value # ArgumentError no ready value
parser.parse # => true
parser.value # => [1]
parser.value # ArgumentError no ready value
[ GitHub ]

  
# File 'ext/json/parser/parser.c', line 2571

static VALUE cResumableParser_value(VALUE self)
{
    // Returns the completed document and removes it from the parser, or
    // raises ArgumentError when no completed document is waiting.
    JSON_ResumableParser *parser = ResumableParser_acquire(self, false);

    // Only look at the top frame when the frame stack is non-empty.
    json_frame *top = NULL;
    if (parser->frames.head > 0) {
        top = json_frame_stack_peek(&parser->frames);
    }

    if (!top || top->phase != JSON_PHASE_DONE) {
        rb_raise(rb_eArgError, "no ready value");
    }

    // Read the value first, then drop both it and its frame so that a second
    // call finds nothing ready.
    VALUE ready = *rvalue_stack_peek(parser->state.value_stack, 1);
    rvalue_stack_pop(parser->state.value_stack, 1);
    json_frame_stack_pop(parser->state.frames);
    return ready;
}

#value?Boolean (readonly)

Returns whether a parsed value is available.

[ GitHub ]

  
# File 'ext/json/parser/parser.c', line 2547

static VALUE cResumableParser_value_p(VALUE self)
{
    // Returns Qtrue when a completed document is waiting to be retrieved,
    // Qfalse otherwise. Does not consume the value.
    JSON_ResumableParser *parser = ResumableParser_acquire(self, false);

    // The frame stack is what gets peeked below, so it must be known to be
    // non-empty; the value stack must also hold the value itself.
    if (parser->frames.head > 0 && parser->value_stack.head > 0) {
        json_frame *frame = json_frame_stack_peek(&parser->frames);
        if (frame->phase == JSON_PHASE_DONE) {
            return Qtrue;
        }
    }
    return Qfalse;
}

Instance Method Details

#<<(string) ⇒ self

Appends the given string to the parser's buffer.

[ GitHub ]

  
# File 'ext/json/parser/parser.c', line 2373

static VALUE cResumableParser_feed(VALUE self, VALUE str)
{
    // Appends `str` to the parser's input buffer and re-points the parser
    // state (start / cursor / end) at the resulting buffer. Returns self.
    rb_check_frozen(self);

    JSON_ResumableParser *parser = ResumableParser_acquire(self, false);

    str = convert_encoding(str);
    if (!RSTRING_LEN(str)) {
        // Nothing to add: leave buffer and cursor untouched.
        return self;
    }

    // Cursor position relative to the buffer start, and how many bytes are
    // still unconsumed after it.
    size_t offset = parser->state.cursor - parser->state.start;
    const size_t remaining = parser->state.end - parser->state.cursor;

    if (!remaining) {
        // Everything buffered so far was consumed: the new string simply
        // becomes the buffer.
        if (parser->buffer) {
            json_str_clear(parser->buffer);
        }
        // A frozen string is referenced as-is; otherwise a hidden copy made
        // with rb_str_new_shared is kept instead of the caller's object.
        parser->buffer = RB_OBJ_FROZEN_RAW(str) ? str : rb_obj_hide(rb_str_new_shared(str));
        offset = 0;
    } else {
        JSON_ASSERT(parser->buffer);

        const size_t size = parser->state.end - parser->state.start;
        const size_t consumed = size - remaining;

        if (RB_OBJ_FROZEN_RAW(parser->buffer)) {
            // The current buffer is frozen and cannot be appended to: build a
            // new hidden buffer holding only the unconsumed tail, with room
            // for the incoming string.
            VALUE new_buffer = rb_obj_hide(rb_str_buf_new(remaining + RSTRING_LEN(str)));
            rb_enc_associate_index(new_buffer, utf8_encindex);

            char *old_ptr = RSTRING_PTR(parser->buffer);
            memcpy(RSTRING_PTR(new_buffer), old_ptr + consumed, remaining);
            rb_str_set_len(new_buffer, remaining);
            offset = 0;
            parser->buffer = new_buffer;
        } else if (consumed > (size / 2) && size >= 512) {
            // More than half of a buffer of at least 512 bytes is already
            // consumed: slide the unconsumed tail to the front in place
            // (memmove, since the regions may overlap).
            rb_str_modify(parser->buffer);
            char *old_ptr = RSTRING_PTR(parser->buffer);
            memmove(old_ptr, old_ptr + consumed, remaining);
            rb_str_set_len(parser->buffer, remaining);
            offset = 0;
        }
        // In every sub-case the new data is appended after the unconsumed
        // bytes; `offset` keeps its old value if nothing was shifted.
        rb_str_append(parser->buffer, str);
    }

    // Re-read pointer and length from the (possibly new) buffer, then rebuild
    // the three state pointers from them.
    long len;
    const char *start;
    RSTRING_GETMEM(parser->buffer, start, len);
    parser->state.start = start;
    parser->state.end = start + len;
    parser->state.cursor = parser->state.start + offset;

    return self;
}

#clearself

Entirely resets the parser state and buffer.

[ GitHub ]

  
# File 'ext/json/parser/parser.c', line 2593

static VALUE cResumableParser_clear(VALUE self)
{
    // Resets the parser to a pristine state: drops the input buffer, empties
    // the frame and value stacks, and zeroes every counter. Returns self.
    JSON_ResumableParser *parser = ResumableParser_acquire(self, false);

    // Input buffer and byte accounting.
    parser->buffer = Qfalse;
    parser->complete = true;
    parser->parsed_bytes = 0;
    parser->incomplete_bytes = 0;

    // Any partially built document is discarded.
    parser->frames.head = 0;
    parser->value_stack.head = 0;

    // Per-parse state. in_array is reset to 0 like the other counters so
    // that no state from an abandoned document carries over into the next
    // parse.
    parser->state.name_cache.length = 0;
    parser->state.current_nesting = 0;
    parser->state.in_array = 0;
    parser->state.emitted_deprecations = 0;
    parser->state.start = parser->state.cursor = parser->state.end = NULL;
    return self;
}

#parseBoolean

Attempts to parse a ::JSON document from the internal buffer. Returns whether a complete document could be parsed.

It does raise ParserError when encountering invalid JSON syntax.

The parsed object can be retrieved by calling #value

[ GitHub ]

  
# File 'ext/json/parser/parser.c', line 2478

static VALUE cResumableParser_parse(VALUE self)
{
    // Tries to parse one document from the buffered input. Returns Qtrue when
    // a complete document was parsed and Qfalse when more input is needed.
    // Any exception raised by the parse is re-raised after the bookkeeping
    // below has run.
    // NOTE(review): the `true` argument presumably marks the parser as in use
    // (in_use is cleared on every exit path below) — confirm against
    // ResumableParser_acquire.
    JSON_ResumableParser *parser = ResumableParser_acquire(self, true);

    if (parser->complete) {
        // The previous call finished a document (or errored): byte accounting
        // starts over for the next one.
        parser->parsed_bytes = 0;
        parser->incomplete_bytes = 0;
        parser->complete = false;
    }

    if (!parser->buffer) {
        // No buffered input: nothing to parse.
        parser->in_use = false;
        return Qfalse;
    }

    if (parser->frames.head == 0) {
        // No parse in progress: start a new document with a root frame.
        json_frame_stack_push(&parser->state, (json_frame){
            .type = JSON_FRAME_ROOT,
            .phase = JSON_PHASE_VALUE,
        });
    }

    VALUE Vsource = parser->buffer; // Prevent compaction

    json_frame *frame = json_frame_stack_peek(&parser->frames);

    if (frame->phase == JSON_PHASE_DONE) {
        // A completed value is still sitting on the stack: discard it and
        // reuse the root frame for the next document.
        JSON_ASSERT(parser->value_stack.head == 1);
        JSON_ASSERT(parser->frames.head == 1);

        frame->phase = JSON_PHASE_VALUE;
        rvalue_stack_pop(parser->state.value_stack, 1);
    }

    struct json_parse_any_args args = {
        .state = &parser->state,
        .config = &parser->config,
        .parser = self,
    };
    int status;
    const char *initial_cursor = parser->state.cursor;
    // Run under rb_protect so the accounting and in_use reset below still
    // happen when the parse raises.
    parser->complete = rb_protect(json_parse_any_resumable_safe, (VALUE)&args, &status);

    if (status) {
        parser->complete = true; // a parse error is considered complete
    }

    // Bytes the cursor advanced during this call are added to the running
    // total; when the document is incomplete, the bytes still left after the
    // cursor are tracked separately.
    parser->parsed_bytes += parser->state.cursor - initial_cursor;
    parser->incomplete_bytes = parser->complete ? 0 : parser->state.end - parser->state.cursor;

    // Skip whitespace after the parsed region; if that reaches the end of the
    // input, the buffer is no longer needed and is released.
    json_eat_whitespace(&parser->state, &parser->config, false);
    if (eos(&parser->state)) {
        json_str_clear(parser->buffer);
        parser->buffer = Qfalse;
    }
    parser->in_use = false;

    if (status) {
        rb_jump_tag(status); // reraise
    }
    RB_GC_GUARD(Vsource);
    return parser->complete ? Qtrue : Qfalse;
}

#parsed_bytesInteger

Returns the number of bytes parsed since the start of the current partial value. This is intended to be used for securing against untrusted input:

loop do
  if parser.parsed_bytes > DOCUMENT_MAX_SIZE
    raise "document too large"
  end

  parser << read_chunk
  while parser.parse
    process(parser.value)
  end
end

[ GitHub ]

  
# File 'ext/json/parser/parser.c', line 2757

static VALUE cResumableParser_parsed_bytes(VALUE self)
{
    // Returns, as a Ruby Integer, the sum of the bytes already parsed and the
    // bytes still pending for the current document.
    JSON_ResumableParser *parser = cResumableParser_get(self);

    return ULL2NUM(parser->incomplete_bytes + parser->parsed_bytes);
}

#partial_valueObject

Returns the Ruby objects parsed up to this point:

parser << '[1, [2, 3,'
parser.parse # => false
parser.value # ArgumentError no ready value
parser.partial_value # => [1, [2, 3]]
[ GitHub ]

  
# File 'ext/json/parser/parser.c', line 2693

static VALUE cResumableParser_partial_value(VALUE self)
{
    // Builds the partial value via cResumableParser_partial_value_body while
    // the parser is acquired, and always clears in_use, even when the body
    // raises.
    int error_tag = 0;
    JSON_ResumableParser *parser = ResumableParser_acquire(self, true);
    VALUE snapshot = rb_protect(cResumableParser_partial_value_body, self, &error_tag);

    // Clear in_use before propagating any exception.
    parser->in_use = false;

    if (error_tag != 0) {
        rb_jump_tag(error_tag);
    }
    return snapshot;
}

#restString

Returns a string containing what remains to be parsed in the buffer:

parser << '{ "message": "unterminated message'
parser.parse # => false
parser.rest # => '"unterminated message'

[ GitHub ]

  
# File 'ext/json/parser/parser.c', line 2714

static VALUE cResumableParser_rest(VALUE self)
{
    // Returns a new UTF-8 String holding the bytes of the buffer from the
    // current cursor position to the end; empty when there is no buffer.
    JSON_ResumableParser *parser = cResumableParser_get(self);
    VALUE buffer = parser->buffer;

    if (!buffer) {
        return rb_utf8_str_new("", 0);
    }

    // Distance of the cursor from the start of the buffered input.
    const size_t consumed = parser->state.cursor - parser->state.start;
    const char *bytes = RSTRING_PTR(buffer);
    long length = RSTRING_LEN(buffer);

    return rb_utf8_str_new(bytes + consumed, length - consumed);
}