Class: JSON::ResumableParser
| Relationships & Source Files | |
| Inherits: | Object |
| Defined in: | ext/json/parser/parser.c |
Class Method Summary
-
.new(opts) ⇒ {}
constructor
private
Creates a new
ResumableParser instance.
Instance Attribute Summary
-
#eos? ⇒ Boolean
readonly
Returns whether the internal buffer has been entirely consumed.
-
#value ⇒ Object
readonly
Returns and consumes the last parsed value.
-
#value? ⇒ Boolean
readonly
Returns whether a parsed value is available.
Instance Method Summary
-
#<<(string) ⇒ self
Appends the given string to the parser's buffer.
-
#clear ⇒ self
Entirely resets the parser state and buffer.
-
#parse ⇒ Boolean
Attempts to parse a
::JSON document from the internal buffer.
-
#parsed_bytes ⇒ Integer
Returns the number of bytes parsed since the start of the current partial value.
-
#partial_value ⇒ Object
Returns the Ruby objects parsed up to this point.
-
#rest ⇒ String
Returns a string containing what remains to be parsed in the buffer.
Constructor Details
.new(opts) ⇒ {} (private)
Creates a new ResumableParser instance.
Argument opts, if given, contains a Hash of options for the parsing.
See Parsing Options.
A ResumableParser is able to parse partial documents and resume parsing later when more of the document is provided:
parser = JSON::ResumableParser.new
parser << '{"user": "george", "role": "ad'
parser.parse # => false
parser.eos? # => true
parser.partial_value # => { "user" => "george", "role" => nil }
parser.rest # => '"ad'
parser << 'min" }[1, 2, 3]'
parser.parse # => true
parser.value # => { "user" => "george", "role" => "admin" }
parser.parse # => true
parser.value # => [1, 2, 3]
Limitations
While ResumableParser is able to parse streams of documents without any
explicit separators between them, it is highly recommended to separate documents
by either spaces or newlines, as otherwise the JSON syntax for numbers may be ambiguous.
When parsing a number, ResumableParser will not consider the number complete until something follows:
parser << '123'
parser.parse # => false
parser << ' '
parser.parse # => true
parser.value # => 123
Security
An incomplete document is buffered in full and there is no size limit, so when reading from an untrusted source the caller is responsible for bounding how much data is fed. For example:
loop do
if parser.parsed_bytes > DOCUMENT_MAX_SIZE
raise "document too large"
end
parser << read_chunk
while parser.parse
process(parser.value)
end
end
# File 'ext/json/parser/parser.c', line 2352
/*
 * JSON::ResumableParser#initialize
 *
 * Rejects a frozen receiver, validates the argument list with
 * rb_scan_args_kw, then initializes the parser configuration from the
 * options argument (nil when no argument was passed).
 *
 * Returns self.
 */
static VALUE cResumableParser_initialize(int argc, VALUE *argv, VALUE self)
{
    rb_check_frozen(self);

    /* The scan is run for its argument checking; the value it stores in
     * `options` is overwritten just below. */
    VALUE options = Qfalse;
    rb_scan_args_kw(RB_SCAN_ARGS_LAST_HASH_KEYWORDS, argc, argv, "0:", &options);

    JSON_ResumableParser *rp = cResumableParser_get(self);

    if (argc > 0) {
        options = argv[0];
    } else {
        options = Qnil;
    }

    parser_config_init(&rp->config, options, self, true);
    return self;
}
Instance Attribute Details
#eos? ⇒ Boolean (readonly)
Returns whether the internal buffer has been entirely consumed.
# File 'ext/json/parser/parser.c', line 2734
/*
 * JSON::ResumableParser#eos?
 *
 * Returns Qtrue when eos() reports that the parser state has reached the end
 * of its input, Qfalse otherwise.
 */
static VALUE cResumableParser_eos_p(VALUE self)
{
    JSON_ResumableParser *rp = cResumableParser_get(self);

    if (eos(&rp->state)) {
        return Qtrue;
    }
    return Qfalse;
}
#value ⇒ Object (readonly)
# File 'ext/json/parser/parser.c', line 2571
/*
 * JSON::ResumableParser#value
 *
 * Returns the value of the last completed document and consumes it: the value
 * is popped from the value stack and its frame is popped from the frame stack.
 *
 * Raises ArgumentError ("no ready value") when the frame stack is empty or the
 * top frame is not in the JSON_PHASE_DONE phase.
 */
static VALUE cResumableParser_value(VALUE self)
{
    JSON_ResumableParser *rp = ResumableParser_acquire(self, false);

    /* Only peek at the frame stack when it actually holds a frame. */
    json_frame *top = NULL;
    if (rp->frames.head > 0) {
        top = json_frame_stack_peek(&rp->frames);
    }

    if (top == NULL || top->phase != JSON_PHASE_DONE) {
        rb_raise(rb_eArgError, "no ready value");
    }

    VALUE ready = *rvalue_stack_peek(rp->state.value_stack, 1);
    rvalue_stack_pop(rp->state.value_stack, 1);
    json_frame_stack_pop(rp->state.frames);
    return ready;
}
#value? ⇒ Boolean (readonly)
Returns whether a parsed value is available.
# File 'ext/json/parser/parser.c', line 2547
/*
 * JSON::ResumableParser#value?
 *
 * Returns Qtrue when a fully parsed value is waiting to be fetched with
 * #value, i.e. when the top frame is in the JSON_PHASE_DONE phase.
 * Returns Qfalse otherwise, including when there is no frame at all.
 */
static VALUE cResumableParser_value_p(VALUE self)
{
    JSON_ResumableParser *parser = ResumableParser_acquire(self, false);

    /* Guard on the frame stack, since that is the stack being peeked below.
     * This matches the check performed by #value, so both methods always
     * agree on whether a value is ready. */
    if (parser->frames.head > 0) {
        json_frame *frame = json_frame_stack_peek(&parser->frames);
        if (frame->phase == JSON_PHASE_DONE) {
            return Qtrue;
        }
    }
    return Qfalse;
}
Instance Method Details
#<<(string) ⇒ self
Appends the given string to the parser's buffer.
# File 'ext/json/parser/parser.c', line 2373
/*
 * JSON::ResumableParser#<<
 *
 * Appends `str` (after convert_encoding) to the parser's buffer and re-points
 * the state's start/cursor/end at the resulting buffer memory.
 *
 * - Empty input is a no-op.
 * - When nothing is left unread, the old buffer is cleared and replaced by
 *   `str` itself (if frozen) or by a hidden shared copy of it.
 * - Otherwise the new data is appended to the existing buffer. A frozen
 *   buffer is first replaced by a hidden copy of its unread tail; a buffer of
 *   at least 512 bytes that is more than half consumed is first compacted in
 *   place.
 *
 * Returns self. Raises if the receiver is frozen.
 */
static VALUE cResumableParser_feed(VALUE self, VALUE str)
{
    rb_check_frozen(self);
    JSON_ResumableParser *parser = ResumableParser_acquire(self, false);

    str = convert_encoding(str);
    if (RSTRING_LEN(str) == 0) {
        return self;
    }

    const size_t unread = parser->state.end - parser->state.cursor;
    size_t cursor_offset = parser->state.cursor - parser->state.start;

    if (unread == 0) {
        /* Everything buffered so far was consumed: start over with `str`. */
        if (parser->buffer) {
            json_str_clear(parser->buffer);
        }
        if (RB_OBJ_FROZEN_RAW(str)) {
            parser->buffer = str;
        } else {
            parser->buffer = rb_obj_hide(rb_str_new_shared(str));
        }
        cursor_offset = 0;
    } else {
        JSON_ASSERT(parser->buffer);
        const size_t buffered = parser->state.end - parser->state.start;
        const size_t already_read = buffered - unread;

        if (RB_OBJ_FROZEN_RAW(parser->buffer)) {
            /* A frozen buffer cannot be appended to: copy its unread tail
             * into a fresh hidden string sized for the tail plus `str`. */
            VALUE fresh = rb_obj_hide(rb_str_buf_new(unread + RSTRING_LEN(str)));
            rb_enc_associate_index(fresh, utf8_encindex);
            char *frozen_ptr = RSTRING_PTR(parser->buffer);
            memcpy(RSTRING_PTR(fresh), frozen_ptr + already_read, unread);
            rb_str_set_len(fresh, unread);
            parser->buffer = fresh;
            cursor_offset = 0;
        } else if (buffered >= 512 && already_read > (buffered / 2)) {
            /* Drop the consumed prefix by sliding the unread tail to the
             * front of the buffer. */
            rb_str_modify(parser->buffer);
            char *base = RSTRING_PTR(parser->buffer);
            memmove(base, base + already_read, unread);
            rb_str_set_len(parser->buffer, unread);
            cursor_offset = 0;
        }

        rb_str_append(parser->buffer, str);
    }

    /* The buffer object or its storage may have changed: refresh pointers. */
    const char *buf_ptr;
    long buf_len;
    RSTRING_GETMEM(parser->buffer, buf_ptr, buf_len);
    parser->state.start = buf_ptr;
    parser->state.end = buf_ptr + buf_len;
    parser->state.cursor = parser->state.start + cursor_offset;
    return self;
}
#clear ⇒ self
Entirely resets the parser state and buffer.
# File 'ext/json/parser/parser.c', line 2593
/*
 * JSON::ResumableParser#clear
 *
 * Entirely resets the parser: drops the buffer, empties the frame and value
 * stacks, empties the name cache, zeroes the byte counters and the per-parse
 * state counters, and clears the start/cursor/end pointers.
 *
 * Returns self.
 */
static VALUE cResumableParser_clear(VALUE self)
{
    JSON_ResumableParser *parser = ResumableParser_acquire(self, false);

    /* Same "no buffer" marker that #parse stores once the input is consumed. */
    parser->buffer = Qfalse;
    parser->complete = true;
    parser->parsed_bytes = 0;
    parser->incomplete_bytes = 0;

    parser->frames.head = 0;
    parser->value_stack.head = 0;

    parser->state.name_cache.length = 0;
    parser->state.current_nesting = 0;
    /* With the frame stack emptied, no array is open any more, so this goes
     * back to zero like every other counter reset here. */
    parser->state.in_array = 0;
    parser->state.emitted_deprecations = 0;
    parser->state.start = parser->state.cursor = parser->state.end = NULL;

    return self;
}
#parse ⇒ Boolean
Attempts to parse a ::JSON document from the internal buffer.
Returns whether a complete document could be parsed.
Raises ParserError when encountering invalid JSON syntax.
The parsed object can be retrieved by calling #value.
# File 'ext/json/parser/parser.c', line 2478
/*
 * JSON::ResumableParser#parse
 *
 * Runs the resumable parser over the buffered input.
 *
 * Returns Qtrue when a complete document was parsed, Qfalse when the input
 * ran out first or when there is no buffer at all. An exception raised while
 * parsing is captured with rb_protect and re-raised only after the
 * bookkeeping below has run and in_use has been cleared.
 */
static VALUE cResumableParser_parse(VALUE self)
{
// NOTE(review): acquired with `true`; in_use is cleared on every exit path
// below, so presumably acquire marks the parser as in use -- confirm.
JSON_ResumableParser *parser = ResumableParser_acquire(self, true);
// The previous run finished (or failed): start byte accounting afresh.
if (parser->complete) {
parser->parsed_bytes = 0;
parser->incomplete_bytes = 0;
parser->complete = false;
}
// Nothing buffered, nothing to parse.
if (!parser->buffer) {
parser->in_use = false;
return Qfalse;
}
// An empty frame stack gets a root frame expecting a value.
if (parser->frames.head == 0) {
json_frame_stack_push(&parser->state, (json_frame){
.type = JSON_FRAME_ROOT,
.phase = JSON_PHASE_VALUE,
});
}
VALUE Vsource = parser->buffer; // Prevent compaction
json_frame *frame = json_frame_stack_peek(&parser->frames);
// The top frame is still DONE from a previous document: discard that value
// and make the frame expect a value again.
if (frame->phase == JSON_PHASE_DONE) {
JSON_ASSERT(parser->value_stack.head == 1);
JSON_ASSERT(parser->frames.head == 1);
frame->phase = JSON_PHASE_VALUE;
rvalue_stack_pop(parser->state.value_stack, 1);
}
struct json_parse_any_args args = {
.state = &parser->state,
.config = &parser->config,
.parser = self,
};
int status;
// Remember where this run started so the consumed bytes can be counted.
const char *initial_cursor = parser->state.cursor;
// rb_protect stores a non-zero status instead of unwinding past this
// function, so the cleanup below always runs.
parser->complete = rb_protect(json_parse_any_resumable_safe, (VALUE)&args, &status);
if (status) {
parser->complete = true; // a parse error is considered complete
}
// Byte accounting is done before trailing whitespace is skipped.
parser->parsed_bytes += parser->state.cursor - initial_cursor;
parser->incomplete_bytes = parser->complete ? 0 : parser->state.end - parser->state.cursor;
json_eat_whitespace(&parser->state, &parser->config, false);
// Input fully consumed: clear the buffer string and mark it absent.
if (eos(&parser->state)) {
json_str_clear(parser->buffer);
parser->buffer = Qfalse;
}
parser->in_use = false;
if (status) {
rb_jump_tag(status); // reraise
}
RB_GC_GUARD(Vsource);
return parser->complete ? Qtrue : Qfalse;
}
#parsed_bytes ⇒ Integer
# File 'ext/json/parser/parser.c', line 2757
/*
 * JSON::ResumableParser#parsed_bytes
 *
 * Returns, as a Ruby Integer, the sum of the parser's parsed_bytes and
 * incomplete_bytes counters.
 */
static VALUE cResumableParser_parsed_bytes(VALUE self)
{
    JSON_ResumableParser *rp = cResumableParser_get(self);
    VALUE total = ULL2NUM(rp->parsed_bytes + rp->incomplete_bytes);
    return total;
}
#partial_value ⇒ Object
# File 'ext/json/parser/parser.c', line 2693
/*
 * JSON::ResumableParser#partial_value
 *
 * Runs cResumableParser_partial_value_body under rb_protect so that the
 * parser's in_use flag is always cleared, then either re-raises the captured
 * exception or returns the body's result.
 */
static VALUE cResumableParser_partial_value(VALUE self)
{
    JSON_ResumableParser *rp = ResumableParser_acquire(self, true);

    int tag;
    VALUE partial = rb_protect(cResumableParser_partial_value_body, self, &tag);

    /* Clear the flag before possibly unwinding out of this function. */
    rp->in_use = false;

    if (tag != 0) {
        rb_jump_tag(tag);
    }
    return partial;
}
#rest ⇒ String
Returns a string containing what remains to be parsed in the buffer:
parser << '{ "message": "unterminated message'
parser.parse # => false
parser.rest # => '"unterminated message'
# File 'ext/json/parser/parser.c', line 2714
/*
 * JSON::ResumableParser#rest
 *
 * Returns a new UTF-8 string holding the part of the buffer located at and
 * after the current cursor. Returns an empty string when the parser has no
 * buffer.
 */
static VALUE cResumableParser_rest(VALUE self)
{
    JSON_ResumableParser *rp = cResumableParser_get(self);

    if (rp->buffer) {
        /* Offset of the cursor from the start of the buffered data. */
        size_t consumed = rp->state.cursor - rp->state.start;

        const char *buf_ptr;
        long buf_len;
        RSTRING_GETMEM(rp->buffer, buf_ptr, buf_len);

        return rb_utf8_str_new(buf_ptr + consumed, buf_len - consumed);
    }

    return rb_utf8_str_new("", 0);
}