Class: CSV::Parser::InputsScanner
Relationships & Source Files | |
Inherits: | Object |
Defined in: | lib/csv/parser.rb |
Overview
CSV::Parser::InputsScanner
receives IO inputs, an encoding and the chunk_size. It also controls the life cycle of the object with its methods #keep_start, #keep_end, #keep_back and #keep_drop.
CSV::Parser::InputsScanner
#scan tries to match the pattern at the current position. If there’s a match, the scanner advances the “scan pointer” and returns the matched string. Otherwise, the scanner returns nil.
CSV::Parser::InputsScanner
#rest returns the “rest” of the string (i.e. everything after the scan pointer). If there is no more data (eos? is true), it returns “”.
Class Method Summary
Instance Attribute Summary
- #eos? ⇒ Boolean readonly
Instance Method Summary
Constructor Details
.new(inputs, encoding, chunk_size: 8192) ⇒ InputsScanner
# File 'lib/csv/parser.rb', line 87
def initialize(inputs, encoding, chunk_size: 8192) @inputs = inputs.dup @encoding = encoding @chunk_size = chunk_size @last_scanner = @inputs.empty? @keeps = [] read_chunk end
Instance Attribute Details
#eos? ⇒ Boolean
(readonly)
[ GitHub ]
# File 'lib/csv/parser.rb', line 163
# True when the scan pointer sits at the end of the chunk that is currently
# loaded into the underlying StringScanner.
def eos?
  current = @scanner
  current.eos?
end
Instance Method Details
#each_line(row_separator) {|buffer| ... }
# File 'lib/csv/parser.rb', line 96
def each_line(row_separator) buffer = nil input = @scanner.rest position = @scanner.pos offset = 0 n_row_separator_chars = row_separator.size while true input.each_line(row_separator) do |line| @scanner.pos += line.bytesize if buffer if n_row_separator_chars == 2 and buffer.end_with?(row_separator[0]) and line.start_with?(row_separator[1]) buffer << line[0] line = line[1..-1] position += buffer.bytesize + offset @scanner.pos = position offset = 0 yield(buffer) buffer = nil next if line.empty? else buffer << line line = buffer buffer = nil end end if line.end_with?(row_separator) position += line.bytesize + offset @scanner.pos = position offset = 0 yield(line) else buffer = line end end break unless read_chunk input = @scanner.rest position = @scanner.pos offset = -buffer.bytesize if buffer end yield(buffer) if buffer end
#keep_back
[ GitHub ]# File 'lib/csv/parser.rb', line 181
# Discards the most recent keep and rewinds scanning to where it started.
#
# If the keep never crossed a chunk boundary (no buffered text), the scan
# pointer is simply moved back. Otherwise the buffered text becomes the
# current scanner, and the current chunk (from the keep's start to its end)
# is pushed back to the front of the inputs so it is read again afterwards.
def keep_back
  start, carried = @keeps.pop
  if carried
    current = @scanner.string
    tail = current.byteslice(start, current.bytesize - start)
    unless tail.nil? || tail.empty?
      @inputs.unshift(StringIO.new(tail))
      # A new input was queued, so the current scanner is no longer the last.
      @last_scanner = false
    end
    @scanner = StringScanner.new(carried)
  else
    @scanner.pos = start
  end
  read_chunk if @scanner.eos?
end
#keep_drop
[ GitHub ]# File 'lib/csv/parser.rb', line 197
# Forgets the most recent keep without rewinding or returning its text.
# The removed entry (or nil when there was none) is the return value.
def keep_drop
  discarded = @keeps.pop
  discarded
end
#keep_end
[ GitHub ]# File 'lib/csv/parser.rb', line 171
# Closes the most recent keep and returns the text scanned since it began.
#
# The text is the slice of the current chunk between the keep's start and
# the scan pointer. If earlier chunks were buffered for this keep, the slice
# is appended to that buffer and the buffer is returned.
def keep_end
  start, carried = @keeps.pop
  chunk_part = @scanner.string.byteslice(start, @scanner.pos - start)
  return chunk_part unless carried

  carried << chunk_part
end
#keep_start
[ GitHub ]# File 'lib/csv/parser.rb', line 167
# Opens a keep at the current scan position. Each entry is a pair of the
# start position and a buffer, which is nil until text has to be carried
# across a chunk boundary.
def keep_start
  @keeps << [@scanner.pos, nil]
end
#read_chunk (private)
[ GitHub ]# File 'lib/csv/parser.rb', line 206
# Replaces the current scanner with one over the next chunk of input.
#
# Before the scanner is replaced, the text covered so far by the most recent
# keep is copied into that keep's buffer and its start is reset to 0, so the
# keep stays valid for the new chunk.
#
# A StringIO input is read completely in one go. Any other input is read
# with gets(nil, @chunk_size) and is dropped once it reports eof? or returns
# no data. When an input yields no data and more inputs follow, the next
# input is tried.
#
# Returns true when a chunk was loaded, false when nothing is left to read.
# Raises InvalidEncoding when the text read is not valid in its encoding.
def read_chunk
  return false if @last_scanner

  pending = @keeps.last
  unless @keeps.empty?
    from = pending[0]
    covered = @scanner.string.byteslice(from, @scanner.pos - from)
    if covered
      if pending[1]
        pending[1] << covered
      else
        pending[1] = covered.dup
      end
    end
    pending[0] = 0
  end

  source = @inputs.first

  if StringIO === source
    content = source.read
    raise InvalidEncoding unless content.valid_encoding?
    @scanner = StringScanner.new(content)
    @inputs.shift
    @last_scanner = @inputs.empty?
    return true
  end

  chunk = source.gets(nil, @chunk_size)
  unless chunk
    # This input is exhausted: leave an empty scanner in place and move on.
    @scanner = StringScanner.new("".encode(@encoding))
    @inputs.shift
    @last_scanner = @inputs.empty?
    return @last_scanner ? false : read_chunk
  end

  raise InvalidEncoding unless chunk.valid_encoding?
  @scanner = StringScanner.new(chunk)
  if source.respond_to?(:eof?) && source.eof?
    @inputs.shift
    @last_scanner = @inputs.empty?
  end
  true
end
#rest
[ GitHub ]# File 'lib/csv/parser.rb', line 201
# Returns whatever the underlying StringScanner still has after its scan
# pointer in the currently loaded chunk.
def rest
  current = @scanner
  current.rest
end
#scan(pattern)
[ GitHub ]# File 'lib/csv/parser.rb', line 140
# Tries to match +pattern+ at the scan pointer of the current chunk.
#
# Returns the matched string, or nil when there is no match. After a match
# that consumes the rest of a chunk that is not the last one, the next chunk
# is loaded so that later calls continue with fresh input.
def scan(pattern)
  matched = @scanner.scan(pattern)
  return matched if @last_scanner
  return nil unless matched

  read_chunk if @scanner.eos?
  matched
end
#scan_all(pattern)
[ GitHub ]# File 'lib/csv/parser.rb', line 152
# Matches +pattern+ at the scan pointer and, while each match runs to the end
# of its chunk, keeps loading chunks and appending further matches.
#
# Returns the accumulated match, or nil when the first attempt fails. On the
# last chunk only the single match result is returned.
def scan_all(pattern)
  matched = @scanner.scan(pattern)
  return matched if @last_scanner || matched.nil?

  while @scanner.eos? && read_chunk
    continuation = @scanner.scan(pattern)
    break unless continuation

    matched << continuation
  end
  matched
end