Class: CSV::Parser::InputsScanner
Relationships & Source Files | |
Inherits: | Object |
Defined in: | lib/csv/parser.rb |
Overview
CSV::Parser::InputsScanner
receives the IO inputs, the encoding, the row separator and the chunk size. It also controls the life cycle of the object with its methods #keep_start, #keep_end, #keep_back and #keep_drop.
CSV::Parser::InputsScanner
#scan tries to match the pattern at the current position. If there’s a match, the scanner advances the “scan pointer” and returns the matched string. Otherwise, the scanner returns nil.
CSV::Parser::InputsScanner
#rest returns the “rest” of the string (i.e. everything after the scan pointer). If there is no more data (eos? is true), it returns “”.
Class Method Summary
Instance Attribute Summary
- #eos? ⇒ Boolean readonly
Instance Method Summary
Constructor Details
.new(inputs, encoding, row_separator, chunk_size: 8192) ⇒ InputsScanner
# File 'lib/csv/parser.rb', line 87
# Builds a scanner over a list of inputs and loads the first chunk.
#
# @param inputs [Array] input objects; StringIO entries are read whole,
#   anything else is read with +gets(row_separator, chunk_size)+
# @param encoding [Encoding, String] encoding applied to the empty string
#   used when an input yields no data
# @param row_separator [String] separator passed to +gets+ when chunking
# @param chunk_size [Integer] maximum size passed to +gets+ per chunk
def initialize(inputs, encoding, row_separator, chunk_size: 8192)
  # Work on a copy: this object shifts and unshifts entries of @inputs.
  @inputs = inputs.dup
  @encoding = encoding
  @row_separator = row_separator
  @chunk_size = chunk_size
  # With no inputs at all there is nothing left to read from the start.
  @last_scanner = @inputs.empty?
  # Stack of [scanner, start_position, buffer] entries.
  @keeps = []
  read_chunk
end
Instance Attribute Details
#eos? ⇒ Boolean
(readonly)
[ GitHub ]
# File 'lib/csv/parser.rb', line 170
# Tells whether the current scanner has been fully consumed.
#
# Only the scanner for the current chunk is consulted.
#
# @return [Boolean]
def eos?
  @scanner.eos?
end
Instance Method Details
#adjust_last_keep (private)
[ GitHub ]# File 'lib/csv/parser.rb', line 266
# Saves the data consumed so far for the most recent keep into its buffer.
#
# Does nothing when there is no keep. Otherwise the bytes between the
# keep's start (or 0 when the keep was started on a different scanner)
# and the current scan position are appended to the keep's buffer, or
# stored as a new buffer when the keep has none yet.
def adjust_last_keep
  # trace(__method__, :start)
  keep = @keeps.last
  # trace(__method__, :done, :empty) if keep.nil?
  return if keep.nil?

  scanner, start, buffer = keep
  string = @scanner.string
  # The keep started on another scanner: everything consumed from the
  # current one belongs to the keep, so slice from the beginning.
  start = 0 if @scanner != scanner

  if start == 0 && @scanner.eos?
    # The whole string was consumed; no need to slice.
    keep_data = string
  else
    keep_data = string.byteslice(start, @scanner.pos - start)
  end

  if keep_data
    if buffer
      buffer << keep_data
    else
      # Store a copy so later appends never touch the scanner's own string.
      keep[2] = keep_data.dup
    end
  end
  # trace(__method__, :done)
end
#check(pattern)
[ GitHub ]# File 'lib/csv/parser.rb', line 257
# Forwards to the current scanner's +check+ and returns its result.
#
# @param pattern [Regexp, String] pattern handed to the scanner
# @return [String, nil] whatever the underlying scanner's +check+ returns
def check(pattern)
  matched = @scanner.check(pattern)
  matched
end
#each_line(row_separator) {|buffer| ... }
# File 'lib/csv/parser.rb', line 97
# Yields the remaining data line by line, reading further chunks as needed.
#
# A line that does not end with +row_separator+ at the end of a chunk is
# held back and completed with data from the next chunk. The scan position
# is moved past each line before it is yielded.
#
# @param row_separator [String] separator that terminates a line
# @yieldparam buffer [String] one line, including its separator when present
# @return [Enumerator] when no block is given
def each_line(row_separator)
  return enum_for(__method__, row_separator) unless block_given?

  buffer = nil
  input = @scanner.rest
  position = @scanner.pos
  offset = 0
  n_row_separator_chars = row_separator.size
  # trace(__method__, :start, input)
  while true
    input.each_line(row_separator) do |line|
      @scanner.pos += line.bytesize
      if buffer
        if n_row_separator_chars == 2 &&
           buffer.end_with?(row_separator[0]) &&
           line.start_with?(row_separator[1])
          # A two-character separator was split across two chunks:
          # move its second character onto the pending line and emit it.
          buffer << line[0]
          line = line[1..-1]
          position += buffer.bytesize + offset
          @scanner.pos = position
          offset = 0
          yield(buffer)
          buffer = nil
          next if line.empty?
        else
          # Continue the pending partial line with this chunk's data.
          buffer << line
          line = buffer
          buffer = nil
        end
      end
      if line.end_with?(row_separator)
        position += line.bytesize + offset
        @scanner.pos = position
        offset = 0
        yield(line)
      else
        # Incomplete line: hold it until more data arrives.
        buffer = line
      end
    end
    break unless read_chunk

    input = @scanner.rest
    position = @scanner.pos
    # The pending bytes came from the previous chunk, so they must not be
    # counted when positioning inside the new scanner.
    offset = -buffer.bytesize if buffer
  end
  yield(buffer) if buffer
end
#keep_back
[ GitHub ]# File 'lib/csv/parser.rb', line 197
# Abandons the most recent keep and rewinds scanning to where it started.
#
# When the keep has a buffer, the scanner is replaced by one over that
# buffer and the data of the current scanner is pushed back to the front
# of the inputs so it is read again afterwards. Without a buffer, the
# current scanner is simply repositioned to the keep's start.
#
# @raise [UnexpectedError] when the keep has no buffer but was started on
#   a different scanner than the current one
def keep_back
  # trace(__method__, :start)
  scanner, start, buffer = @keeps.pop
  if buffer
    # trace(__method__, :rescan, start, buffer)
    string = @scanner.string
    if scanner == @scanner
      # NOTE(review): the slice length is bytesize - pos - start; confirm
      # this is the intended span of data to push back.
      keep = string.byteslice(start,
                              string.bytesize - @scanner.pos - start)
    else
      keep = string
    end
    if keep && !keep.empty?
      @inputs.unshift(StringIO.new(keep))
      # There is an input to read again, so this is not the last scanner.
      @last_scanner = false
    end
    @scanner = StringScanner.new(buffer)
  else
    if @scanner != scanner
      message = "scanners are different but no buffer: " \
                "#{@scanner.inspect}(#{@scanner.object_id}): " \
                "#{scanner.inspect}(#{scanner.object_id})"
      raise UnexpectedError, message
    end
    # trace(__method__, :repos, start, buffer)
    @scanner.pos = start
    last_scanner, last_start, last_buffer = @keeps.last
    # Drop the last buffer when the last buffer is the same data
    # in the last keep. If we keep it, we have duplicated data
    # by the next keep_back.
    if last_scanner == @scanner &&
       last_buffer &&
       last_buffer == last_scanner.string.byteslice(last_start, start)
      @keeps.last[2] = nil
    end
  end
  read_chunk if @scanner.eos?
end
#keep_drop
[ GitHub ]# File 'lib/csv/parser.rb', line 236
# Discards the most recent keep while preserving its buffered data.
#
# When the dropped keep has a buffer and an older keep remains, the buffer
# is appended to the older keep's buffer (or becomes it when the older
# keep has none).
def keep_drop
  _, _, buffer = @keeps.pop
  # trace(__method__, :done, :empty) unless buffer
  return unless buffer

  last_keep = @keeps.last
  # trace(__method__, :done, :no_last_keep) unless last_keep
  return unless last_keep

  if last_keep[2]
    last_keep[2] << buffer
  else
    last_keep[2] = buffer
  end
  # trace(__method__, :done)
end
#keep_end
[ GitHub ]# File 'lib/csv/parser.rb', line 181
# Finishes the most recent keep and returns the data it covers.
#
# The data is the slice of the current scanner's string from the keep's
# start (or from 0 when the keep was started on a different scanner) up to
# the current scan position, prefixed by the keep's buffer when it has one.
#
# @return [String] the kept data
def keep_end
  # trace(__method__, :start)
  scanner, start, buffer = @keeps.pop
  if scanner == @scanner
    keep = @scanner.string.byteslice(start, @scanner.pos - start)
  else
    keep = @scanner.string.byteslice(0, @scanner.pos)
  end
  if buffer
    buffer << keep
    keep = buffer
  end
  # trace(__method__, :done, keep)
  keep
end
#keep_start
[ GitHub ]# File 'lib/csv/parser.rb', line 174
# Starts a new keep at the current scan position.
#
# The previous keep (if any) is brought up to date first, then a new
# [scanner, position, buffer] entry with an empty (nil) buffer is pushed.
def keep_start
  # trace(__method__, :start)
  adjust_last_keep
  @keeps.push([@scanner, @scanner.pos, nil])
  # trace(__method__, :done)
end
#read_chunk (private)
[ GitHub ]# File 'lib/csv/parser.rb', line 294
# Loads the next chunk of data into a fresh scanner.
#
# StringIO inputs are read in full; other inputs are read with
# +gets(@row_separator, @chunk_size)+. Exhausted inputs are removed from
# the queue, and @last_scanner is set once the queue is empty.
#
# @return [Boolean] true when a new scanner was loaded, false when there
#   is nothing more to read
# @raise [InvalidEncoding] when the data read is not valid in its encoding
def read_chunk
  return false if @last_scanner

  # Save what the current scanner consumed for the active keep before
  # the scanner is replaced.
  adjust_last_keep

  input = @inputs.first
  case input
  when StringIO
    string = input.read
    raise InvalidEncoding unless string.valid_encoding?
    # trace(__method__, :stringio, string)
    @scanner = StringScanner.new(string)
    @inputs.shift
    @last_scanner = @inputs.empty?
    true
  else
    chunk = input.gets(@row_separator, @chunk_size)
    if chunk
      raise InvalidEncoding unless chunk.valid_encoding?
      # trace(__method__, :chunk, chunk)
      @scanner = StringScanner.new(chunk)
      if input.respond_to?(:eof?) && input.eof?
        @inputs.shift
        @last_scanner = @inputs.empty?
      end
      true
    else
      # trace(__method__, :no_chunk)
      # This input is exhausted: install an empty scanner and move on to
      # the next input, if there is one.
      @scanner = StringScanner.new("".encode(@encoding))
      @inputs.shift
      @last_scanner = @inputs.empty?
      if @last_scanner
        false
      else
        read_chunk
      end
    end
  end
end
#rest
[ GitHub ]# File 'lib/csv/parser.rb', line 253
# Returns the unscanned remainder of the current scanner's string.
#
# @return [String] everything after the scan pointer
def rest
  @scanner.rest
end
#scan(pattern)
[ GitHub ]# File 'lib/csv/parser.rb', line 143
# Scans +pattern+ at the current position of the current scanner.
#
# When the match consumes the rest of the current chunk and more inputs
# may follow, the next chunk is loaded so later scans see fresh data.
#
# @param pattern [Regexp, String] pattern handed to the scanner
# @return [String, nil] the matched string, or nil when nothing matched
def scan(pattern)
  # trace(__method__, pattern, :start)
  value = @scanner.scan(pattern)
  # trace(__method__, pattern, :done, :last, value) if @last_scanner
  return value if @last_scanner

  read_chunk if value && @scanner.eos?
  # trace(__method__, pattern, :done, value)
  value
end
#scan_all(pattern)
[ GitHub ]# File 'lib/csv/parser.rb', line 154
# Scans +pattern+ and keeps extending the match across following chunks.
#
# While the current chunk is fully consumed, another chunk can be read and
# it also matches +pattern+ at its start, that match is appended to the
# result.
#
# @param pattern [Regexp, String] pattern handed to the scanner
# @return [String, nil] the accumulated match, or nil when nothing matched
def scan_all(pattern)
  # trace(__method__, pattern, :start)
  value = @scanner.scan(pattern)
  # trace(__method__, pattern, :done, :last, value) if @last_scanner
  return value if @last_scanner

  # trace(__method__, pattern, :done, :nil) if value.nil?
  return nil if value.nil?

  while @scanner.eos? && read_chunk && (sub_value = @scanner.scan(pattern))
    # trace(__method__, pattern, :sub, sub_value)
    value << sub_value
  end
  # trace(__method__, pattern, :done, value)
  value
end
#trace(*args) (private)
[ GitHub ]# File 'lib/csv/parser.rb', line 262
# Debugging aid: pretty-prints the given arguments followed by the current
# scanner, its string, its position and the keep stack.
#
# The safe-navigation calls allow tracing while no scanner is set.
def trace(*args)
  scanner_state = [@scanner, @scanner&.string, @scanner&.pos, @keeps]
  pp(args + scanner_state)
end