Class: CSV::Parser

Relationships & Source Files
Namespace Children
Classes: `InputsScanner`, `Scanner`, `UnoptimizedStringIO`
Exceptions: `InvalidEncoding`, `UnexpectedError`
Inherits:	Object
Defined in:	lib/csv/parser.rb

Overview

Note: Don’t use this class directly. This is an internal class.

Constant Summary

SCANNER_TEST =
# File 'lib/csv/parser.rb', line 836
```
(ENV["CSV_PARSER_SCANNER_TEST"] == "yes")
```
SCANNER_TEST_CHUNK_SIZE_NAME =
# File 'lib/csv/parser.rb', line 838
```
"CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"
```
SCANNER_TEST_CHUNK_SIZE_VALUE =
# File 'lib/csv/parser.rb', line 839

Class Method Summary

.new(input, options) ⇒ Parser constructor

Instance Attribute Summary

#header_row? ⇒ Boolean readonly
#liberal_parsing? ⇒ Boolean readonly
#return_headers? ⇒ Boolean readonly
#skip_blanks? ⇒ Boolean readonly
#unconverted_fields? ⇒ Boolean readonly
#use_headers? ⇒ Boolean readonly
#may_quoted? ⇒ Boolean readonly private

Instance Method Summary

#add_unconverted_fields(row, fields)

This method injects an instance variable unconverted_fields into row and an accessor method for row called unconverted_fields().
#build_scanner

See additional method definition at line 840.
#column_separator
#emit_row(row, quoted_fields) {|row| ... }
#field_size_limit
#headers
#ignore_broken_line
#line
#lineno
#max_field_size
#parse(&block)
#parse_column_end
#parse_column_value
#parse_no_quote(&block)
#parse_quotable_loose(&block)
#parse_quotable_robust(&block)
#parse_quoted_column_value
#parse_row_end
#parse_unquoted_column_value
#quote_character
#row_separator
#skip_line?(line) ⇒ Boolean
#skip_lines
#skip_needless_lines
#start_row
#strip_value(value)
#validate_field_size(field)
#adjust_headers(headers, quoted_fields) private
#detect_row_separator(sample, cr, lf) private
#last_line private
#parse_headers(row) private
#prepare private

A set of tasks to prepare the file in order to parse it.
#prepare_backslash private
#prepare_header private
#prepare_line private
#prepare_parser private
#prepare_quote_character private
#prepare_quoted private
#prepare_separators private
#prepare_skip_lines private
#prepare_strip private
#prepare_unquoted private
#prepare_variable private
#resolve_row_separator(separator) private
#validate_strip_and_col_sep_options private

This method verifies that there are no (obvious) ambiguities with the provided #col_sep and strip parsing options.

Constructor Details

.new(input, options) ⇒ `Parser`

[ GitHub ]

# File 'lib/csv/parser.rb', line 324


# Stores the parser's collaborators and runs the preparation steps.
#
# input   - the data source to parse.
# options - options Hash; the prepare_* methods read their settings from it.
#
# @samples starts empty; resolve_row_separator appends the chunks it reads
# from the input while sniffing the row separator.
def initialize(input, options)
  @input, @options, @samples = input, options, []

  prepare
end

Instance Attribute Details

#header_row? ⇒ `Boolean` (readonly)

[ GitHub ]

# File 'lib/csv/parser.rb', line 364


# Truthy while headers are in use but have not been set yet, i.e. the next
# row to be read will be treated as the header row.
def header_row?
  @use_headers && @headers.nil?
end

#liberal_parsing? ⇒ `Boolean` (readonly)

[ GitHub ]

# File 'lib/csv/parser.rb', line 376


# Returns the stored liberal-parsing flag.
def liberal_parsing?; @liberal_parsing; end

#may_quoted? ⇒ `Boolean` (readonly, private)

[ GitHub ]

# File 'lib/csv/parser.rb', line 804


def may_quoted?
  return false if @quote_character.nil?

  if @input.is_a?(StringIO)
    pos = @input.pos
    sample = @input.read
    @input.seek(pos)
  else
    return false if @samples.empty?
    sample = @samples.first
  end
  sample[0, 128].index(@quote_character)
end

#return_headers? ⇒ `Boolean` (readonly)

[ GitHub ]

# File 'lib/csv/parser.rb', line 368


# Returns the stored :return_headers setting.
def return_headers?; @return_headers; end

#skip_blanks? ⇒ `Boolean` (readonly)

[ GitHub ]

# File 'lib/csv/parser.rb', line 372


# Returns the stored :skip_blanks setting.
def skip_blanks?; @skip_blanks; end

#unconverted_fields? ⇒ `Boolean` (readonly)

[ GitHub ]

# File 'lib/csv/parser.rb', line 356


# Returns the stored :unconverted_fields setting.
def unconverted_fields?; @unconverted_fields; end

#use_headers? ⇒ `Boolean` (readonly)

[ GitHub ]

# File 'lib/csv/parser.rb', line 430


# Returns whether headers are in use (the flag computed by prepare_header).
def use_headers?; @use_headers; end

Instance Method Details

#add_unconverted_fields(row, fields)

This method injects an instance variable unconverted_fields into row and an accessor method for row called unconverted_fields(). The variable is set to the contents of fields.

[ GitHub ]

# File 'lib/csv/parser.rb', line 1281


# Gives +row+ its own +unconverted_fields+ reader (defined on the row's
# singleton class, so other objects are unaffected) and stores +fields+
# in the backing instance variable.
#
# Returns +row+.
def add_unconverted_fields(row, fields)
  row.singleton_class.send(:attr_reader, :unconverted_fields)
  row.instance_variable_set(:@unconverted_fields, fields)
  row
end

#adjust_headers(headers, quoted_fields) (private)

[ GitHub ]

# File 'lib/csv/parser.rb', line 794


# Runs +headers+ through the header fields converter and freezes every
# String in the converted result.
#
# Returns the converted headers.
def adjust_headers(headers, quoted_fields)
  @header_fields_converter.convert(headers, nil, @lineno, quoted_fields).tap do |converted|
    converted.each { |header| header.freeze if header.is_a?(String) }
  end
end

#build_scanner

See additional method definition at line 840.

[ GitHub ]

# File 'lib/csv/parser.rb', line 862


# Builds an InputsScanner over the already-read samples followed by the
# remaining input. A StringIO input is read fully and wrapped in
# UnoptimizedStringIO; any other input is passed through as is.
#
# The chunk size comes from the environment variable named by
# SCANNER_TEST_CHUNK_SIZE_NAME and defaults to 1.
def build_scanner
  inputs = @samples.map { |sample| UnoptimizedStringIO.new(sample) }
  inputs << (@input.is_a?(StringIO) ? UnoptimizedStringIO.new(@input.read) : @input)

  chunk_size_value =
    begin
      ENV[SCANNER_TEST_CHUNK_SIZE_NAME]
    rescue # Ractor::IsolationError
      # Ractor on Ruby 3.0 can't read ENV value.
      SCANNER_TEST_CHUNK_SIZE_VALUE
    end

  InputsScanner.new(inputs,
                    @encoding,
                    @row_separator,
                    chunk_size: Integer(chunk_size_value || "1", 10))
end

#column_separator

[ GitHub ]

# File 'lib/csv/parser.rb', line 332


# Returns the column separator string set up by prepare_separators.
def column_separator; @column_separator; end

#detect_row_separator(sample, cr, lf) (private)

[ GitHub ]

# File 'lib/csv/parser.rb', line 716


# Guesses the row separator from +sample+.
#
# sample - String to inspect.
# cr, lf - the carriage-return and line-feed strings to look for.
#
# Only a +cr+ that appears before the first +lf+ is considered. Returns
# +cr + lf+ when they are adjacent, +cr+ or +lf+ when just one applies, and
# :auto when neither is found.
def detect_row_separator(sample, cr, lf)
  lf_index = sample.index(lf)
  cr_index = (lf_index ? sample[0, lf_index] : sample).index(cr)

  return (lf_index ? lf : :auto) unless cr_index
  return cr unless lf_index

  if cr_index + 1 == lf_index
    cr + lf
  elsif cr_index < lf_index
    cr
  else
    lf
  end
end

#emit_row(row, quoted_fields) {|row| ... }

Yields:

(row)

[ GitHub ]

# File 'lib/csv/parser.rb', line 1252


# Converts one parsed row and yields it to the caller's block.
#
# row           - Array of raw field values.
# quoted_fields - per-field flags handed to the converters.
#
# Increments @lineno. With headers enabled and none known yet, the row
# becomes the headers and is yielded only when @return_headers is set.
# When @unconverted_fields is set, the raw fields are attached to the
# yielded row unless it already responds to +unconverted_fields+.
def emit_row(row, quoted_fields, &block)
  @lineno += 1
  raw_row = row

  if !@use_headers
    # convert fields, if needed...
    row = @fields_converter.convert(raw_row, nil, @lineno, quoted_fields)
  elsif @headers.nil?
    @headers = adjust_headers(raw_row, quoted_fields)
    return unless @return_headers

    row = Row.new(@headers, raw_row, true)
  else
    row = Row.new(@headers,
                  @fields_converter.convert(raw_row, @headers, @lineno, quoted_fields))
  end

  # inject unconverted fields and accessor, if requested...
  if @unconverted_fields && !row.respond_to?(:unconverted_fields)
    add_unconverted_fields(row, raw_row)
  end

  yield(row)
end

#field_size_limit

[ GitHub ]

# File 'lib/csv/parser.rb', line 344


# Returns the maximum field size plus one, or nil when no maximum is set.
def field_size_limit
  return nil if @max_field_size.nil?

  @max_field_size.succ
end

#headers

[ GitHub ]

# File 'lib/csv/parser.rb', line 360


# Returns the current headers, or nil when none are set.
def headers; @headers; end

#ignore_broken_line

[ GitHub ]

# File 'lib/csv/parser.rb', line 1237


# Consumes what is left of the current line, then its line ending, and
# counts the line.
def ignore_broken_line
  [@not_line_end, @line_end].each { |pattern| @scanner.scan_all(pattern) }
  @lineno += 1
end

#last_line (private)

[ GitHub ]

# File 'lib/csv/parser.rb', line 746


# Returns the most recently read line. When a scanner exists and no line
# has been recorded yet, the scanner's +keep_end+ value is cached and used.
def last_line
  return @last_line unless @scanner

  @last_line ||= @scanner.keep_end
end

#line

[ GitHub ]

# File 'lib/csv/parser.rb', line 384


# Public alias for the private +last_line+ lookup.
def line; last_line; end

#lineno

[ GitHub ]

# File 'lib/csv/parser.rb', line 380


# Returns the parser's current line counter.
def lineno; @lineno; end

#max_field_size

[ GitHub ]

# File 'lib/csv/parser.rb', line 348


# Returns the configured :max_field_size option (nil when unset).
def max_field_size; @max_field_size; end

#parse(&block)

[ GitHub ]

# File 'lib/csv/parser.rb', line 388


# Parses the input, yielding each row to the block; without a block an
# Enumerator over this method is returned.
#
# When headers were supplied up front and :return_headers is set, the
# header row is yielded first. The parsing strategy is then chosen from
# the quote character and @need_robust_parsing.
#
# Raises MalformedCSVError when the strategy raises InvalidEncoding or
# UnexpectedError.
def parse(&block)
  return to_enum(__method__) unless block_given?

  if @return_headers && @headers && @raw_headers
    header_row = Row.new(@headers, @raw_headers, true)
    header_row = add_unconverted_fields(header_row, []) if @unconverted_fields
    yield header_row
  end

  # Line number to report for a failure: skip the broken line when a
  # scanner exists, otherwise point at the line after the last counted one.
  failed_lineno = lambda do
    next @lineno + 1 unless @scanner

    ignore_broken_line
    @lineno
  end

  begin
    @scanner ||= build_scanner
    if quote_character.nil?
      parse_no_quote(&block)
    elsif @need_robust_parsing
      parse_quotable_robust(&block)
    else
      parse_quotable_loose(&block)
    end
  rescue InvalidEncoding
    lineno = failed_lineno.call
    raise MalformedCSVError.new("Invalid byte sequence in #{@encoding}", lineno)
  rescue UnexpectedError => error
    lineno = failed_lineno.call
    message = "This should not be happen: #{error.message}: " \
              "Please report this to https://github.com/ruby/csv/issues"
    raise MalformedCSVError.new(message, lineno)
  end
end

#parse_column_end

[ GitHub ]

# File 'lib/csv/parser.rb', line 1192


# Tries to consume a column separator at the scanner's position.
#
# The whole separator is tried first. For a multi-character separator
# (@column_ends set) each character is then scanned in turn; on a partial
# match the scanner is rewound to where it started.
#
# Returns true when a separator was consumed, false otherwise.
def parse_column_end
  return true if @scanner.scan(@column_end)
  return false unless @column_ends

  @scanner.keep_start
  matched = @column_ends.all? { |pattern| @scanner.scan(pattern) }
  matched ? @scanner.keep_drop : @scanner.keep_back
  matched
end

#parse_column_value

[ GitHub ]

# File 'lib/csv/parser.rb', line 1092


# Parses the next column value, trying the quoted and unquoted parsers.
#
# Strict mode tries the parser suggested by @may_quoted first and falls
# back to the other one. Liberal mode reads a quoted value first and, when
# unquoted text follows it, glues both together with the quote characters
# put back in.
#
# Returns the value String, or nil when neither parser matched.
def parse_column_value
  unless @liberal_parsing
    return parse_quoted_column_value || parse_unquoted_column_value if @may_quoted

    return parse_unquoted_column_value || parse_quoted_column_value
  end

  quoted_value = parse_quoted_column_value
  return parse_unquoted_column_value unless quoted_value

  @scanner.scan_all(@strip_value) if @strip_value
  unquoted_value = parse_unquoted_column_value
  return quoted_value unless unquoted_value

  if @double_quote_outside_quote
    unquoted_value = unquoted_value.gsub(@quote_character * 2,
                                         @quote_character)
    # %Q{""...} case
    return @quote_character + unquoted_value if quoted_value.empty?
  end
  @quote_character + quoted_value + @quote_character + unquoted_value
end

#parse_headers(row) (private)

[ GitHub ]

# File 'lib/csv/parser.rb', line 780


# Parses a header String with the parser's own separators and quote
# character, recording for each field whether it was quoted.
#
# Returns a two-element Array: [headers, quoted_fields].
def parse_headers(row)
  quoted_fields = []
  # Pass-through converter whose only job is to capture the quoted flag.
  recorder = lambda do |field, info|
    quoted_fields << info.quoted?
    field
  end
  parsed = CSV.parse_line(row,
                          col_sep:    @column_separator,
                          row_sep:    @row_separator,
                          quote_char: @quote_character,
                          converters: [recorder])
  [parsed, quoted_fields]
end

#parse_no_quote(&block)

[ GitHub ]

# File 'lib/csv/parser.rb', line 930


# Parsing strategy used when no quote character is configured: every line
# from the scanner is split on the column separator.
#
# Lines matched by skip_line? are dropped, as are blank lines when
# @skip_blanks is set. Empty fields become nil. Each surviving line is
# recorded in @last_line and handed to emit_row.
def parse_no_quote(&block)
  @scanner.each_line(@row_separator) do |line|
    next if @skip_lines && skip_line?(line)

    content = line.delete_suffix(@row_separator)
    if content.empty?
      next if @skip_blanks

      row = []
      quoted_fields = []
    else
      row = strip_value(content).split(@split_column_separator, -1)
      quoted_fields = Array.new(row.size, false)
      row.each { |column| validate_field_size(column) } if @max_field_size
      row.map! { |column| column.empty? ? nil : column }
    end
    @last_line = line
    emit_row(row, quoted_fields, &block)
  end
end

#parse_quotable_loose(&block)

[ GitHub ]

# File 'lib/csv/parser.rb', line 961


# Fast path for input that may contain quotes: each line is split on the
# column separator, and only two field shapes are accepted: a field with
# no quote character at all, or a field wrapped in exactly one pair of
# quote characters.
#
# Any line that contains CR/LF after its row separator is removed, or any
# field with a different quote layout, rewinds the scanner to the start of
# that line, sets @need_robust_parsing and hands the remaining input to
# parse_quotable_robust.
def parse_quotable_loose(&block)
  @scanner.keep_start
  @scanner.each_line(@row_separator) do |line|
    if @skip_lines and skip_line?(line)
      # Drop the kept region for the skipped line and start a fresh one.
      @scanner.keep_drop
      @scanner.keep_start
      next
    end
    original_line = line
    line = line.delete_suffix(@row_separator)

    if line.empty?
      if @skip_blanks
        @scanner.keep_drop
        @scanner.keep_start
        next
      end
      row = []
      quoted_fields = []
    elsif line.include?(@cr) or line.include?(@lf)
      # A line break inside the line cannot be handled by simple splitting.
      @scanner.keep_back
      @need_robust_parsing = true
      return parse_quotable_robust(&block)
    else
      row = line.split(@split_column_separator, -1)
      quoted_fields = []
      n_columns = row.size
      i = 0
      while i < n_columns
        column = row[i]
        if column.empty?
          quoted_fields << false
          row[i] = nil
        else
          n_quotes = column.count(@quote_character)
          if n_quotes.zero?
            quoted_fields << false
            # no quote
          elsif n_quotes == 2 and
               column.start_with?(@quote_character) and
               column.end_with?(@quote_character)
            # Simple "value" form: strip the surrounding quotes.
            quoted_fields << true
            row[i] = column[1..-2]
          else
            # Any other quote layout needs the robust parser.
            @scanner.keep_back
            @need_robust_parsing = true
            return parse_quotable_robust(&block)
          end
          validate_field_size(row[i])
        end
        i += 1
      end
    end
    # The line was handled here: discard its kept region and begin the next.
    @scanner.keep_drop
    @scanner.keep_start
    @last_line = original_line
    emit_row(row, quoted_fields, &block)
  end
  @scanner.keep_drop
end

#parse_quotable_robust(&block)

[ GitHub ]

# File 'lib/csv/parser.rb', line 1022


# General parsing strategy: reads the input one column value at a time
# through the scanner and emits a row at each row end and at end of input.
#
# After each value the loop expects a column end, a row end, or end of
# input. Anything else is reported as MalformedCSVError, after the broken
# line has been skipped with ignore_broken_line.
def parse_quotable_robust(&block)
  row = []
  quoted_fields = []
  skip_needless_lines
  start_row
  while true
    # Reset the per-column flags; the value parsers set them.
    @quoted_column_value = false
    @unquoted_column_value = false
    @scanner.scan_all(@strip_value) if @strip_value
    value = parse_column_value
    if value
      @scanner.scan_all(@strip_value) if @strip_value
      validate_field_size(value)
    end
    if parse_column_end
      row << value
      quoted_fields << @quoted_column_value
    elsif parse_row_end
      if row.empty? and value.nil?
        # Blank line.
        emit_row([], [], &block) unless @skip_blanks
      else
        row << value
        quoted_fields << @quoted_column_value
        emit_row(row, quoted_fields, &block)
        row = []
        quoted_fields = []
      end
      skip_needless_lines
      start_row
    elsif @scanner.eos?
      # End of input: emit the pending row, if there is one.
      break if row.empty? and value.nil?
      row << value
      quoted_fields << @quoted_column_value
      emit_row(row, quoted_fields, &block)
      break
    else
      # Neither separator nor end of input follows the value: work out
      # which error message applies.
      if @quoted_column_value
        if liberal_parsing? and (new_line = @scanner.check(@line_end))
          message =
            "Illegal end-of-line sequence outside of a quoted field " +
            "<#{new_line.inspect}>"
        else
          message = "Any value after quoted field isn't allowed"
        end
        ignore_broken_line
        raise MalformedCSVError.new(message, @lineno)
      elsif @unquoted_column_value and
            (new_line = @scanner.scan(@line_end))
        ignore_broken_line
        message = "Unquoted fields do not allow new line " +
                  "<#{new_line.inspect}>"
        raise MalformedCSVError.new(message, @lineno)
      elsif @scanner.rest.start_with?(@quote_character)
        ignore_broken_line
        message = "Illegal quoting"
        raise MalformedCSVError.new(message, @lineno)
      elsif (new_line = @scanner.scan(@line_end))
        ignore_broken_line
        message = "New line must be <#{@row_separator.inspect}> " +
                  "not <#{new_line.inspect}>"
        raise MalformedCSVError.new(message, @lineno)
      else
        ignore_broken_line
        raise MalformedCSVError.new("TODO: Meaningful message",
                                    @lineno)
      end
    end
  end
end

#parse_quoted_column_value

[ GitHub ]

# File 'lib/csv/parser.rb', line 1150


# Parses a quoted column value at the scanner's position.
#
# Returns nil when the position does not start with a quote character;
# otherwise sets @quoted_column_value and returns the unquoted content,
# with each doubled quote collapsed to a single quote character.
#
# Raises MalformedCSVError ("Unclosed quoted field") when no closing quote
# is found.
def parse_quoted_column_value
  quotes = @scanner.scan_all(@quotes)
  return nil unless quotes

  @quoted_column_value = true
  n_quotes = quotes.size
  if (n_quotes % 2).zero?
    # Even run: open + close with only doubled quotes in between, so the
    # value is (n_quotes - 2) / 2 quote characters.
    quotes[0, (n_quotes - 2) / 2]
  else
    # Odd run: one opening quote followed by n_quotes / 2 doubled quotes;
    # the field is still open.
    value = quotes[0, n_quotes / 2]
    while true
      quoted_value = @scanner.scan_all(@quoted_value)
      value << quoted_value if quoted_value
      if @backslash_quote
        if @scanner.scan(@escaped_backslash)
          # A backslash followed by a quote yields a literal quote;
          # otherwise the backslash itself is kept.
          if @scanner.scan(@escaped_quote)
            value << @quote_character
          else
            value << @backslash_character
          end
          next
        end
      end

      quotes = @scanner.scan_all(@quotes)
      unless quotes
        ignore_broken_line
        message = "Unclosed quoted field"
        raise MalformedCSVError.new(message, @lineno)
      end
      n_quotes = quotes.size
      if n_quotes == 1
        # A lone quote closes the field.
        break
      else
        # Each pair is one literal quote; an odd leftover closes the field.
        value << quotes[0, n_quotes / 2]
        break if (n_quotes % 2) == 1
      end
    end
    value
  end
end

#parse_row_end

[ GitHub ]

# File 'lib/csv/parser.rb', line 1206


# Tries to consume a row separator at the scanner's position.
#
# The whole separator is tried first. For a multi-character separator
# (@row_ends set) each character is then scanned in turn; on a partial
# match the scanner is rewound to where it started.
#
# Returns true when a separator was consumed, false otherwise.
def parse_row_end
  return true if @scanner.scan(@row_end)
  return false unless @row_ends

  @scanner.keep_start
  matched = @row_ends.all? { |pattern| @scanner.scan(pattern) }
  matched ? @scanner.keep_drop : @scanner.keep_back
  matched
end

#parse_unquoted_column_value

[ GitHub ]

# File 'lib/csv/parser.rb', line 1122


# Parses an unquoted column value at the scanner's position.
#
# Returns nil when nothing matches @unquoted_value; otherwise sets
# @unquoted_column_value and returns the value. In backslash-quote mode
# @backslash_quote_character is replaced by the quote character, and
# trailing strip characters are removed when @rstrip_value is set.
def parse_unquoted_column_value
  value = @scanner.scan_all(@unquoted_value)
  return nil unless value

  @unquoted_column_value = true
  if @first_column_separators
    # Multi-character column separator: a run of its first character that
    # is not a complete separator belongs to the value, so keep appending
    # until a real column end (or nothing more) follows.
    while true
      @scanner.keep_start
      is_column_end = @column_ends.all? do |column_end|
        @scanner.scan(column_end)
      end
      # Only a look-ahead: always rewind.
      @scanner.keep_back
      break if is_column_end
      sub_separator = @scanner.scan_all(@first_column_separators)
      break if sub_separator.nil?
      value << sub_separator
      sub_value = @scanner.scan_all(@unquoted_value)
      break if sub_value.nil?
      value << sub_value
    end
  end
  value.gsub!(@backslash_quote_character, @quote_character) if @backslash_quote
  if @rstrip_value
    value.gsub!(@rstrip_value, "")
  end
  value
end

#prepare (private)

A set of tasks to prepare the file in order to parse it

[ GitHub ]

# File 'lib/csv/parser.rb', line 436


# Runs every preparation step, in order, to get the parser ready. The order
# matters: for example prepare_variable sets @encoding, which the later
# steps use to encode their strings and patterns.
#
# Returns the result of the last step.
def prepare
  steps = %i[
    prepare_variable
    prepare_quote_character
    prepare_backslash
    prepare_skip_lines
    prepare_strip
    prepare_separators
    validate_strip_and_col_sep_options
    prepare_quoted
    prepare_unquoted
    prepare_line
    prepare_header
    prepare_parser
  ]
  steps.map { |step| send(step) }.last
end

#prepare_backslash (private)

[ GitHub ]

# File 'lib/csv/parser.rb', line 494


# Sets up the backslash-related strings and pattern. Does nothing unless
# backslash quoting is enabled.
#
# @backslash_quote_character is the backslash followed by the escaped quote
# character, or nil when there is no quote character.
def prepare_backslash
  return unless @backslash_quote

  @backslash_character = "\\".encode(@encoding)
  @escaped_backslash_character = Regexp.escape(@backslash_character)
  @escaped_backslash = Regexp.new(@escaped_backslash_character)
  @backslash_quote_character =
    @quote_character.nil? ? nil : @backslash_character + @escaped_quote_character
end

#prepare_header (private)

[ GitHub ]

# File 'lib/csv/parser.rb', line 754


# Interprets the :headers and :return_headers options.
#
# An Array is used as the raw headers as is; a String is parsed with
# parse_headers; nil/false disables headers; any other value enables
# headers without supplying them up front. Raw headers, when present, are
# run through adjust_headers to produce @headers.
def prepare_header
  @return_headers = @options[:return_headers]

  header_option = @options[:headers]
  @raw_headers, quoted_fields =
    case header_option
    when Array then [header_option, [false] * header_option.size]
    when String then parse_headers(header_option)
    end
  @use_headers = header_option ? true : false

  @headers = @raw_headers ? adjust_headers(@raw_headers, quoted_fields) : nil
end

#prepare_line (private)

[ GitHub ]

# File 'lib/csv/parser.rb', line 740


# Resets line tracking: the line counter goes back to zero and both the
# last line and the scanner are cleared.
def prepare_line
  @lineno = 0
  @last_line = @scanner = nil
end

#prepare_parser (private)

[ GitHub ]

# File 'lib/csv/parser.rb', line 800


# Caches the may_quoted? heuristic in @may_quoted.
def prepare_parser; @may_quoted = may_quoted?; end

#prepare_quote_character (private)

[ GitHub ]

# File 'lib/csv/parser.rb', line 477


# Reads the :quote_character option and derives the quote-related values.
#
# With a nil quote character the escaped forms are cleared. Otherwise the
# character is stringified, encoded and required to be exactly one
# character long.
#
# Raises ArgumentError when the quote character is not a single character.
def prepare_quote_character
  @quote_character = @options[:quote_character]
  if @quote_character.nil?
    @escaped_quote_character = @escaped_quote = nil
    return
  end

  @quote_character = @quote_character.to_s.encode(@encoding)
  unless @quote_character.length == 1
    raise ArgumentError, ":quote_char has to be nil or a single character String"
  end

  @double_quote_character = @quote_character * 2
  @escaped_quote_character = Regexp.escape(@quote_character)
  @escaped_quote = Regexp.new(@escaped_quote_character)
end

#prepare_quoted (private)

[ GitHub ]

# File 'lib/csv/parser.rb', line 636


# Builds the patterns used around quoted values and line splitting.
#
# With a quote character: @quotes matches a run of quote characters, and
# @quoted_value matches a run of anything except the quote character (and
# the backslash, in backslash-quote mode).
#
# @split_column_separator is what the line-splitting strategies pass to
# String#split: a Regexp that also absorbs surrounding strip characters, a
# Regexp for a single-space separator, or the plain separator String.
def prepare_quoted
  if @quote_character
    @quotes = Regexp.new(@escaped_quote_character + "+".encode(@encoding))
    excluded = @escaped_quote_character.dup
    excluded << @escaped_backslash_character if @backslash_quote
    @quoted_value = Regexp.new("[^".encode(@encoding) + excluded + "]+".encode(@encoding))
  end

  @split_column_separator =
    if @escaped_strip
      any_strip = @escaped_strip + "*".encode(@encoding)
      Regexp.new(any_strip + @escaped_column_separator + any_strip)
    elsif @column_separator == " ".encode(@encoding)
      Regexp.new(@escaped_column_separator)
    else
      @column_separator
    end
end

#prepare_separators (private)

[ GitHub ]

# File 'lib/csv/parser.rb', line 568


# Reads the column and row separator options and derives the strings and
# patterns the parsing strategies use to detect column ends, row ends and
# line breaks.
#
# For multi-character separators an extra Array of per-character patterns
# is built (@column_ends / @row_ends); for single-character ones it is nil.
#
# Raises ArgumentError when the column separator is empty.
def prepare_separators
  column_separator = @options[:column_separator]
  @column_separator = column_separator.to_s.encode(@encoding)
  if @column_separator.size < 1
    raise ArgumentError,
          ":col_sep must be 1 or more characters: #{column_separator.inspect}"
  end
  @row_separator =
    resolve_row_separator(@options[:row_separator]).encode(@encoding)

  # One escaped single-character pattern per character of the separator.
  per_char_patterns = lambda do |separator|
    separator.each_char.map { |char| Regexp.new(Regexp.escape(char)) }
  end

  @escaped_column_separator = Regexp.escape(@column_separator)
  @escaped_first_column_separator = Regexp.escape(@column_separator[0])
  if @column_separator.size > 1
    @column_end = Regexp.new(@escaped_column_separator)
    @column_ends = per_char_patterns.call(@column_separator)
    @first_column_separators = Regexp.new(@escaped_first_column_separator +
                                          "+".encode(@encoding))
  else
    @column_end =
      if STRING_SCANNER_SCAN_ACCEPT_STRING
        @column_separator
      else
        Regexp.new(@escaped_column_separator)
      end
    @column_ends = nil
    @first_column_separators = nil
  end

  @row_end = Regexp.new(Regexp.escape(@row_separator))
  @row_ends = @row_separator.size > 1 ? per_char_patterns.call(@row_separator) : nil

  @cr = "\r".encode(@encoding)
  @lf = "\n".encode(@encoding)
  @line_end = Regexp.new("\r\n|\n|\r".encode(@encoding))
  @not_line_end = Regexp.new("[^\r\n]+".encode(@encoding))
end

#prepare_skip_lines (private)

[ GitHub ]

# File 'lib/csv/parser.rb', line 509


# Reads the :skip_lines option into @skip_lines.
#
# A String is encoded to the parser's encoding; a Regexp or nil is stored
# as is; any other object must respond to #match.
#
# Raises ArgumentError when the option cannot be used as a matcher.
def prepare_skip_lines
  skip_lines = @options[:skip_lines]
  @skip_lines =
    case skip_lines
    when String
      skip_lines.encode(@encoding)
    when Regexp, nil
      skip_lines
    else
      unless skip_lines.respond_to?(:match)
        raise ArgumentError,
              ":skip_lines has to respond to \#match: #{skip_lines.inspect}"
      end
      skip_lines
    end
end

#prepare_strip (private)

[ GitHub ]

# File 'lib/csv/parser.rb', line 526


# Reads the :strip option and derives the strip-related values.
#
# A String must be exactly one character and is stripped literally; any
# other truthy value strips space, tab, form feed and vertical tab. The
# strip patterns are only built when a quote character is configured.
# Enabling strip also forces robust parsing.
#
# Raises ArgumentError for an empty or multi-character strip String.
def prepare_strip
  @strip = @options[:strip]
  @escaped_strip = @strip_value = @rstrip_value = nil
  return unless @strip

  if @strip.is_a?(String)
    raise ArgumentError, ":strip must not be an empty String" if @strip.empty?
    if @strip.length > 1
      raise ArgumentError, ":strip doesn't support 2 or more characters yet"
    end

    @strip = @strip.encode(@encoding)
    @escaped_strip = Regexp.escape(@strip)
    strip_pattern = @escaped_strip
  else
    strip_values = " \t\f\v"
    @escaped_strip = strip_values.encode(@encoding)
    strip_pattern = "[#{strip_values}]".encode(@encoding)
  end

  if @quote_character
    @strip_value = Regexp.new(strip_pattern + "+".encode(@encoding))
    @rstrip_value = Regexp.new(strip_pattern + "+\\z".encode(@encoding))
  end
  @need_robust_parsing = true
end

#prepare_unquoted (private)

[ GitHub ]

# File 'lib/csv/parser.rb', line 663


# Builds @unquoted_value, the pattern for a run of characters allowed in an
# unquoted field: anything except CR, LF, the first character of the column
# separator and, unless liberal parsing is on, the quote character.
#
# Does nothing when no quote character is configured.
def prepare_unquoted
  return if @quote_character.nil?

  excluded = "\r\n".encode(@encoding)
  excluded << @escaped_first_column_separator
  excluded << @escaped_quote_character unless @liberal_parsing
  @unquoted_value = Regexp.new("[^".encode(@encoding) + excluded + "]+".encode(@encoding))
end

#prepare_variable (private)

[ GitHub ]

# File 'lib/csv/parser.rb', line 451


# Copies the basic options into instance variables.
#
# :liberal_parsing may be a Hash carrying the :double_quote_outside_quote
# and :backslash_quote sub-options; any other truthy value enables liberal
# parsing with both sub-options off. Liberal parsing forces robust parsing.
def prepare_variable
  @encoding = @options[:encoding]

  liberal_parsing = @options[:liberal_parsing]
  @liberal_parsing = liberal_parsing ? true : false
  @need_robust_parsing = @liberal_parsing
  case liberal_parsing
  when Hash
    @double_quote_outside_quote =
      liberal_parsing[:double_quote_outside_quote]
    @backslash_quote = liberal_parsing[:backslash_quote]
  when nil, false
    # @double_quote_outside_quote is deliberately left unassigned here.
    @backslash_quote = false
  else
    @double_quote_outside_quote = false
    @backslash_quote = false
  end

  @unconverted_fields = @options[:unconverted_fields]
  @max_field_size = @options[:max_field_size]
  @skip_blanks = @options[:skip_blanks]
  @fields_converter = @options[:fields_converter]
  @header_fields_converter = @options[:header_fields_converter]
end

#quote_character

[ GitHub ]

# File 'lib/csv/parser.rb', line 340


# Returns the quote character, or nil when quoting is disabled.
def quote_character; @quote_character; end

#resolve_row_separator(separator) (private)

[ GitHub ]

# File 'lib/csv/parser.rb', line 676


# Turns the row separator option into a concrete, encoded String.
#
# Anything other than :auto is simply stringified and encoded. For :auto
# the separator is sniffed from the input with detect_row_separator: a
# StringIO is read fully and then rewound to its previous position, while
# any other input that responds to #gets is read in chunks, which are kept
# in @samples. If nothing can be detected, InputRecordSeparator.value is
# used.
def resolve_row_separator(separator)
  if separator == :auto
    cr = "\r".encode(@encoding)
    lf = "\n".encode(@encoding)
    if @input.is_a?(StringIO)
      pos = @input.pos
      separator = detect_row_separator(@input.read, cr, lf)
      @input.seek(pos)
    elsif @input.respond_to?(:gets)
      # Larger chunks for File inputs than for other IO-like inputs.
      if @input.is_a?(File)
        chunk_size = 32 * 1024
      else
        chunk_size = 1024
      end
      begin
        while separator == :auto
          #
          # if we run out of data, it's probably a single line
          # (the fallback after this block will set the default value)
          #
          break unless sample = @input.gets(nil, chunk_size)

          # extend sample if we're unsure of the line ending
          if sample.end_with?(cr)
            sample << (@input.gets(nil, 1) || "")
          end

          # Remember what was consumed so it can still be parsed later.
          @samples << sample

          separator = detect_row_separator(sample, cr, lf)
        end
      rescue IOError
        # do nothing:  the fallback below will set the default
      end
    end
    separator = InputRecordSeparator.value if separator == :auto
  end
  separator.to_s.encode(@encoding)
end

#row_separator

[ GitHub ]

# File 'lib/csv/parser.rb', line 336


# Returns the resolved row separator string.
def row_separator; @row_separator; end

#skip_line?(line) ⇒ `Boolean`

[ GitHub ]

# File 'lib/csv/parser.rb', line 910


# Decides whether +line+ should be skipped according to @skip_lines.
#
# The trailing row separator is removed first. A String matcher skips lines
# that contain it, a Regexp is tested with #match?, and any other matcher
# is asked via #match (whose result is returned as is).
def skip_line?(line)
  content = line.delete_suffix(@row_separator)
  return content.include?(@skip_lines) if @skip_lines.is_a?(String)
  return @skip_lines.match?(content) if @skip_lines.is_a?(Regexp)

  @skip_lines.match(content)
end

#skip_lines

[ GitHub ]

# File 'lib/csv/parser.rb', line 352


# Returns the configured line-skipping matcher (nil when unset).
def skip_lines; @skip_lines; end

#skip_needless_lines

[ GitHub ]

# File 'lib/csv/parser.rb', line 893


# Consumes consecutive lines matched by skip_line?, counting each one, and
# stops at the first line that should be kept, rewinding the scanner to
# the start of that line.
#
# Does nothing when no :skip_lines matcher is configured.
def skip_needless_lines
  return unless @skip_lines

  until @scanner.eos?
    @scanner.keep_start
    line = @scanner.scan_all(@not_line_end) || "".encode(@encoding)
    line << @row_separator if parse_row_end
    unless skip_line?(line)
      # This line is real data: rewind to its start and stop.
      @scanner.keep_back
      return
    end

    @lineno += 1
    @scanner.keep_drop
  end
end

#start_row

[ GitHub ]

# File 'lib/csv/parser.rb', line 1243


# Marks the start of a new row in the scanner.
#
# When a last line is recorded it is cleared; otherwise the scanner's
# current kept region is dropped. A new kept region is then started.
def start_row
  had_last_line = @last_line
  @last_line = nil if had_last_line
  @scanner.keep_drop unless had_last_line
  @scanner.keep_start
end

#strip_value(value)

[ GitHub ]

# File 'lib/csv/parser.rb', line 1219


# Strips +value+ in place according to the :strip option and returns it.
#
# A String strip setting is removed repeatedly from both ends; any other
# truthy setting uses String#strip!. The value is returned untouched when
# stripping is off or the value is nil.
def strip_value(value)
  return value if !@strip || value.nil?

  if @strip.is_a?(String)
    loop { break unless value.delete_prefix!(@strip) }
    loop { break unless value.delete_suffix!(@strip) }
  else
    value.strip!
  end
  value
end

#validate_field_size(field)

Raises:

(MalformedCSVError)

[ GitHub ]

# File 'lib/csv/parser.rb', line 922


# Checks +field+ against the configured maximum field size.
#
# Does nothing when no maximum is set or the field fits. Otherwise the
# broken line is skipped and MalformedCSVError is raised with the line
# number after skipping.
def validate_field_size(field)
  limit = @max_field_size
  return unless limit

  size = field.size
  return if size <= limit

  ignore_broken_line
  raise MalformedCSVError.new("Field size exceeded: #{size} > #{limit}", @lineno)
end

#validate_strip_and_col_sep_options (private)

This method verifies that there are no (obvious) ambiguities with the provided CSV#col_sep and strip parsing options. For example, if CSV#col_sep and strip were both equal to `\t` (a tab character), then there would be no clear way to parse the input.

[ GitHub ]

# File 'lib/csv/parser.rb', line 618


# Rejects :strip / :col_sep combinations that would be ambiguous: the
# column separator must neither start nor end with a character that
# stripping would remove.
#
# Raises ArgumentError when the two options are incompatible.
def validate_strip_and_col_sep_options
  return unless @strip

  if @strip.is_a?(String)
    conflict = @column_separator.start_with?(@strip) ||
               @column_separator.end_with?(@strip)
    strip_label = @escaped_strip
  else
    edge_pattern = Regexp.new("\\A[#{@escaped_strip}]|[#{@escaped_strip}]\\z")
    conflict = edge_pattern.match?(@column_separator)
    strip_label = "true"
  end
  return unless conflict

  raise ArgumentError,
        "The provided strip (#{strip_label}) and " \
        "col_sep (#{@escaped_column_separator}) options are incompatible."
end

Class: CSV::Parser

Overview

Constant Summary

Class Method Summary

Instance Attribute Summary

Instance Method Summary

Constructor Details

.new(input, options) ⇒ Parser

Instance Attribute Details

#header_row? ⇒ Boolean (readonly)

#liberal_parsing? ⇒ Boolean (readonly)

#may_quoted? ⇒ Boolean (readonly, private)

#return_headers? ⇒ Boolean (readonly)

#skip_blanks? ⇒ Boolean (readonly)

#unconverted_fields? ⇒ Boolean (readonly)

#use_headers? ⇒ Boolean (readonly)

Instance Method Details

#add_unconverted_fields(row, fields)

#adjust_headers(headers, quoted_fields) (private)

#build_scanner

#column_separator

#detect_row_separator(sample, cr, lf) (private)

#emit_row(row, quoted_fields) {|row| ... }

#field_size_limit

#headers

#ignore_broken_line

#last_line (private)

#line

#lineno

#max_field_size

#parse(&block)

#parse_column_end

#parse_column_value

#parse_headers(row) (private)

#parse_no_quote(&block)

#parse_quotable_loose(&block)

#parse_quotable_robust(&block)

#parse_quoted_column_value

#parse_row_end

#parse_unquoted_column_value

#prepare (private)

#prepare_backslash (private)

#prepare_header (private)

#prepare_line (private)

#prepare_parser (private)

#prepare_quote_character (private)

#prepare_quoted (private)

#prepare_separators (private)

#prepare_skip_lines (private)

#prepare_strip (private)

#prepare_unquoted (private)

#prepare_variable (private)

#quote_character

#resolve_row_separator(separator) (private)

#row_separator

#skip_line?(line) ⇒ Boolean

#skip_lines

#skip_needless_lines

#start_row

#strip_value(value)

#validate_field_size(field)

#validate_strip_and_col_sep_options (private)

.new(input, options) ⇒ `Parser`

#header_row? ⇒ `Boolean` (readonly)

#liberal_parsing? ⇒ `Boolean` (readonly)

#may_quoted? ⇒ `Boolean` (readonly, private)

#return_headers? ⇒ `Boolean` (readonly)

#skip_blanks? ⇒ `Boolean` (readonly)

#unconverted_fields? ⇒ `Boolean` (readonly)

#use_headers? ⇒ `Boolean` (readonly)

#skip_line?(line) ⇒ `Boolean`