123456789_123456789_123456789_123456789_123456789_

Class: Scanf::FormatSpecifier

Do not use. This class is for internal use only.
Relationships & Source Files
Inherits: Object
Defined in: lib/scanf.rb

Overview

Technical notes

Rationale behind scanf for Ruby

The impetus for a scanf implementation in Ruby comes chiefly from the fact that existing pattern matching operations, such as Regexp#match and String#scan, return all results as strings, which have to be converted to integers or floats explicitly in cases where what’s ultimately wanted are integer or float values.

Design of scanf for Ruby

scanf for Ruby is essentially a <format string>-to-<regular expression> converter.

When scanf is called, a FormatString object is generated from the format string (“%d%s…”) argument. The FormatString object breaks the format string down into atoms (“%d”, “%5f”, “blah”, etc.), and from each atom it creates a FormatSpecifier object, which it saves.

Each FormatSpecifier has a regular expression fragment and a “handler” associated with it. For example, the regular expression fragment associated with the format “%d” is “([-+]?\d+)”, and the handler associated with it is a wrapper around String#to_i. scanf itself calls FormatString#match, passing in the input string. FormatString#match iterates through its FormatSpecifiers; for each one, it matches the corresponding regular expression fragment against the string. If there’s a match, it sends the matched string to the handler associated with the FormatSpecifier.

Thus, to follow up the “%d” example: if “123” occurs in the input string when a FormatSpecifier consisting of “%d” is reached, the “123” will be matched against “([-+]?\d+)”, and the matched string will be rendered into an integer by a call to to_i.

The rendered match is then saved to an accumulator array, and the input string is reduced to the post-match substring. Thus the string is “eaten” from the left as the FormatSpecifiers are applied in sequence. (This is done to a duplicate string; the original string is not altered.)

As soon as a regular expression fragment fails to match the string, or when the FormatString object runs out of FormatSpecifiers, scanning stops and results accumulated so far are returned in an array.

Class Method Summary

Instance Attribute Summary

Instance Method Summary

Constructor Details

.new(str) ⇒ FormatSpecifier

[ GitHub ]

  
# File 'lib/scanf.rb', line 332

# Parses one format-string atom and prepares its matcher.
#
# Sets @spec_string to +str+, @re_string to a regular-expression source
# (anchored with \A) whose first capture group is the text to convert, and
# @handler to the name of the method that converts that captured text.
#
# The +when+ clauses are order-sensitive: the first pattern that matches the
# atom wins, so more specific forms are listed before more general ones.
#
# @param str [String] one atom of a scanf format, e.g. "%d", "%5f",
#   "%[a-z]", "%%", or a run of literal characters
def initialize(str)
  @spec_string = str
  h = '[A-Fa-f0-9]'

  # Unbounded float syntax shared by %f and %5f: a hex float with binary
  # exponent, a bare integer not followed by more digits or a dot, or a
  # decimal with an optional exponent. Each form takes an optional sign.
  float = '[-+]?(?:0[xX](?:\.\h+|\h+(?:\.\h*)?)[pP][-+]?\d+|\d+(?![\d.])|\d*\.\d*(?:[eE][-+]?\d+)?)'

  @re_string, @handler =
    case @spec_string

    # %[[:...:]]
    when /%\*?(\[\[:[a-z]+:\]\])/
      [ "(#{$1}+)", :extract_plain ]

    # %5[[:...:]]
    when /%\*?(\d+)(\[\[:[a-z]+:\]\])/
      [ "(#{$2}{1,#{$1}})", :extract_plain ]

    # %[...]
    when /%\*?\[([^\]]*)\]/
      yes = $1
      # "no" is the complement of the requested set; the lookahead makes the
      # match stop only where a non-member (or end of input) follows.
      if /^\^/.match(yes) then no = yes[1..-1] else no = '^' + yes end
      [ "([#{yes}]+)(?=[#{no}]|\\z)", :extract_plain ]

    # %5[...]
    when /%\*?(\d+)\[([^\]]*)\]/
      yes = $2
      w = $1
      [ "([#{yes}]{1,#{w}})", :extract_plain ]

    # %i
    when /%\*?i/
      [ "([-+]?(?:(?:0[0-7]+)|(?:0[Xx]#{h}+)|(?:[1-9]\\d*)))", :extract_integer ]

    # %5i
    when /%\*?(\d+)i/
      n = $1.to_i
      # Each alternative is only offered when the width leaves room for its
      # prefix (sign, leading 0, 0x) plus at least one digit.
      s = "("
      if n > 1 then s += "[1-9]\\d{1,#{n-1}}|" end
      if n > 1 then s += "0[0-7]{1,#{n-1}}|" end
      if n > 2 then s += "[-+]0[0-7]{1,#{n-2}}|" end
      if n > 2 then s += "[-+][1-9]\\d{1,#{n-2}}|" end
      if n > 2 then s += "0[Xx]#{h}{1,#{n-2}}|" end
      if n > 3 then s += "[-+]0[Xx]#{h}{1,#{n-3}}|" end
      s += "\\d"
      s += ")"
      [ s, :extract_integer ]

    # %d, %u
    when /%\*?[du]/
      [ '([-+]?\d+)', :extract_decimal ]

    # %5d, %5u
    when /%\*?(\d+)[du]/
      n = $1.to_i
      s = "("
      if n > 1 then s += "[-+]\\d{1,#{n-1}}|" end
      s += "\\d{1,#{$1}})"
      [ s, :extract_decimal ]

    # %x
    when /%\*?[Xx]/
      [ "([-+]?(?:0[Xx])?#{h}+)", :extract_hex ]

    # %5x
    when /%\*?(\d+)[Xx]/
      n = $1.to_i
      s = "("
      if n > 3 then s += "[-+]0[Xx]#{h}{1,#{n-3}}|" end
      if n > 2 then s += "0[Xx]#{h}{1,#{n-2}}|" end
      if n > 1 then s += "[-+]#{h}{1,#{n-1}}|" end
      s += "#{h}{1,#{n}}"
      s += ")"
      [ s, :extract_hex ]

    # %o
    when /%\*?o/
      [ '([-+]?[0-7]+)', :extract_octal ]

    # %5o
    when /%\*?(\d+)o/
      n = $1.to_i
      s = "("
      # A signed form needs room for the sign plus one digit; without this
      # guard a width of 1 would produce the malformed interval {1,0}.
      if n > 1 then s += "[-+][0-7]{1,#{n-1}}|" end
      s += "[0-7]{1,#{$1}})"
      [ s, :extract_octal ]

    # %f
    when /%\*?[aefgAEFG]/
      [ "(#{float})", :extract_float ]

    # %5f -- the lookahead requires float syntax at this position, then up
    # to the given number of non-space characters are captured.
    when /%\*?(\d+)[aefgAEFG]/
      [ "(?=#{float})(\\S{1,#{$1}})", :extract_float ]

    # %5s
    when /%\*?(\d+)s/
      [ "(\\S{1,#{$1}})", :extract_plain ]

    # %s
    when /%\*?s/
      [ '(\S+)', :extract_plain ]

    # %c preceded by whitespace in the format
    when /\s%\*?c/
      [ "\\s*(.)", :extract_plain ]

    # %c
    when /%\*?c/
      [ "(.)", :extract_plain ]

    # %5c (whitespace issues are handled by the count_*_space? methods)
    when /%\*?(\d+)c/
      [ "(.{1,#{$1}})", :extract_plain ]

    # %%
    when /%%/
      [ '(\s*%)', :nil_proc ]

    # literal characters
    else
      [ "(#{Regexp.escape(@spec_string)})", :nil_proc ]
    end

  # Every fragment is matched at the very start of the remaining input.
  @re_string = '\A' + @re_string
end

Instance Attribute Details

#conversion (readonly)

[ GitHub ]

  
# File 'lib/scanf.rb', line 290

# Read-only accessors for @re_string, @matched_string, @conversion and @matched.
attr_reader :re_string, :matched_string, :conversion, :matched

#count_space? ⇒ Boolean (readonly)

[ GitHub ]

  
# File 'lib/scanf.rb', line 328

# Tests whether the specifier is one of the forms recognised by the pattern
# below: a %c (optionally with "*" and/or a width) that sits at the start of
# the spec or directly after a non-whitespace character, or a %[ character
# class with an optional width.
#
# Returns the MatchData on success and nil otherwise.
def count_space?
  space_sensitive = /(?:\A|\S)%\*?\d*c|%\d*\[/
  space_sensitive.match(@spec_string)
end

#matched (readonly)

[ GitHub ]

  
# File 'lib/scanf.rb', line 290

# Read-only accessors for @re_string, @matched_string, @conversion and @matched.
attr_reader :re_string, :matched_string, :conversion, :matched

#matched_string (readonly)

[ GitHub ]

  
# File 'lib/scanf.rb', line 290

# Read-only accessors for @re_string, @matched_string, @conversion and @matched.
attr_reader :re_string, :matched_string, :conversion, :matched

#mid_match? ⇒ Boolean (readonly)

[ GitHub ]

  
# File 'lib/scanf.rb', line 478

# Reports whether the last successful match could still have consumed more
# input for this specifier.
#
# Returns false when @matched is not set. Otherwise:
# * with no width, true only for a character class ("[");
# * with a width, true only for "c" or "[" when the matched string is
#   shorter than that width.
def mid_match?
  return false unless @matched

  kind  = letter
  limit = width

  # No width given: only an open-ended character class counts.
  return kind == '[' unless limit

  ['c', '['].include?(kind) && matched_string.size < limit
end

#re_string (readonly)

[ GitHub ]

  
# File 'lib/scanf.rb', line 290

# Read-only accessors for @re_string, @matched_string, @conversion and @matched.
attr_reader :re_string, :matched_string, :conversion, :matched

Instance Method Details

#extract_decimal(s) (private)

[ GitHub ]

  
# File 'lib/scanf.rb', line 314

# Converts the captured text to an Integer with String#to_i.
# Returns nil when +s+ is nil/false or when #skip reports a match.
def extract_decimal(s)
  return nil unless s
  return nil if skip

  s.to_i
end

#extract_float(s) (private)

[ GitHub ]

  
# File 'lib/scanf.rb', line 296

# Converts the captured text to a Float.
#
# Three input shapes are handled:
# * hexadecimal floats with a binary exponent ("0x1.8p+1"), which
#   String#to_f does not parse, are decoded by hand;
# * integer digits followed by a bare dot and an exponent ("12.e+2"), where
#   String#to_f would stop at the dot, have the dot removed first;
# * everything else goes straight to String#to_f.
#
# Returns nil when +s+ is nil/false or when #skip reports a match.
def extract_float(s)
  return nil unless s && !skip

  if /\A(?<sign>[-+]?)0[xX](?<frac>\.\h+|\h+(?:\.\h*)?)[pP](?<exp>[-+]?\d+)/ =~ s
    whole, fraction = frac.split('.')
    mantissa = whole.hex
    # Hex digits after the point are worth 16**-length of their integer value.
    if fraction && !fraction.empty?
      mantissa += fraction.hex / (16.0 ** fraction.length)
    end
    (sign == '-' ? -1 : 1) * Math.ldexp(mantissa, exp.to_i)
  elsif /\A([-+]?\d+)\.([eE][-+]?\d+)/ =~ s
    # Splice digits and exponent together so to_f sees e.g. "12e+2".
    "#{$1}#{$2}".to_f
  else
    s.to_f
  end
end

#extract_hex(s) (private)

[ GitHub ]

  
# File 'lib/scanf.rb', line 315

# Converts the captured text to an Integer with String#hex.
# Returns nil when +s+ is nil/false or when #skip reports a match.
def extract_hex(s)
  return nil unless s
  return nil if skip

  s.hex
end

#extract_integer(s) (private)

[ GitHub ]

  
# File 'lib/scanf.rb', line 317

# Converts the captured text with Kernel#Integer, which honours "0x" and
# leading-zero prefixes and raises on text it cannot parse.
# Returns nil when +s+ is nil/false or when #skip reports a match.
def extract_integer(s)
  return nil unless s
  return nil if skip

  Integer(s)
end

#extract_octal(s) (private)

[ GitHub ]

  
# File 'lib/scanf.rb', line 316

# Converts the captured text to an Integer with String#oct.
# Returns nil when +s+ is nil/false or when #skip reports a match.
def extract_octal(s)
  return nil unless s
  return nil if skip

  s.oct
end

#extract_plain(s) (private)

[ GitHub ]

  
# File 'lib/scanf.rb', line 318

# Passes the captured text through unchanged, or returns nil when #skip
# reports a match.
def extract_plain(s)
  return nil if skip

  s
end

#letter

[ GitHub ]

  
# File 'lib/scanf.rb', line 470

# Returns the conversion character of the specifier: the first lowercase
# letter or "[" that follows "%", an optional "*" and optional width digits.
# Returns nil when @spec_string contains no such sequence.
def letter
  conversion_char = /%\*?\d*([a-z\[])/
  @spec_string.slice(conversion_char, 1)
end

#match(str)

[ GitHub ]

  
# File 'lib/scanf.rb', line 457

# Tries this specifier against the start of +str+.
#
# Works on a duplicate of +str+, so the argument is not modified. Leading
# whitespace is removed from the duplicate unless #count_space? is truthy.
# On success, stores the handler's result in @conversion, its string form in
# @matched_string, and sets @matched to true. @matched is reset to false at
# the start of every call.
#
# Returns the MatchData from the regexp, or nil when it does not match.
def match(str)
  @matched = false

  subject = str.dup
  subject.sub!(/\A\s+/, '') unless count_space?

  to_re.match(subject).tap do |found|
    next unless found

    @conversion     = send(@handler, found[1])
    @matched_string = @conversion.to_s
    @matched        = true
  end
end

#nil_proc(s) (private)

[ GitHub ]

  
# File 'lib/scanf.rb', line 320

# Handler that discards its argument and always returns nil.
def nil_proc(s)
  nil
end

#skip (private)

[ GitHub ]

  
# File 'lib/scanf.rb', line 294

# Tests whether the specifier begins (after optional whitespace) with "%*".
# Returns the MatchData when it does and nil otherwise.
def skip
  suppressed = /^\s*%\*/
  suppressed.match(@spec_string)
end

#to_re

[ GitHub ]

  
# File 'lib/scanf.rb', line 453

# Compiles @re_string into a Regexp with the MULTILINE option set, so that
# "." also matches newline characters.
def to_re
  source = @re_string
  Regexp.compile(source, Regexp::MULTILINE)
end

#to_s

[ GitHub ]

  
# File 'lib/scanf.rb', line 324

# Returns @spec_string, the specifier text held by this object.
def to_s
  @spec_string
end

#width

[ GitHub ]

  
# File 'lib/scanf.rb', line 474

# Returns the field width as an Integer: the digits that follow "%" and an
# optional "*" in @spec_string. Returns nil when no digits are present.
def width
  digits = @spec_string[/%\*?(\d+)/, 1]
  digits && digits.to_i
end