123456789_123456789_123456789_123456789_123456789_

Class: YARD::Parser::Ruby::Legacy::RubyLex Private

Do not use. This class is for internal use only.
Relationships & Source Files
Namespace Children
Classes:
Super Chains via Extension / Inclusion / Inheritance
Instance Chain:
self, ::IRB, RubyToken
Inherits: Object
Defined in: lib/yard/parser/ruby/legacy/ruby_lex.rb

Overview

Lexical analyzer for ::YARD::Parser::Ruby source

Constant Summary

RubyToken - Included

EXPR_ARG, EXPR_BEG, EXPR_CLASS, EXPR_DOT, EXPR_END, EXPR_FNAME, EXPR_MID, NEWLINE_TOKEN, TkReading2Token, TkSymbol2Token, TokenDefinitions

Class Attribute Summary

Class Method Summary

Instance Attribute Summary

Instance Method Summary

RubyToken - Included

Class Attribute Details

.debug? ⇒ Boolean (readonly)

[ GitHub ]

  
# File 'lib/yard/parser/ruby/legacy/ruby_lex.rb', line 433

# Reports whether lexer debug tracing is enabled. Hard-wired to +false+ here;
# the `print`/`printf`/`p` debug statements elsewhere in the lexer are guarded
# by this predicate.
#
# @return [Boolean] always +false+
def self.debug?
  false
end

Instance Attribute Details

#continue (readonly)

[ GitHub ]

  
# File 'lib/yard/parser/ruby/legacy/ruby_lex.rb', line 430

# Continuation flag. The newline rule sets it to +true+ when a line break
# arrives in state EXPR_BEG, EXPR_FNAME or EXPR_DOT (and +false+ otherwise);
# the backslash-newline rule also sets it to +true+. #lex keeps reading past
# a newline while it is set.
attr_reader :continue

#exception_on_syntax_error (rw)

[ GitHub ]

  
# File 'lib/yard/parser/ruby/legacy/ruby_lex.rb', line 463

# When truthy, #token calls Kernel#abort on a SyntaxError instead of
# returning a TkError token.
attr_accessor :exception_on_syntax_error

#indent (readonly)

[ GitHub ]

  
# File 'lib/yard/parser/ruby/legacy/ruby_lex.rb', line 465

# Nesting counter: incremented by the "(", "[" and "{" rules and by keywords
# listed in ENINDENT_CLAUSE, decremented by the "]", "}" and ")" rules and by
# keywords listed in DEINDENT_CLAUSE.
attr :indent

#lex_state (readonly)

[ GitHub ]

  
# File 'lib/yard/parser/ruby/legacy/ruby_lex.rb', line 431

# Current lexer state (one of the EXPR_* constants). The token rules update
# it, and #token copies it onto every token it returns.
attr_reader :lex_state

#read_auto_clean_up (rw)

[ GitHub ]

  
# File 'lib/yard/parser/ruby/legacy/ruby_lex.rb', line 462

# When truthy, #token calls #get_read (discarding the result) after producing
# each token.
attr_accessor :read_auto_clean_up

#skip_space (rw)

[ GitHub ]

  
# File 'lib/yard/parser/ruby/legacy/ruby_lex.rb', line 461

# When truthy, #token keeps matching until it gets a token that is not a
# TkSPACE, so whitespace tokens are never returned to the caller.
attr_accessor :skip_space

Instance Method Details

#char_no

[ GitHub ]

  
# File 'lib/yard/parser/ruby/legacy/ruby_lex.rb', line 472

# Returns the reader's current column. #token uses it, together with
# #line_no, as the position of the token about to be read.
def char_no
  @reader.column
end

#get_read

[ GitHub ]

  
# File 'lib/yard/parser/ruby/legacy/ruby_lex.rb', line 476

# Delegates to the reader's +get_read+. #lex returns this value as the text
# of the line it just tokenized.
# NOTE(review): presumably returns (and resets) the text consumed since the
# previous call -- confirm against the reader implementation.
def get_read
  @reader.get_read
end

#getc

[ GitHub ]

  
# File 'lib/yard/parser/ruby/legacy/ruby_lex.rb', line 480

# Reads the next character from the reader. The callers in this class treat
# a falsy return value as the end of input.
def getc
  @reader.getc
end

#getc_of_rests

[ GitHub ]

  
# File 'lib/yard/parser/ruby/legacy/ruby_lex.rb', line 484

# Delegates to the reader's +getc_already_read+.
# NOTE(review): presumably re-reads a character from input that was already
# consumed -- confirm against the reader implementation.
def getc_of_rests
  @reader.getc_already_read
end

#gets

[ GitHub ]

  
# File 'lib/yard/parser/ruby/legacy/ruby_lex.rb', line 488

# Reads one line from the input, character by character.
#
# Carriage returns are dropped, so "\r\n" line endings come back as "\n".
# The trailing newline, when present, is kept.
#
# @return [String, nil] the line read, or +nil+ when no input is left
def gets
  c = getc
  return unless c

  # An explicitly allocated buffer stays mutable even when string literals
  # are frozen, matching the String.new buffers used elsewhere in the lexer.
  line = String.new
  while c
    line << c unless c == "\r"
    break if c == "\n"
    c = getc
  end
  line
end

#identify_comment

[ GitHub ]

  
# File 'lib/yard/parser/ruby/legacy/ruby_lex.rb', line 1257

# Lexes a "#" comment up to, but not including, the terminating newline.
#
# A backslash immediately followed by a newline is treated as a continuation:
# the pair is replaced by a single space and the comment carries on with the
# next line. Any other backslash is kept verbatim.
#
# @ltype is "#" while the comment is being read and is always reset to +nil+
# afterwards, including when the comment runs to the end of the input.
#
# @return [Object] the TkCOMMENT token whose text is the comment
def identify_comment
  @ltype = "#"
  comment = String.new("#")
  while (ch = getc)
    if ch == "\\"
      ch = getc
      if ch.nil?
        # Input ended right after the backslash: keep it and stop rather
        # than appending nil to the buffer.
        comment << "\\"
        break
      elsif ch == "\n"
        ch = " "
      else
        comment << "\\"
      end
    elsif ch == "\n"
      # Leave the newline for the "\n" rule.
      ungetc
      break
    end
    comment << ch
  end
  @ltype = nil
  Token(TkCOMMENT).set_text(comment)
end

#identify_gvar

[ GitHub ]

  
# File 'lib/yard/parser/ruby/legacy/ruby_lex.rb', line 945

# Lexes what follows a "$" that has already been consumed.
#
# Depending on the next character this produces:
# * TkGVAR for punctuation globals ("$!", "$_", ...) and "$-x" switches,
# * TkBACK_REF for "$&", "$`", "$'" and "$+",
# * TkNTH_REF for "$1", "$23", ...,
# * whatever #identify_identifier returns for word-like names (the "$" and
#   the first character are pushed back first),
# * a bare "$" token otherwise (the character is pushed back).
#
# Sets @lex_state to EXPR_END before anything else.
def identify_gvar
  @lex_state = EXPR_END
  name = String.new("$")
  lead = getc

  token =
    case lead
    when %r{[~_*$?!@/\\;,=:<>".]}
      Token(TkGVAR, name << lead)
    when "-"
      name << "-"
      name << getc
      Token(TkGVAR, name)
    when "&", "`", "'", "+"
      Token(TkBACK_REF, name << lead)
    when /[1-9]/
      name << lead
      digit = getc
      while digit =~ /[0-9]/
        name << digit
        digit = getc
      end
      # The character that ended the digit run belongs to the next token.
      ungetc
      Token(TkNTH_REF)
    when /\w/
      # Rewind over both the first name character and the "$" itself.
      2.times { ungetc }
      return identify_identifier
    else
      ungetc
      Token("$")
    end

  token.set_text(name)
end

#identify_here_document

[ GitHub ]

  
# File 'lib/yard/parser/ruby/legacy/ruby_lex.rb', line 1062

# Lexes a here-document whose "<<" has already been consumed.
#
# Steps:
# 1. Read the optional "-" flag and the terminator, which is either quoted
#    with ', " or ` or a bare word (bare words behave like "-quoted ones).
# 2. Save the rest of the current line in +reserve+ so it can be lexed after
#    the heredoc body.
# 3. Read body lines until one equals the terminator. With the "-" flag the
#    terminator line may be surrounded by whitespace; body lines themselves
#    are kept verbatim, indentation included.
# 4. Hand +reserve+ back to the reader via +divert_read_from+.
#
# @ltype holds the heredoc's quote type while reading and is restored
# afterwards; @lex_state ends up as EXPR_END.
#
# @return [Object] a token of the class Ltype2Token maps the quote type to,
#   with the body as its value and the dumped body as its text
def identify_here_document
  ch = getc
  if ch == "-"
    ch = getc
    indent = true
  end
  if /['"`]/ =~ ch
    lt = ch
    quoted = String.new
    while (c = getc) && c != lt
      quoted << c
    end
  else
    lt = '"'
    # to_s guards against the input ending right after "<<".
    quoted = String.new(ch.to_s)
    while (c = getc) && c =~ /\w/
      quoted << c
    end
    ungetc
  end

  ltback, @ltype = @ltype, lt
  reserve = String.new

  while (ch = getc)
    reserve << ch
    if ch == "\\"
      # Keep an escaped character (including an escaped newline) together
      # with its backslash.
      ch = getc
      reserve << ch if ch
    elsif ch == "\n"
      break
    end
  end

  str = String.new
  while (l = gets)
    l.chomp!
    # Only the terminator comparison ignores surrounding whitespace; the
    # body line must not be modified.
    break if (indent ? l.strip : l) == quoted
    str << l << "\n"
  end

  @reader.divert_read_from(reserve)

  @ltype = ltback
  @lex_state = EXPR_END
  Token(Ltype2Token[lt], str).set_text(str.dump)
end

#identify_identifier

[ GitHub ]

  
# File 'lib/yard/parser/ruby/legacy/ruby_lex.rb', line 980

# Lexes an identifier, keyword, constant, instance/class variable or
# word-like global variable starting at the current read position.
#
# A trailing "!" or "?" is taken as part of the name only when it is not
# followed by "=", so "a!=b" lexes as "a", "!=", "b".
#
# Outside EXPR_DOT, names found in TkReading2Token are reserved words: they
# update @lex_state, adjust @indent for ENINDENT_CLAUSE / DEINDENT_CLAUSE
# keywords, and may be rewritten to their modifier form.
#
# @return [Object] a TkGVAR, TkIVAR, reserved-word, TkCONSTANT, TkFID or
#   TkIDENTIFIER token whose text is the name read
def identify_identifier
  # Explicit allocation keeps the buffer mutable under frozen string literals.
  token = String.new
  token << getc if peek(0) =~ /[$@]/
  token << getc if peek(0) == "@"

  while (ch = getc) =~ /\w/
    print ":", ch, ":" if RubyLex.debug?
    token << ch
  end
  ungetc

  # After the ungetc above, peek(0) is +ch+ and peek(1) is what follows it.
  if (ch == "!" || ch == "?") && peek(1) != "="
    token << getc
  end

  case token
  when /^\$/
    return Token(TkGVAR, token).set_text(token)
  when /^\@/
    @lex_state = EXPR_END
    return Token(TkIVAR, token).set_text(token)
  end

  if @lex_state != EXPR_DOT
    print token, "\n" if RubyLex.debug?

    token_c, *trans = TkReading2Token[token]
    if token_c
      # Reserved word.
      if @lex_state != EXPR_BEG &&
         @lex_state != EXPR_FNAME &&
         trans[1]
        # Modifier form (e.g. trailing `if`): use the modifier token class.
        token_c = TkSymbol2Token[trans[1]]
        @lex_state = trans[0]
      elsif @lex_state != EXPR_FNAME
        if ENINDENT_CLAUSE.include?(token)
          @indent += 1
          @colonblock_seen = ACCEPTS_COLON.include?(token) ? true : false
        elsif DEINDENT_CLAUSE.include?(token)
          @indent -= 1
          @colonblock_seen = false
        end
        @lex_state = trans[0]
      else
        # A keyword used as a method name (`def end`).
        @lex_state = EXPR_END
      end
      return Token(token_c, token).set_text(token)
    end
  end

  if @lex_state == EXPR_FNAME
    @lex_state = EXPR_END
    # Setter method name: `def name=`.
    token << getc if peek(0) == '='
  elsif @lex_state == EXPR_BEG || @lex_state == EXPR_DOT
    @lex_state = EXPR_ARG
  else
    @lex_state = EXPR_END
  end

  if token[0, 1] =~ /[A-Z]/
    Token(TkCONSTANT, token).set_text(token)
  elsif token[token.size - 1, 1] =~ /[!?]/
    Token(TkFID, token).set_text(token)
  else
    Token(TkIDENTIFIER, token).set_text(token)
  end
end

#identify_number(start)

[ GitHub ]

  
# File 'lib/yard/parser/ruby/legacy/ruby_lex.rb', line 1130

# Lexes a numeric literal.
#
# Handles:
# * "0x"/"0X" hexadecimal, "0b"/"0B" binary, "0o"/"0O" octal, "0d"/"0D"
#   decimal and plain leading-zero octal integers (always TkINTEGER),
# * decimal integers and floats with an optional fraction and exponent,
#   including floats that start with "0." such as "0.5".
#
# A "." that is not followed by a digit is a method call (`1.to_s`): it is
# pushed back and the literal stays a TkINTEGER.
#
# @param start [String] text already consumed for this literal: a sign
#   ("+" or "-"), the empty string, or the first digit. For a sign or the
#   empty string the first character is read here.
# @return [Object] a TkINTEGER or TkFLOAT token whose text is the literal
def identify_number(start)
  str = start.dup

  if start == "+" || start == "-" || start == ""
    start = getc
    str << start
  end

  @lex_state = EXPR_END

  if start == "0"
    digits = case peek(0)
             when "x", "X" then /[0-9a-fA-F_]/
             when "b", "B" then /[01_]/
             when "o", "O" then /[0-7_]/
             when "d", "D" then /[0-9_]/
             end
    if digits
      str << getc # the radix letter
    elsif peek(0) =~ /[0-7_]/
      digits = /[0-7_]/
    end

    if digits
      while (ch = getc)
        unless ch =~ digits
          ungetc
          break
        end
        str << ch
      end
      return Token(TkINTEGER).set_text(str)
    end
    # A lone "0", or "0." / "0e": fall through to the decimal scanner.
  end

  type = TkINTEGER
  allow_point = true
  allow_e = true
  while (ch = getc)
    case ch
    when /[0-9_]/
      str << ch

    when allow_point && "."
      if peek(0) !~ /[0-9]/
        # Method call on an integer: the dot is not part of the literal.
        ungetc
        break
      end
      type = TkFLOAT
      str << ch
      allow_point = false

    when allow_e && "e", allow_e && "E"
      str << ch
      type = TkFLOAT
      str << getc if peek(0) =~ /[+-]/
      allow_e = false
      allow_point = false
    else
      ungetc
      break
    end
  end
  Token(type).set_text(str)
end

#identify_quotation(initial_char)

[ GitHub ]

  
# File 'lib/yard/parser/ruby/legacy/ruby_lex.rb', line 1111

# Lexes a "%" literal whose "%" has already been consumed.
#
# The next character is either a type letter found in PERCENT_LTYPE (in which
# case the delimiter follows it) or, for a bare "%(...)" form, a non-word
# delimiter that is treated as a double-quoted string. A word character that
# is not a known type letter yields no token.
#
# @quoted is set to the closing delimiter: the PERCENT_PAREN partner of the
# opening delimiter when there is one, otherwise the delimiter itself.
#
# @param initial_char [String] the text consumed so far ("%")
# @return [Object, nil] the token from #identify_string, or +nil+ when the
#   literal type is not recognised
def identify_quotation(initial_char)
  delimiter = getc
  ltype = PERCENT_LTYPE[delimiter]

  if ltype
    # Type letter present: remember it and read the real delimiter.
    initial_char += delimiter
    delimiter = getc
  elsif delimiter =~ /\W/
    ltype = "\""
  end

  @quoted = PERCENT_PAREN[delimiter] || delimiter
  return unless ltype

  identify_string(ltype, @quoted, delimiter, initial_char)
end

#identify_string(ltype, quoted = ltype, opener = nil, initial_char = nil)

[ GitHub ]

  
# File 'lib/yard/parser/ruby/legacy/ruby_lex.rb', line 1192

# Lexes a string-like literal (string, regexp, command, %-literal) whose
# opening delimiter has already been consumed.
#
# Reads up to the matching closing delimiter, tracking nesting when the
# literal has a distinct opener (e.g. "(" ... ")"). "#{" ... "}" sections are
# copied via #skip_inner_expression, except for ltype "'" and "]", and mark
# the literal as interpolated. Backslash escapes are copied via #read_escape.
# For regexps (ltype "/") every trailing option letter is consumed, so
# "/x/mi" is a single token.
#
# @ltype, @quoted and @lex_state are always reset on exit.
#
# @param ltype [String] literal type key used for the token-class lookup
# @param quoted [String] closing delimiter
# @param opener [String, nil] opening delimiter when it differs from +quoted+
# @param initial_char [String, nil] prefix already consumed (e.g. "%w")
# @return [Object] a token of the class DLtype2Token maps +ltype+ to when
#   "#{" interpolation was seen, otherwise the class from Ltype2Token; its
#   text is the whole literal
def identify_string(ltype, quoted = ltype, opener = nil, initial_char = nil)
  @ltype = ltype
  @quoted = quoted
  subtype = nil

  str = String.new
  str << initial_char if initial_char
  str << (opener || quoted)

  nest = 0
  begin
    while (ch = getc)
      str << ch
      if @quoted == ch
        break if nest == 0
        nest -= 1
      elsif opener == ch
        nest += 1
      elsif @ltype != "'" && @ltype != "]" && ch == "#"
        ch = getc
        if ch == "{"
          subtype = true
          str << ch << skip_inner_expression
        else
          # A lone "#": let the main loop handle the following character.
          ungetc(ch)
        end
      elsif ch == '\\'
        str << read_escape
      end
    end

    if @ltype == "/"
      # Regexp options may be combined (/.../mix), so take all of them.
      str << getc while peek(0) =~ /[imxounes]/
    end

    token_table = subtype ? DLtype2Token : Ltype2Token
    Token(token_table[ltype], str).set_text(str)
  ensure
    @ltype = nil
    @quoted = nil
    @lex_state = EXPR_END
  end
end

#lex

[ GitHub ]

  
# File 'lib/yard/parser/ruby/legacy/ruby_lex.rb', line 510

# Tokenizes one logical line and returns its source text.
#
# Tokens are read until a TkNL or TkEND_OF_SCRIPT arrives while @continue is
# not set, or until #token returns +nil+. The text of the line is then
# fetched with #get_read.
#
# @return [String, nil] the text read; +nil+ when #token returned +nil+ or
#   when the end of the script was reached with nothing read
def lex
  catch(:eof) do
    tk = token
    tk = token until tk.nil? ||
                     ((tk.is_a?(TkNL) || tk.is_a?(TkEND_OF_SCRIPT)) && !@continue)

    line = get_read
    exhausted = tk.nil? || (line == "" && tk.is_a?(TkEND_OF_SCRIPT))
    exhausted ? nil : line
  end
end

#lex_init

[ GitHub ]

  
# File 'lib/yard/parser/ruby/legacy/ruby_lex.rb', line 586

# Builds the first half of the rule table (@OP) that drives tokenization,
# then calls #lex_int2 to register the remaining rules.
#
# Each rule maps a literal prefix to a block that consumes the rest of the
# token, updates @lex_state and returns the token; a rule may carry a guard
# proc as its second argument. The rules defined here cover end-of-script
# markers, whitespace, comments (including =begin/=end blocks), newlines,
# comparison/arithmetic/assignment operators, heredocs, quotes, "?", unary
# and binary "+"/"-", and ".", "..", "...".
def lex_init
  @OP = SLex.new

  # NUL, Ctrl-D and Ctrl-Z end the script.
  @OP.def_rules("\0", "\004", "\032") do |chars, _io|
    Token(TkEND_OF_SCRIPT).set_text(chars)
  end

  # A run of horizontal whitespace becomes a single TkSPACE token.
  @OP.def_rules(" ", "\t", "\f", "\r", "\13") do |chars, _io|
    @space_seen = true
    while (ch = getc) =~ /[ \t\f\r\13]/
      chars << ch
    end
    ungetc
    Token(TkSPACE).set_text(chars)
  end

  @OP.def_rule("#") do |_op, _io|
    identify_comment
  end

  # Block comment: only at the start of a line and followed by whitespace.
  @OP.def_rule("=begin", proc { @prev_char_no == 0 && peek(0) =~ /\s/ }) do |op, _io|
    str = String.new(op)
    @ltype = "="

    # Copy whole lines until one starts with "=end". An unterminated block
    # simply runs to the end of the input instead of appending nil.
    ch = nil
    begin
      line = String.new
      while (ch = getc)
        line << ch
        break if ch == "\n"
      end
      str << line
    end until ch.nil? || line =~ /^=end/

    # Leave the final newline for the "\n" rule; at end of input there is
    # nothing to push back.
    ungetc if ch

    @ltype = nil

    if str =~ /\A=begin\s+rdoc/i
      # For "=begin rdoc" blocks keep only the body.
      str.sub!(/\A=begin.*\n/, '')
      str.sub!(/^=end.*/m, '')
    end
    Token(TkCOMMENT).set_text(str)
  end

  @OP.def_rule("\n") do
    print "\\n\n" if RubyLex.debug?
    @colonblock_seen = false
    case @lex_state
    when EXPR_BEG, EXPR_FNAME, EXPR_DOT
      # The expression is not finished: the logical line continues.
      @continue = true
    else
      @continue = false
      @lex_state = EXPR_BEG
    end
    Token(TkNL).set_text("\n")
  end

  @OP.def_rules("*", "**",
    "!", "!=", "!~",
    "=", "==", "===",
    "=~", "<=>",
    "<", "<=",
    ">", ">=", ">>") do |op, _io|
    @lex_state = EXPR_BEG
    Token(op).set_text(op)
  end

  # "<<" is either a heredoc start or the shift/append operator.
  @OP.def_rules("<<") do |op, _io|
    tk = nil
    if @lex_state != EXPR_END && @lex_state != EXPR_CLASS &&
       (@lex_state != EXPR_ARG || @space_seen)
      c = peek(0)
      tk = identify_here_document if /[-\w\"\'\`]/ =~ c
    end
    if !tk
      @lex_state = EXPR_BEG
      tk = Token(op).set_text(op)
    end
    tk
  end

  @OP.def_rules("'", '"') do |op, _io|
    identify_string(op)
  end

  # A backtick is a method name after `def`, otherwise a command string.
  @OP.def_rules("`") do |op, _io|
    if @lex_state == EXPR_FNAME
      Token(op).set_text(op)
    else
      identify_string(op)
    end
  end

  # "?" is either the ternary operator or a character literal.
  # NOTE(review): in EXPR_ARG the ternary branch is taken when the next
  # character is NOT whitespace, which looks inverted relative to how Ruby
  # treats "?x" versus "? x" -- confirm before changing.
  @OP.def_rules('?') do |op, _io|
    if @lex_state == EXPR_END
      @lex_state = EXPR_BEG
      Token(TkQUESTION).set_text(op)
    else
      ch = getc
      if @lex_state == EXPR_ARG && ch !~ /\s/
        ungetc
        @lex_state = EXPR_BEG
        Token(TkQUESTION).set_text(op)
      else
        str = String.new(op)
        str << ch
        if ch == '\\'
          str << read_escape
        end
        @lex_state = EXPR_END
        Token(TkINTEGER).set_text(str)
      end
    end
  end

  @OP.def_rules("&", "&&", "|", "||") do |op, _io|
    @lex_state = EXPR_BEG
    Token(op).set_text(op)
  end

  # Operator-assignment: the token value is the operator without the "=".
  @OP.def_rules("+=", "-=", "*=", "**=",
    "&=", "|=", "^=", "<<=", ">>=", "||=", "&&=") do |op, _io|
    @lex_state = EXPR_BEG
    op =~ /^(.*)=$/
    Token(TkOPASGN, $1).set_text(op)
  end

  # Unary operator method names, only valid right after `def`.
  @OP.def_rule("+@", proc { @lex_state == EXPR_FNAME }) do |op, _io|
    Token(TkUPLUS).set_text(op)
  end

  @OP.def_rule("-@", proc { @lex_state == EXPR_FNAME }) do |op, _io|
    Token(TkUMINUS).set_text(op)
  end

  # "+"/"-" start a signed number when a digit follows in a position where
  # an operand is expected; otherwise they are operators.
  @OP.def_rules("+", "-") do |op, _io|
    catch(:RET) do
      if @lex_state == EXPR_ARG
        if @space_seen && peek(0) =~ /[0-9]/
          throw :RET, identify_number(op)
        else
          @lex_state = EXPR_BEG
        end
      elsif @lex_state != EXPR_END && peek(0) =~ /[0-9]/
        throw :RET, identify_number(op)
      else
        @lex_state = EXPR_BEG
      end
      Token(op).set_text(op)
    end
  end

  @OP.def_rule(".") do
    @lex_state = EXPR_BEG
    if peek(0) =~ /[0-9]/
      # ".5": push the dot back and let the number scanner read it.
      ungetc
      identify_number("")
    else
      # for obj.if
      @lex_state = EXPR_DOT
      Token(TkDOT).set_text(".")
    end
  end

  @OP.def_rules("..", "...") do |op, _io|
    @lex_state = EXPR_BEG
    Token(op).set_text(op)
  end

  lex_int2
end

#lex_int2

[ GitHub ]

  
# File 'lib/yard/parser/ruby/legacy/ruby_lex.rb', line 759

# Registers the second half of the tokenizer rules on @OP: closing brackets,
# ":" and "::", "/" (regexp or division), "^", "," and ";", "~", the opening
# brackets, backslash line continuation, "%" literals, "$" and "@" variables,
# "__END__", and the catch-all rule for numbers and identifiers.
def lex_int2
  @OP.def_rules("]", "}", ")") do |op, _io|
    @lex_state = EXPR_END
    @indent -= 1
    Token(op).set_text(op)
  end

  # ":" is a plain colon after a colon-accepting keyword or before
  # whitespace; otherwise it starts a symbol.
  @OP.def_rule(":") do
    if (@colonblock_seen && @lex_state != EXPR_BEG) || peek(0) =~ /\s/
      @lex_state = EXPR_BEG
      tk = Token(TkCOLON)
    else
      @lex_state = EXPR_FNAME
      tk = Token(TkSYMBEG)
    end
    tk.set_text(":")
  end

  # "::" at the start of an expression is a top-level constant reference
  # (TkCOLON3); elsewhere it is a scope operator (TkCOLON2).
  @OP.def_rule("::") do
    # p @lex_state.id2name, @space_seen
    if @lex_state == EXPR_BEG || @lex_state == EXPR_ARG && @space_seen
      @lex_state = EXPR_BEG
      tk = Token(TkCOLON3)
    else
      @lex_state = EXPR_DOT
      tk = Token(TkCOLON2)
    end
    tk.set_text("::")
  end

  # "/" starts a regexp where an operand is expected, otherwise it is
  # division or "/=".
  @OP.def_rule("/") do |op, _io|
    if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
      identify_string(op)
    elsif peek(0) == '='
      getc
      @lex_state = EXPR_BEG
      Token(TkOPASGN, :/).set_text("/=")
    elsif @lex_state == EXPR_ARG && @space_seen && peek(0) !~ /\s/
      identify_string(op)
    else
      @lex_state = EXPR_BEG
      Token("/").set_text(op)
    end
  end

  @OP.def_rules("^") do
    @lex_state = EXPR_BEG
    Token("^").set_text("^")
  end

  # @OP.def_rules("^=") do
  #   @lex_state = EXPR_BEG
  #   Token(TkOPASGN, :^)
  # end

  @OP.def_rules(",", ";") do |op, _io|
    @colonblock_seen = false
    @lex_state = EXPR_BEG
    Token(op).set_text(op)
  end

  @OP.def_rule("~") do
    @lex_state = EXPR_BEG
    Token("~").set_text("~")
  end

  # Unary "~" method name, only valid right after `def`. The guard must
  # compare the state (as the "+@" and "-@" guards do); an assignment here
  # would always match and would overwrite @lex_state as a side effect.
  @OP.def_rule("~@", proc { @lex_state == EXPR_FNAME }) do
    @lex_state = EXPR_BEG
    Token("~").set_text("~@")
  end

  @OP.def_rule("(") do
    @indent += 1
    # if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
    #  @lex_state = EXPR_BEG
    #  tk = Token(TkfLPAREN)
    # else
    @lex_state = EXPR_BEG
    tk = Token(TkLPAREN)
    # end
    tk.set_text("(")
  end

  # Index method names, only valid right after `def`.
  @OP.def_rule("[]", proc { @lex_state == EXPR_FNAME }) do
    Token("[]").set_text("[]")
  end

  @OP.def_rule("[]=", proc { @lex_state == EXPR_FNAME }) do
    Token("[]=").set_text("[]=")
  end

  @OP.def_rule("[") do
    @indent += 1
    # if @lex_state == EXPR_FNAME
    #   t = Token(TkfLBRACK)
    # else
    #   if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
    #     t = Token(TkLBRACK)
    #   elsif @lex_state == EXPR_ARG && @space_seen
    #   else
    #     t = Token(TkfLBRACK)
    #   end
    # end
    t = Token(TkLBRACK)
    @lex_state = EXPR_BEG
    t.set_text("[")
  end

  @OP.def_rule("{") do
    @indent += 1
    # if @lex_state != EXPR_END && @lex_state != EXPR_ARG
    #   t = Token(TkLBRACE)
    # else
    #   t = Token(TkfLBRACE)
    # end
    t = Token(TkLBRACE)
    @lex_state = EXPR_BEG
    t.set_text("{")
  end

  # Backslash-newline joins lines and is reported as whitespace.
  @OP.def_rule('\\') do
    if getc == "\n"
      @space_seen = true
      @continue = true
      Token(TkSPACE).set_text("\\\n")
    else
      ungetc
      Token("\\").set_text("\\")
    end
  end

  # "%" starts a %-literal where an operand is expected, otherwise it is
  # modulo or "%=".
  @OP.def_rule('%') do |_op, _io|
    if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
      identify_quotation('%')
    elsif peek(0) == '='
      getc
      # Like every other operator-assignment, an expression follows.
      @lex_state = EXPR_BEG
      Token(TkOPASGN, "%").set_text("%=")
    elsif @lex_state == EXPR_ARG && @space_seen && peek(0) !~ /\s/
      identify_quotation('%')
    else
      @lex_state = EXPR_BEG
      Token("%").set_text("%")
    end
  end

  @OP.def_rule('$') do
    identify_gvar
  end

  @OP.def_rule('@') do
    if peek(0) =~ /[@\w]/
      # Push the "@" back so the identifier scanner sees the whole name.
      ungetc
      identify_identifier
    else
      Token("@").set_text("@")
    end
  end

  # @OP.def_rule("def", proc{|op, io| /\s/ =~ io.peek(0)}) do
  #   |op, io|
  #   @indent += 1
  #   @lex_state = EXPR_FNAME
  # # @lex_state = EXPR_END
  # # until @rests[0] == "\n" or @rests[0] == ";"
  # #   rests.shift
  # # end
  # end

  # "__END__" alone at the start of a line ends lexing.
  @OP.def_rule("__END__", proc { @prev_char_no == 0 && peek(0) =~ /[\r\n]/ }) do
    throw :eof
  end

  # Catch-all: numbers and identifiers. Returns nil for anything else.
  @OP.def_rule("") do |op, io|
    printf "MATCH: start %s: %s\n", op, io.inspect if RubyLex.debug?
    if peek(0) =~ /[0-9]/
      t = identify_number("")
    elsif peek(0) =~ /[\w]/
      t = identify_identifier
    end
    printf "MATCH: end %s: %s\n", op, io.inspect if RubyLex.debug?
    t
  end

  p @OP if RubyLex.debug?
end

#line_no

io functions

[ GitHub ]

  
# File 'lib/yard/parser/ruby/legacy/ruby_lex.rb', line 468

# Returns the reader's current line number. #token uses it, together with
# #char_no, as the position of the token about to be read.
def line_no
  @reader.line_num
end

#peek(i = 0)

[ GitHub ]

  
# File 'lib/yard/parser/ruby/legacy/ruby_lex.rb', line 506

# Looks at an upcoming character without consuming it.
#
# @param i [Integer] offset passed to the reader; the callers in this class
#   use 0 for the very next character
# @return the reader's result for that offset (compared against
#   one-character strings and regexps by the callers)
def peek(i = 0)
  @reader.peek(i)
end

#peek_equal?(str) ⇒ Boolean

[ GitHub ]

  
# File 'lib/yard/parser/ruby/legacy/ruby_lex.rb', line 502

# Delegates to the reader's +peek_equal+.
# NOTE(review): presumably tests whether the upcoming input starts with
# +str+ without consuming it -- confirm against the reader implementation.
#
# @param str [String] text to compare against
def peek_equal?(str)
  @reader.peek_equal(str)
end

#read_escape

[ GitHub ]

  
# File 'lib/yard/parser/ruby/legacy/ruby_lex.rb', line 1280

# Reads the remainder of a backslash escape (the backslash itself has already
# been consumed) and returns the characters that belong to it.
#
# Recognised forms:
# * up to three octal digits ("101"),
# * "x" followed by up to two hex digits,
# * "M-" followed by a character or a nested escape,
# * "C-" or "c" followed by a character or a nested escape,
# * any other single character.
#
# @return [String] the escape text without the leading backslash; empty when
#   the input ended right after the backslash
def read_escape
  res = String.new
  case ch = getc
  when /[0-7]/
    # Push the first digit back so the loop below reads all of them.
    ungetc ch
    3.times do
      ch = getc
      break if ch.nil?
      unless ch =~ /[0-7]/
        ungetc
        break
      end
      res << ch
    end

  when "x"
    res << ch
    2.times do
      ch = getc
      break if ch.nil?
      unless ch =~ /[0-9a-fA-F]/
        ungetc
        break
      end
      res << ch
    end

  when "M"
    res << ch
    if (ch = getc) != '-'
      ungetc
    else
      res << ch
      ch = getc
      res << ch if ch
      res << read_escape if ch == "\\"
    end

  when "C", "c"
    res << ch
    if ch == "C" && (ch = getc) != "-"
      ungetc
    else
      # Only the "C-" form has a dash to record; for "c" the letter is
      # already in +res+ and must not be added a second time.
      res << ch if ch == "-"
      ch = getc
      res << ch if ch
      res << read_escape if ch == "\\"
    end
  else
    res << ch if ch
  end
  res
end

#skip_inner_expression

[ GitHub ]

  
# File 'lib/yard/parser/ruby/legacy/ruby_lex.rb', line 1242

# Consumes the body of a "#{...}" interpolation whose "#{" has already been
# read, up to and including the matching "}".
#
# Nested braces are balanced by counting; nothing else (strings, comments)
# is interpreted. Reading stops early if the input runs out.
#
# @return [String] everything consumed, including the closing "}" if found
def skip_inner_expression
  captured = String.new
  depth = 0

  while (char = getc)
    captured << char
    case char
    when '{'
      depth += 1
    when '}'
      break if depth == 0
      depth -= 1
    end
  end

  captured
end

#token

[ GitHub ]

  
# File 'lib/yard/parser/ruby/legacy/ruby_lex.rb', line 526

# Reads and returns the next token.
#
# The position of the token is recorded first (set_token_position), then the
# rule table is asked to match at the current read position. The result is
# +nil+ when the matched rule produced no token, and whatever +throw :eof+
# yields when a rule signals end of input (the "__END__" rule does this).
#
# @return [Object, nil] the token, with its +lex_state+ set to the lexer
#   state after the token was read
def token
  set_token_position(line_no, char_no)
  catch(:eof) do
    # Do-while loop: match at least once, and match again for as long as
    # whitespace tokens are being skipped.
    begin
      begin
        tk = @OP.match(self)
        # Remember whether the previous token was whitespace; several rules
        # use this to disambiguate operators from literals.
        @space_seen = tk.is_a?(TkSPACE)
      rescue SyntaxError
        # Either terminate the process (Kernel#abort) or report the problem
        # as an error token at the current position.
        abort if @exception_on_syntax_error
        tk = TkError.new(line_no, char_no)
      end
    end while @skip_space && tk.is_a?(TkSPACE)
    if @read_auto_clean_up
      # Result intentionally discarded; only the reader-side effect of
      # get_read is wanted here.
      get_read
    end
    # throw :eof unless tk
    p tk if $DEBUG
    tk.lex_state = lex_state if tk
    tk
  end
end

#ungetc(c = nil)

[ GitHub ]

  
# File 'lib/yard/parser/ruby/legacy/ruby_lex.rb', line 498

# Pushes the most recently read character back onto the reader so that the
# next #getc returns it again.
#
# @param c [String, nil] forwarded to the reader unchanged; most callers in
#   this class omit it
def ungetc(c = nil)
  @reader.ungetc(c)
end