123456789_123456789_123456789_123456789_123456789_

Module: RDoc::Parser::RubyColorizer

Relationships & Source Files
Namespace Children
Classes:
Defined in: lib/rdoc/parser/ruby_colorizer.rb

Overview

Ruby code syntax highlighter. The result of colorizing is an array of ColoredToken. The actual color for each token kind is determined elsewhere (e.g., by the HTML generator).

Constant Summary

  • OP_TOKENS =

    Prism operator token types except assignment '='

    # File 'lib/rdoc/parser/ruby_colorizer.rb', line 14
    # Set of Prism token type symbols that normal_tokens reports as :operator.
    # Built with to_set so the membership test there is a set lookup.
    # Plain assignment (EQUAL) is left out on purpose, per the description above.
    # NOTE(review): GREATER_EQUAL and EQUAL_EQUAL_EQUAL are not listed although
    # LESS_EQUAL and EQUAL_EQUAL are; such tokens currently fall through to
    # TOKEN_TYPE_MAP / :plain. Confirm whether that omission is intentional.
    %i[
      AMPERSAND AMPERSAND_AMPERSAND
      BANG BANG_EQUAL BANG_TILDE CARET COLON COLON_COLON
      EQUAL_EQUAL EQUAL_GREATER EQUAL_TILDE
      GREATER GREATER_GREATER
      LESS LESS_EQUAL LESS_EQUAL_GREATER LESS_LESS
      MINUS MINUS_GREATER PERCENT PIPE PIPE_PIPE PLUS
      QUESTION_MARK SLASH STAR STAR_STAR TILDE
      UAMPERSAND UMINUS UPLUS USTAR USTAR_STAR
    ].to_set
  • TOKEN_TYPE_MAP =

    Prism token type to RubyColorizer::ColoredToken kind map

    # File 'lib/rdoc/parser/ruby_colorizer.rb', line 26
    {
      # Lookup table consulted by normal_tokens for tokens that are neither
      # KEYWORD_* nor members of OP_TOKENS. Keys are Prism token type symbols,
      # values are ColoredToken kinds. Types missing here fall back to :plain.

      # Names and variable-like tokens. Only INSTANCE_VARIABLE has its own
      # kind (:ivar); class/global variables and references share :identifier.
      IDENTIFIER: :identifier,
      METHOD_NAME: :identifier,
      INSTANCE_VARIABLE: :ivar,
      CLASS_VARIABLE: :identifier,
      GLOBAL_VARIABLE: :identifier,
      BACK_REFERENCE: :identifier,
      NUMBERED_REFERENCE: :identifier,
      CONSTANT: :constant,
      # Labels and every numeric literal variant are colored as :value.
      LABEL: :value,
      INTEGER: :value,
      INTEGER_IMAGINARY: :value,
      INTEGER_RATIONAL: :value,
      INTEGER_RATIONAL_IMAGINARY: :value,
      FLOAT: :value,
      FLOAT_IMAGINARY: :value,
      FLOAT_RATIONAL: :value,
      FLOAT_RATIONAL_IMAGINARY: :value,
      # Comments, including the EMBDOC_* (embedded document) token types.
      COMMENT: :comment,
      EMBDOC_BEGIN: :comment,
      EMBDOC_LINE: :comment,
      EMBDOC_END: :comment
    }

Class Method Summary

Class Method Details

.colorize(code)

Colorizes the entire code and returns the colored token stream.

[ GitHub ]

  
# File 'lib/rdoc/parser/ruby_colorizer.rb', line 53

# Colorizes all of +code+ and returns the colored token stream produced by
# partial_colorize for the whole program.
#
# code:: Ruby source text to highlight.
#
# The lexer output is a list of entries whose first element is the token; the
# tokens are extracted and ordered by start offset before being handed on,
# because slice_by_location binary-searches them by location.
def colorize(code)
  program_node, lexed_entries = Prism.parse_lex(code).value
  ordered_tokens = lexed_entries
    .map { |entry| entry.first }
    .sort_by { |token| token.location.start_offset }
  # Cover the full byte range of the source, not just the program node's span.
  partial_colorize(code, program_node, ordered_tokens, 0, code.bytesize)
end

.normal_tokens(tokens) (private)

Converts normal Prism tokens to [kind, start_offset, end_offset] triples.

[ GitHub ]

  
# File 'lib/rdoc/parser/ruby_colorizer.rb', line 118

# Converts lexer tokens into [kind, start_offset, end_offset] triples.
#
# tokens:: enumerable of tokens responding to +type+ and +location+. An entry
#          may also be an array whose first element is the token; the block
#          parameter +token,+ picks the first element in that case.
#
# Kind resolution order:
# 1. any type starting with KEYWORD_ -> :keyword
# 2. any type listed in OP_TOKENS    -> :operator
# 3. TOKEN_TYPE_MAP lookup, falling back to :plain
def normal_tokens(tokens)
  tokens.map do |token,|
    type = token.type
    location = token.location

    kind =
      if type.start_with?('KEYWORD_') then :keyword
      elsif OP_TOKENS.include?(type.to_sym) then :operator
      else TOKEN_TYPE_MAP[type] || :plain
      end

    [kind, location.start_offset, location.end_offset]
  end
end

.partial_colorize(whole_code, node, prism_tokens, start_offset = nil, end_offset = nil)

Colorizes a single node within whole_code and returns the colored token stream.

[ GitHub ]

  
# File 'lib/rdoc/parser/ruby_colorizer.rb', line 61

# Colorizes +node+ within +whole_code+ and returns an array of ColoredToken.
#
# whole_code::   full source text that all byte offsets refer to.
# node::         node to colorize; must respond to +location+ and +accept+.
# prism_tokens:: lexer tokens for whole_code; they are binary-searched by
#                slice_by_location, so they are expected to be in offset order.
# start_offset:: first byte to cover; defaults to the node's start offset.
# end_offset::   byte to stop at; defaults to the node's end offset.
#
# Tokens collected by NodeColorizeVisitor ("prior" tokens) take precedence
# over the plain lexer tokens when unify_tokens merges the two lists.
def partial_colorize(whole_code, node, prism_tokens, start_offset = nil, end_offset = nil)
  location = node.location
  start_offset ||= location.start_offset
  end_offset ||= location.end_offset

  visitor = NodeColorizeVisitor.new
  node.accept(visitor)
  visitor_tokens = visitor.tokens.sort_by { |_kind, token_start, _token_end| token_start }
  lexer_tokens = normal_tokens(slice_by_location(prism_tokens, start_offset, end_offset))

  colored = unify_tokens(whole_code, visitor_tokens, lexer_tokens, start_offset, end_offset)

  # Re-create the node's original indentation as one leading :plain token.
  # NOTE(review): this is prepended even when an explicit start_offset is
  # given (e.g. 0 from colorize); confirm it cannot duplicate leading
  # whitespace that unify_tokens has already emitted.
  indent_width = location.start_column
  colored.unshift(ColoredToken.new(:plain, ' ' * indent_width)) if indent_width > 0
  colored
end

.slice_by_location(items, start_offset, end_offset) (private)

[ GitHub ]

  
# File 'lib/rdoc/parser/ruby_colorizer.rb', line 75

# Returns the contiguous run of +items+ that overlaps the byte range
# start_offset...end_offset.
#
# The run starts at the first item whose end offset is greater than
# start_offset and stops before the first item whose start offset is at or
# past end_offset. Both boundaries are located with a binary search, so
# +items+ must already be ordered by location. Each item must respond to
# +location+ with +start_offset+ / +end_offset+.
#
# Returns an empty array when nothing falls inside the range.
def slice_by_location(items, start_offset, end_offset)
  past_the_end = items.size
  first = items.bsearch_index { |entry| entry.location.end_offset > start_offset }
  stop = items.bsearch_index { |entry| entry.location.start_offset >= end_offset }
  items[(first || past_the_end)...(stop || past_the_end)]
end

.unify_tokens(whole_code, prior_tokens, normal_tokens, start_offset, end_offset) (private)

Unify prior tokens and normal tokens into a single token stream. Prior tokens have higher priority than normal tokens. Also adds missing text (spaces, newlines, etc.) as :plain tokens so that the entire range is covered.

[ GitHub ]

  
# File 'lib/rdoc/parser/ruby_colorizer.rb', line 85

# Merges +prior_tokens+ and +normal_tokens+ into a single ColoredToken stream
# covering whole_code from start_offset up to end_offset (byte offsets).
#
# whole_code::    source text the offsets index into (sliced by bytes).
# prior_tokens::  [kind, start_offset, end_offset] triples that win on overlap.
# normal_tokens:: [kind, start_offset, end_offset] triples used elsewhere.
# start_offset::  byte offset where output begins.
# end_offset::    byte offset up to which trailing gaps are filled.
#
# Both lists are expected to be ordered by start offset, since only the head
# of each list is inspected at every step. A normal token is emitted only if
# it ends at or before the next prior token starts; any token that begins
# before the current output position is dropped as overlapping. Text not
# covered by a token (spaces, newlines, ...) is emitted one character at a
# time as :plain tokens.
#
# The input arrays are read through index cursors and are not modified.
def unify_tokens(whole_code, prior_tokens, normal_tokens, start_offset, end_offset)
  tokens = []
  offset = start_offset

  # Emit the uncovered text between the current position and next_offset as
  # separate single-character :plain tokens. Does not advance +offset+; the
  # caller does that.
  flush = ->(next_offset) {
    # Nothing to emit for an empty gap, or when a token already ran past it.
    return if offset >= next_offset

    whole_code.byteslice(offset...next_offset).scan(/\n|\s|[^\s]/) do |text|
      tokens << ColoredToken.new(:plain, text)
    end
  }

  prior_index = 0
  normal_index = 0
  while prior_index < prior_tokens.size || normal_index < normal_tokens.size
    ptok = prior_tokens[prior_index]
    ntok = normal_tokens[normal_index]

    # Take the normal token only when it finishes before the next prior
    # token begins; otherwise the prior token has priority.
    if ntok && (!ptok || ntok[2] <= ptok[1])
      token = ntok
      normal_index += 1
    else
      token = ptok
      prior_index += 1
    end

    kind, start_pos, end_pos = token
    # Skip tokens overlapping text that has already been emitted.
    next if start_pos < offset

    flush.call(start_pos)
    tokens << ColoredToken.new(kind, whole_code.byteslice(start_pos...end_pos))
    offset = end_pos
  end

  flush.call(end_offset)
  tokens
end