123456789_123456789_123456789_123456789_123456789_

Class: Nokogiri::CSS::XPathVisitor

Relationships & Source Files
Namespace Children
Modules:
Inherits: Object
Defined in: lib/nokogiri/css/xpath_visitor.rb

Overview

When translating ::Nokogiri::CSS selectors to XPath queries with xpath_for, the XPathVisitor class allows for changing some of the behaviors related to builtin xpath functions and quirks of ::Nokogiri::HTML5.

Constant Summary

Class Method Summary

Instance Attribute Summary

  • #builtins readonly

    The visitor configuration set via the builtins: keyword argument to .new.

  • #doctype readonly

    The visitor configuration set via the doctype: keyword argument to .new.

  • #namespaces readonly

    The visitor configuration set via the namespaces: keyword argument to .new.

  • #prefix readonly

    The visitor configuration set via the prefix: keyword argument to .new.

Instance Method Summary

Constructor Details

.new() → XPathVisitor
.new(builtins:, doctype:, prefix:, namespaces:) → XPathVisitor

Parameters
  • builtins: (BuiltinsConfig) Determine when to use Nokogiri’s built-in xpath functions for performance improvements.

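  • doctype: (DoctypeConfig) Make document-type-specific accommodations for ::Nokogiri::CSS queries.

  • prefix: The XPath prefix stored on the visitor and exposed via #prefix. Defaults to Nokogiri::XML::XPath::GLOBAL_SEARCH_PREFIX.

  • namespaces: Namespace mapping, or nil (the default). When it contains the key "xmlns", unprefixed element names outside the HTML5 special case are emitted with the xmlns: prefix.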

Returns

XPathVisitor

[ GitHub ]

  
# File 'lib/nokogiri/css/xpath_visitor.rb', line 69

# Builds a visitor after validating the two enumerated configuration values.
#
# [Parameters]
# - +builtins:+ must be one of BuiltinsConfig::VALUES (default BuiltinsConfig::NEVER)
# - +doctype:+ must be one of DoctypeConfig::VALUES (default DoctypeConfig::XML)
# - +prefix:+ stored unchecked (default Nokogiri::XML::XPath::GLOBAL_SEARCH_PREFIX)
# - +namespaces:+ stored unchecked (default +nil+)
#
# Raises ArgumentError when +builtins+ or +doctype+ is not a recognized value;
# +builtins+ is checked first, so it is the one reported when both are invalid.
def initialize(
  builtins: BuiltinsConfig::NEVER,
  doctype: DoctypeConfig::XML,
  prefix: Nokogiri::XML::XPath::GLOBAL_SEARCH_PREFIX,
  namespaces: nil
)
  builtins_known = BuiltinsConfig::VALUES.include?(builtins)
  raise(ArgumentError, "Invalid values #{builtins.inspect} for builtins: keyword parameter") unless builtins_known

  doctype_known = DoctypeConfig::VALUES.include?(doctype)
  raise(ArgumentError, "Invalid values #{doctype.inspect} for doctype: keyword parameter") unless doctype_known

  # assignment order is kept so instance_variables / inspect output is unchanged
  @builtins = builtins
  @doctype = doctype
  @prefix = prefix
  @namespaces = namespaces
end

Instance Attribute Details

#builtins (readonly)

The visitor configuration set via the builtins: keyword argument to .new.

[ GitHub ]

  
# File 'lib/nokogiri/css/xpath_visitor.rb', line 48

# The visitor configuration set via the +builtins:+ keyword argument to XPathVisitor.new.
attr_reader :builtins

#doctype (readonly)

The visitor configuration set via the doctype: keyword argument to .new.

[ GitHub ]

  
# File 'lib/nokogiri/css/xpath_visitor.rb', line 51

# The visitor configuration set via the +doctype:+ keyword argument to XPathVisitor.new.
attr_reader :doctype

#namespaces (readonly)

The visitor configuration set via the namespaces: keyword argument to .new.

[ GitHub ]

  
# File 'lib/nokogiri/css/xpath_visitor.rb', line 57

# The visitor configuration set via the +namespaces:+ keyword argument to XPathVisitor.new.
attr_reader :namespaces

#prefix (readonly)

The visitor configuration set via the prefix: keyword argument to .new.

[ GitHub ]

  
# File 'lib/nokogiri/css/xpath_visitor.rb', line 54

# The visitor configuration set via the +prefix:+ keyword argument to XPathVisitor.new.
attr_reader :prefix

Instance Method Details

#accept(node)

This method is for internal use only.
[ GitHub ]

  
# File 'lib/nokogiri/css/xpath_visitor.rb', line 297

# Internal: asks +node+ to accept this visitor and returns whatever
# node.accept(self) returns.
def accept(node)
  node.accept(self)
end

#config() → Hash

Returns

a Hash representing the configuration of the XPathVisitor, suitable for use as part of the CSS cache key.

[ GitHub ]

  
# File 'lib/nokogiri/css/xpath_visitor.rb', line 93

# Returns a Hash describing the visitor configuration (builtins, doctype,
# prefix and namespaces, in that key order), suitable for use as part of the
# CSS cache key.
def config
  {
    builtins: @builtins,
    doctype: @doctype,
    prefix: @prefix,
    namespaces: @namespaces,
  }
end

#css_class(hay, needle) (private)

This method is for internal use only.
[ GitHub ]

  
# File 'lib/nokogiri/css/xpath_visitor.rb', line 364

# Internal: builds the XPath test "the whitespace-separated list produced by
# +hay+ contains the token +needle+".
#
# +hay+ is an XPath expression (for example "@class") and is inserted
# verbatim. +needle+ is plain text and is emitted as an XPath string literal,
# so a needle containing quote characters still yields well-formed XPath.
#
# The nokogiri-builtin:css-class function is used when builtins are ALWAYS
# enabled, or when they are OPTIMAL and Nokogiri.uses_libxml? is true;
# otherwise only ordinary XPath functions are used.
def css_class(hay, needle)
  if @builtins == BuiltinsConfig::ALWAYS || (@builtins == BuiltinsConfig::OPTIMAL && Nokogiri.uses_libxml?)
    # use the builtin implementation
    "nokogiri-builtin:css-class(#{hay},#{css_class_string_literal(needle.to_s)})"
  else
    # use only ordinary xpath functions; the needle is padded with spaces so
    # that it can only match a whole token of the space-normalized haystack
    "contains(concat(' ',normalize-space(#{hay}),' '),#{css_class_string_literal(" #{needle} ")})"
  end
end

# Internal: renders +text+ as an XPath 1.0 string literal. XPath 1.0 literals
# have no escape sequences, so the quote style is chosen to avoid the quote
# characters present in +text+, falling back to concat() when both kinds occur.
def css_class_string_literal(text)
  return "'#{text}'" unless text.include?("'")
  return "\"#{text}\"" unless text.include?('"')

  # both quote kinds present: single-quote each apostrophe-free piece and
  # join the pieces with a double-quoted apostrophe
  pieces = text.split("'", -1).map { |piece| "'#{piece}'" }
  "concat(#{pieces.join(%q{,"'",})})"
end

#html5_element_name_needs_namespace_handling(node) (private)

This method is for internal use only.
[ GitHub ]

  
# File 'lib/nokogiri/css/xpath_visitor.rb', line 309

# Internal: true when an element-name node needs the HTML5 "ignore the
# namespace" treatment, i.e. it is a single-part name other than "*".
def html5_element_name_needs_namespace_handling(node)
  parts = node.value

  # anything but a single part means there is already a namespace
  # (a prefixed QName), which is used as normal
  return false unless parts.length == 1

  # the wildcard selector "*" is also used as normal
  parts.first != "*"
end

#is_of_type_pseudo_class?(node) ⇒ Boolean (private)

This method is for internal use only.
[ GitHub ]

  
# File 'lib/nokogiri/css/xpath_visitor.rb', line 354

# Internal: tells whether +node+ is one of the "*-of-type" pseudo-classes
# (nth-, first-, last- or only-of-type), written either as a bare name or as
# a function call.
#
# Returns the match offset (truthy) on a match and nil otherwise, including
# when +node+ is not a :PSEUDO_CLASS node at all.
def is_of_type_pseudo_class?(node) # rubocop:disable Naming/PredicateName
  return unless node.type == :PSEUDO_CLASS

  head = node.value[0]
  # for a function-style pseudo-class the name is the function node's first value
  name =
    if head.is_a?(Nokogiri::CSS::Node) && (head.type == :FUNCTION)
      head.value[0]
    else
      head
    end

  name =~ /(nth|first|last|only)-of-type(\()?/
end

#nth(node, options = {}) (private)

This method is for internal use only.
[ GitHub ]

  
# File 'lib/nokogiri/css/xpath_visitor.rb', line 316

# Internal: translates an an+b node (as used by :nth-child and friends) into
# an XPath predicate expression.
#
# +node.value+ must hold exactly four tokens (a, "n", operator, b).
#
# Options:
# - +:child+ when truthy, positions are counted among sibling elements
#   instead of using position()
# - +:last+ when truthy, positions are counted from the end
#
# Raises ArgumentError when the node does not contain four tokens.
def nth(node, options = {})
  unless node.value.size == 4
    raise(ArgumentError, "expected an+b node to contain 4 tokens, but is #{node.value.inspect}")
  end

  a, b = read_a_and_positive_b(node.value)
  position = if options[:child]
    options[:last] ? "(count(following-sibling::*)+1)" : "(count(preceding-sibling::*)+1)"
  else
    options[:last] ? "(last()-position()+1)" : "position()"
  end

  # With a == 0 the step vanishes and an+b selects exactly position b
  # (nothing when b < 1). The general forms below would emit "mod 0", which
  # never compares equal to 0 in XPath and so would match nothing.
  return "#{position}=#{b}" if a.zero?

  # "an" with a positive step: every a-th position. A negative step with
  # b == 0 must NOT take this shortcut ("-2n" selects nothing); it falls
  # through to the "<= 0" comparison below.
  return "(#{position} mod #{a})=0" if b.zero? && a > 0

  # a negative step counts down from b, a positive step counts up from b
  compare = a < 0 ? "<=" : ">="
  if a.abs == 1
    "#{position}#{compare}#{b}"
  else
    "(#{position}#{compare}#{b}) and (((#{position}-#{b}) mod #{a.abs})=0)"
  end
end

#read_a_and_positive_b(values) (private)

This method is for internal use only.
[ GitHub ]

  
# File 'lib/nokogiri/css/xpath_visitor.rb', line 340

# Internal: reads the step +a+ and offset +b+ out of the four an+b tokens
# (a, "n", operator, b) and returns them as [a, b].
#
# For the "-" operator the offset is folded into the range of the step
# (a - (b mod a)), which selects the same positions >= 1 as the literal
# negative offset would. With a zero step there is nothing to fold against
# (and b % 0 would raise ZeroDivisionError), so the negated offset is
# returned as-is.
#
# Raises ArgumentError when the operator is neither "+" nor "-".
def read_a_and_positive_b(values)
  # the operator token may carry surrounding whitespace
  op = values[2].strip
  unless ["+", "-"].include?(op)
    raise ArgumentError, "expected an+b node to have either + or - as the operator, but is #{op.inspect}"
  end

  a = values[0].to_i
  offset = values[3].to_i

  return [a, offset] if op == "+"
  return [a, -offset] if a.zero?

  [a, a - (offset % a)]
end

#validate_xpath_function_name(name) (private)

This method is for internal use only.
[ GitHub ]

  
# File 'lib/nokogiri/css/xpath_visitor.rb', line 303

# Internal: rejects custom XPath function names that begin with a hyphen.
#
# Returns nil for an acceptable name; raises Nokogiri::CSS::SyntaxError
# otherwise.
def validate_xpath_function_name(name)
  return unless name.start_with?("-")

  raise Nokogiri::CSS::SyntaxError, "Invalid XPath function name '#{name}'"
end

#visit_attrib_name(node)

This method is for internal use only.
[ GitHub ]

  
# File 'lib/nokogiri/css/xpath_visitor.rb', line 293

# Internal: renders an attribute-name node as an XPath attribute step
# ("@" followed by the node's first value).
def visit_attrib_name(node)
  attribute_name = node.value.first
  "@#{attribute_name}"
end

#visit_attribute_condition(node)

This method is for internal use only.
[ GitHub ]

  
# File 'lib/nokogiri/css/xpath_visitor.rb', line 175

# Internal: translates an attribute condition such as [a], [a=v], [a~=v] or
# [a$=v] into an XPath expression.
#
# +node.value+ is either [attribute_node] (bare existence test) or
# [attribute_node, operator, value]. The value may arrive quoted or unquoted;
# unquoted values are wrapped in single quotes, and a quoted value that
# contains its own quote character is rebuilt as an XPath concat() call.
def visit_attribute_condition(node)
  attribute = node.value.first.accept(self)
  return attribute if node.value.length == 1

  value = node.value.last
  # \A rather than ^: only a quote at the very start of the value counts
  value = "'#{value}'" unless /\A['"]/.match?(value)

  # quoted values - see test_attribute_value_with_quotes in test/css/test_parser.rb
  str_value = nil
  if (value[0] == value[-1]) && %q{"'}.include?(value[0])
    str_value = value[1..-2]
    if str_value.include?(value[0])
      value = 'concat("' + str_value.split('"', -1).join(%q{",'"',"}) + '","")'
    end
  end

  case node.value[1]
  when :equal
    attribute + "=" + value.to_s
  when :not_equal
    attribute + "!=" + value.to_s
  when :substring_match
    "contains(#{attribute},#{value})"
  when :prefix_match
    "starts-with(#{attribute},#{value})"
  when :dash_match
    "#{attribute}=#{value} or starts-with(#{attribute},concat(#{value},'-'))"
  when :includes
    # css_class wants the bare token, not an XPath literal. Use the text that
    # was unquoted above: by now +value+ may have been rewritten into a
    # concat(...) expression, and slicing that would hand over garbage.
    css_class(attribute, str_value || value[1..-2])
  when :suffix_match
    "substring(#{attribute},string-length(#{attribute})-string-length(#{value})+1,string-length(#{value}))=#{value}"
  else
    attribute + " #{node.value[1]} " + value.to_s
  end
end

#visit_class_condition(node)

This method is for internal use only.
[ GitHub ]

  
# File 'lib/nokogiri/css/xpath_visitor.rb', line 237

# Internal: translates a class condition (".name") into a css_class test of
# the node's first value against the @class attribute.
def visit_class_condition(node)
  class_name = node.value.first
  css_class("@class", class_name)
end

#visit_combinator(node)

This method is for internal use only.
[ GitHub ]

  
# File 'lib/nokogiri/css/xpath_visitor.rb', line 241

# Internal: joins the two sides of a condition combinator.
#
# When the right-hand side is a "*-of-type" pseudo-class the sides are joined
# with "][" (closing one predicate and opening another); otherwise they are
# joined with " and " inside the same predicate. A nil left-hand side
# contributes an empty string.
def visit_combinator(node)
  lhs = node.value.first
  rhs = node.value.last
  glue = is_of_type_pseudo_class?(rhs) ? "][" : " and "

  "#{lhs&.accept(self)}#{glue}#{rhs.accept(self)}"
end

#visit_conditional_selector(node)

This method is for internal use only.
[ GitHub ]

  
# File 'lib/nokogiri/css/xpath_visitor.rb', line 262

# Internal: renders a selector followed by its condition as an XPath step
# with a predicate: selector[condition].
def visit_conditional_selector(node)
  selector = node.value.first.accept(self)
  condition = node.value.last.accept(self)

  selector + "[" + condition + "]"
end

#visit_element_name(node)

This method is for internal use only.
[ GitHub ]

  
# File 'lib/nokogiri/css/xpath_visitor.rb', line 267

# Internal: translates an element-name node into an XPath name test.
#
# - HTML5 documents: unprefixed, non-wildcard names ignore namespaces, using
#   a builtin-assisted form when builtins are enabled.
# - Two-part names carry a namespace prefix; a nil prefix means "no prefix".
# - Otherwise the "xmlns" default namespace is applied if it was declared.
def visit_element_name(node)
  parts = node.value

  if @doctype == DoctypeConfig::HTML5 && html5_element_name_needs_namespace_handling(node)
    # HTML5 has namespaces that should be ignored in CSS queries
    # https://github.com/sparklemotion/nokogiri/issues/2376
    local_name = parts.first
    use_builtins = @builtins == BuiltinsConfig::ALWAYS ||
      (@builtins == BuiltinsConfig::OPTIMAL && Nokogiri.uses_libxml?)

    return "*[local-name()='#{local_name}']" unless use_builtins
    return "*:#{local_name}" if WILDCARD_NAMESPACES

    return "*[nokogiri-builtin:local-name-is('#{local_name}')]"
  end

  if parts.length == 2 # has a namespace prefix
    # a nil prefix means the namespace prefix is empty
    return parts.first.nil? ? parts.last : parts.join(":")
  end

  # apply the default namespace if it's declared
  return "xmlns:#{parts.first}" if @namespaces&.key?("xmlns")

  parts.first
end

#visit_function(node)

This method is for internal use only.
[ GitHub ]

  
# File 'lib/nokogiri/css/xpath_visitor.rb', line 98

# Internal: translates a function-style pseudo-class into XPath.
#
# +node.value+ is [name, *arguments], where the name includes its opening
# parenthesis (e.g. "nth-child("). If the visitor responds to
# visit_function_<name> that method handles the node. Otherwise the known
# CSS functions are translated below, and anything else is treated as a
# custom XPath function in the "nokogiri:" namespace with "." as its first
# argument.
def visit_function(node)
  name = node.value.first
  handler = :"visit_function_#{name.delete("(")}"
  return send(handler, node) if respond_to?(handler)

  arg = node.value[1]
  # true when the argument is a parsed an+b expression rather than a plain index
  an_plus_b = arg.is_a?(Nokogiri::CSS::Node) && (arg.type == :NTH)

  case name
  when /^text\(/
    "child::text()"
  when /^self\(/
    "self::#{arg}"
  when /^eq\(/
    "position()=#{arg}"
  when /^(nth|nth-of-type)\(/
    an_plus_b ? nth(arg) : "position()=#{arg}"
  when /^nth-child\(/
    an_plus_b ? nth(arg, child: true) : "count(preceding-sibling::*)=#{arg.to_i - 1}"
  when /^nth-last-of-type\(/
    if an_plus_b
      nth(arg, last: true)
    else
      from_end = arg.to_i - 1
      from_end.zero? ? "position()=last()" : "position()=last()-#{from_end}"
    end
  when /^nth-last-child\(/
    an_plus_b ? nth(arg, last: true, child: true) : "count(following-sibling::*)=#{arg.to_i - 1}"
  when /^(first|first-of-type)\(/
    "position()=1"
  when /^(last|last-of-type)\(/
    "position()=last()"
  when /^contains\(/
    "contains(.,#{arg})"
  when /^gt\(/
    "position()>#{arg}"
  when /^only-child\(/
    "last()=1"
  when /^comment\(/
    "comment()"
  when /^has\(/
    # a nil first value marks a direct relation, e.g. "has(> a)", "has(~ a)",
    # "has(+ a)"; everything else searches descendants
    axis = arg.value[0].nil? ? "" : "//"
    ".#{axis}#{arg.accept(self)}"
  else
    validate_xpath_function_name(name)

    # xpath function call, let's marshal those arguments
    rest = node.value.drop(1).map do |argument|
      argument.is_a?(Nokogiri::CSS::Node) ? argument.accept(self) : argument
    end
    # +name+ already ends with "(", so only the closing parenthesis is added
    "nokogiri:#{name}#{[".", *rest].join(",")})"
  end
end

#visit_id(node)

This method is for internal use only.
[ GitHub ]

  
# File 'lib/nokogiri/css/xpath_visitor.rb', line 170

# Internal: translates an id selector ("#name") into an @id equality test.
# A value without a leading "#" yields a comparison against the empty string.
def visit_id(node)
  matched = /^#(.*)$/.match(node.value.first)
  id = matched ? matched[1] : nil

  "@id='#{id}'"
end

#visit_not(node)

This method is for internal use only.
[ GitHub ]

  
# File 'lib/nokogiri/css/xpath_visitor.rb', line 161

# Internal: translates :not(...) into an XPath not() call. An element-name
# argument is tested on the self:: axis so that it refers to the context node
# itself rather than to a child of that name.
def visit_not(node)
  child = node.value.first
  axis = child.type == :ELEMENT_NAME ? "self::" : ""

  "not(#{axis}#{child.accept(self)})"
end

#visit_pseudo_class(node)

This method is for internal use only.
[ GitHub ]

  
# File 'lib/nokogiri/css/xpath_visitor.rb', line 211

# Internal: translates a pseudo-class into an XPath predicate expression.
#
# A function-style pseudo-class is delegated to its function node. For a bare
# name, visit_pseudo_class_<name> handles it when the visitor responds to
# that method; otherwise the known CSS names are translated below, and any
# other name becomes a call to a custom XPath function in the "nokogiri:"
# namespace with "." as its only argument.
def visit_pseudo_class(node)
  head = node.value.first
  return head.accept(self) if head.is_a?(Nokogiri::CSS::Node) && (head.type == :FUNCTION)

  handler = :"visit_pseudo_class_#{head.gsub(/[(]/, "")}"
  return send(handler, node) if respond_to?(handler)

  case head
  when "first", "first-of-type"
    "position()=1"
  when "last", "last-of-type"
    "position()=last()"
  when "first-child"
    "count(preceding-sibling::*)=0"
  when "last-child"
    "count(following-sibling::*)=0"
  when "only-child"
    "count(preceding-sibling::*)=0 and count(following-sibling::*)=0"
  when "only-of-type"
    "last()=1"
  when "empty"
    "not(node())"
  when "parent"
    "node()"
  when "root"
    "not(parent::*)"
  else
    validate_xpath_function_name(head)
    "nokogiri:#{head}(.)"
  end
end