123456789_123456789_123456789_123456789_123456789_

Module: ActionText::MarkdownConversion

Relationships & Source Files
Defined in: actiontext/lib/action_text/markdown_conversion.rb

Overview

Converts an HTML fragment into a Markdown string. Used by Content#to_markdown and Fragment#to_markdown to produce Markdown representations of rich text.

Example: <h1>Release Notes</h1> => # Release Notes, a markdown heading.

Note that this converter escapes Markdown metacharacters in text nodes, so literal text is never interpreted as Markdown syntax.

Example: <p># Release Notes</p> => \# Release Notes, not a heading.

Constant Summary

Instance Method Summary

Instance Method Details

#ancestor_named?(node, names, max_depth:) ⇒ Boolean (private)

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 345

# Reports whether one of +node+'s nearest ancestors has a tag name listed in
# +names+. The walk starts at the parent, looks at no more than +max_depth+
# ancestors, and gives up as soon as an ancestor is missing or is not an
# element.
def ancestor_named?(node, names, max_depth:)
  ancestor = node.parent
  max_depth.times do
    return false unless ancestor&.element?
    return true if ancestor.name.in?(names)

    ancestor = ancestor.parent
  end
  false
end

#child_values_for_elements(node, child_values) (private)

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 284

# Pairs each child of +node+ with the value at the same position in
# +child_values+ and returns only the values that belong to element
# children. Nil/false values are dropped.
def child_values_for_elements(node, child_values)
  node.children.zip(child_values).each_with_object([]) do |(child, reduced), kept|
    kept << reduced if child.element? && reduced
  end
end

#code_fence(content) (private)

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 312

# Returns a backtick fence for wrapping +content+ in a fenced code block.
# The fence is three backticks, or one longer than the longest run of three
# or more backticks found inside +content+.
def code_fence(content)
  longest_run = content.scan(/`{3,}/).max_by(&:length)
  width = longest_run ? longest_run.length + 1 : 3
  "`" * width
end

#encode_href(href) (private)

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 355

# Escapes +href+ with the RFC 2396 URI parser, passing ENCODE_HREF_CHARS
# (defined elsewhere in this file) as the pattern of characters to escape.
def encode_href(href)
  parser = URI::RFC2396_PARSER
  parser.escape(href, ENCODE_HREF_CHARS)
end

#escape_markdown_text(text)

Backslash-escapes CommonMark metacharacters in text so they are treated as literal characters by Markdown renderers.

MarkdownConversion.escape_markdown_text("**Important**")
# => "\\*\\*Important\\*\\*"
[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 46

# Returns a copy of +text+ in which every match of MARKDOWN_METACHARACTERS
# is prefixed with a backslash.
def escape_markdown_text(text)
  text.gsub(MARKDOWN_METACHARACTERS) { |metachar| "\\" + metachar }
end

#format_list_item(lines, bullet) (private)

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 250

# Joins the +lines+ of one list item into a single string. The first line
# is prefixed with +bullet+, unless it already matches LIST_BULLET, in which
# case it is indented with LIST_INDENT instead. Every following line is
# indented with LIST_INDENT.
def format_list_item(lines, bullet)
  head_prefix = lines.first.match?(LIST_BULLET) ? LIST_INDENT : bullet
  lines.each_with_index.map do |line, position|
    (position.zero? ? head_prefix : LIST_INDENT) + line
  end.join("\n")
end

#inline_code(content) (private)

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 317

# Wraps +content+ in an inline code span.
#
# The delimiter is one backtick longer than the longest backtick run inside
# +content+, so the content can never close the span early. A single space
# of padding is added on both sides when:
#
# * +content+ starts or ends with a backtick (otherwise it would merge with
#   the delimiter), or
# * +content+ both starts and ends with a space and is not made up solely of
#   spaces. CommonMark strips one space from each side of such a span, so
#   without the padding the original spaces would be lost on render.
def inline_code(content)
  longest_run = content.scan(/`+/).map(&:length).max || 0
  fence = "`" * (longest_run + 1)

  touches_backtick = content.start_with?("`") || content.end_with?("`")
  space_wrapped = content.start_with?(" ") && content.end_with?(" ") && content.match?(/[^ \n]/)
  padding = (touches_backtick || space_wrapped) ? " " : ""

  "#{fence}#{padding}#{content}#{padding}#{fence}"
end

#inline_sibling?(sibling) ⇒ Boolean (private)

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 341

# Truthy when +sibling+ is a text node or an element whose name is listed in
# INLINE_ELEMENTS. Returns nil when there is no sibling.
def inline_sibling?(sibling)
  return if sibling.nil?

  sibling.text? || sibling.name&.in?(INLINE_ELEMENTS)
end

#join_children(child_values) (private)

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 256

# Concatenates the reduced values of a node's children into one string.
#
# Values are either strings or tagged pairs such as [:bold, text] and
# [:italic, text]. Neighbouring pairs with the same tag are merged into one
# pair before stringification so that adjacent runs share a single set of
# emphasis markers.
def join_children(child_values)
  emphasis_run = ->(candidate) { candidate.is_a?(Array) && (candidate[0] == :bold || candidate[0] == :italic) }

  # Merge adjacent bold/italic runs which Lexxy emits. Pairs are copied on
  # insertion so that extending a run never mutates the caller's arrays.
  collapsed = child_values.each_with_object([]) do |value, acc|
    unless emphasis_run.call(value)
      acc << value
      next
    end

    previous = acc.last
    if previous.is_a?(Array) && previous[0] == value[0]
      previous[1] = previous[1] + value[1]
    else
      acc << [ value[0], value[1] ]
    end
  end

  collapsed.each_with_object(+"") do |value, output|
    piece = stringify(value)
    # Nested block elements (e.g., lists and blockquotes) need an initial newline injected
    starts_block = !output.empty? && !output.end_with?("\n") && piece.end_with?("\n\n")
    output << "\n" if starts_block
    output << piece
  end
end

#list_item_lines(list_node, child_values, prefix:) (private)

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 238

# Renders the items of +list_node+ as newline-separated Markdown list lines.
#
# Only values belonging to element children are considered. Each value is
# stringified, split into lines, and stripped of blank lines; items with no
# remaining lines are skipped. +prefix+ is either a literal bullet string or
# a callable that receives the item's position among the element children
# and returns the bullet.
def list_item_lines(list_node, child_values, prefix:)
  rendered = []

  child_values_for_elements(list_node, child_values).each_with_index do |value, position|
    item_lines = stringify(value).split("\n").reject(&:blank?)
    next if item_lines.empty?

    marker = prefix.respond_to?(:call) ? prefix.call(position) : prefix
    rendered << format_list_item(item_lines, marker)
  end

  rendered.join("\n")
end

#markdown_for_node(node, child_values) (private)

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 76

# Produces the Markdown for a single +node+ given the already-reduced
# +child_values+ of its children.
#
# * Text nodes: blank text that is not significant whitespace becomes "";
#   text whose parent opts out of escaping is returned verbatim; everything
#   else has pretty-print indentation stripped and is then escaped.
# * Elements: dispatched to a visit_<name> method (dashes in the tag name
#   become underscores, private methods included); elements without a
#   handler fall back to their joined, stripped children.
# * Any other node: the joined children, unstripped.
def markdown_for_node(node, child_values)
  if node.text?
    return "" if node.content.blank? && !significant_whitespace?(node)
    return node.content if skip_markdown_escaping?(node)

    return escape_markdown_text(strip_pretty_print_indentation(node))
  end

  return join_children(child_values) unless node.element?

  handler = :"visit_#{node.name.tr("-", "_")}"
  respond_to?(handler, true) ? send(handler, node, child_values) : join_children(child_values).strip
end

#node_to_markdown(node)

Converts a Nokogiri HTML node into a Markdown string.

node = Nokogiri::HTML4.fragment("<p>Hello <strong>world</strong></p>")
MarkdownConversion.node_to_markdown(node) # => "Hello **world**"
[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 25

# Converts +node+ into a Markdown string by reducing the tree with
# BottomUpReducer, mapping every node through markdown_for_node, and
# stripping surrounding whitespace from the final result.
def node_to_markdown(node)
  reducer = BottomUpReducer.new(node)
  markdown = reducer.reduce do |current, reduced_children|
    markdown_for_node(current, reduced_children)
  end
  markdown.strip
end

#significant_whitespace?(node) ⇒ Boolean (private)

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 336

# Truthy when +node+ has an inline sibling on both sides. The next sibling
# is only inspected when the previous one is inline.
def significant_whitespace?(node)
  inline_before = inline_sibling?(node.previous_sibling)
  inline_before && inline_sibling?(node.next_sibling)
end

#skip_markdown_escaping?(node) ⇒ Boolean (private)

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 359

# Truthy when the parent of +node+ has a name listed in
# SKIP_ESCAPING_PARENTS. Returns nil when +node+ has no parent.
def skip_markdown_escaping?(node)
  parent = node.parent
  return if parent.nil?

  parent.name.in?(SKIP_ESCAPING_PARENTS)
end

#stringify(value) (private)

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 290

# Turns a reduced value into its final Markdown string.
#
# [:bold, text] and [:italic, text] pairs are wrapped in "**" and "*"
# respectively via wrap_emphasis. Any other array is joined, and every
# non-array value is converted with #to_s.
def stringify(value)
  return value.to_s unless value.is_a?(Array)

  tag = value[0]
  if tag == :bold
    wrap_emphasis(value[1], "**")
  elsif tag == :italic
    wrap_emphasis(value[1], "*")
  else
    value.join
  end
end

#strip_pretty_print_indentation(node) (private)

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 327

# Returns the content of the text +node+ with leading and trailing
# pretty-print whitespace removed. Content without a newline is returned
# untouched. On each side the matched whitespace is replaced with a single
# space when the neighbouring sibling is inline, and with nothing otherwise.
def strip_pretty_print_indentation(node)
  text = node.content
  return text unless text.include?("\n")

  leading_replacement = inline_sibling?(node.previous_sibling) ? " " : ""
  without_leading = text.sub(LEADING_PRETTY_PRINT_WHITESPACE, leading_replacement)

  trailing_replacement = inline_sibling?(node.next_sibling) ? " " : ""
  without_leading.sub(TRAILING_PRETTY_PRINT_WHITESPACE, trailing_replacement)
end

#visit__heading(_node, child_values, level) (private)

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 152

# Renders a heading: +level+ "#" characters, a space, the joined children,
# and a blank line.
def visit__heading(_node, child_values, level)
  hashes = "#" * level
  title = join_children(child_values)
  "#{hashes} #{title}\n\n"
end

#visit__passthrough(_node, child_values) (private) Also known as: #visit_li, #visit_td, #visit_th, #visit_thead, #visit_tbody

These elements pass through their content (parent handlers use child_values directly)

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 222

# Returns the joined children unchanged; the node itself adds no markup.
def visit__passthrough(_node, child_values) = join_children(child_values)

#visit__table_header_row(node, child_values) (private)

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 231

# Renders a table header: one pipe-delimited row of the stripped cell
# values, followed by a delimiter row with one "---" per cell.
def visit__table_header_row(node, child_values)
  header_cells = child_values_for_elements(node, child_values).map { |value| stringify(value).strip }
  header_line = "| " + header_cells.join(" | ") + " |\n"
  divider_line = "| " + ([ "---" ] * header_cells.size).join(" | ") + " |\n"
  header_line + divider_line
end

#visit__unsupported(_node, _child_values) (private) Also known as: #visit_script, #visit_style

Avoid including content from elements that aren’t meaningful for markdown output

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 215

# Always returns an empty string, discarding the node and its children.
def visit__unsupported(_node, _child_values) = ""

#visit_a(node, child_values) (private)

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 177

# Renders an anchor as a Markdown link, [text](href). When the node has no
# href, or the href is rejected by Rails::HTML::Sanitizer.allowed_uri?, only
# the link text is returned.
def visit_a(node, child_values)
  label = join_children(child_values)
  href = node["href"]
  return label unless href && Rails::HTML::Sanitizer.allowed_uri?(href)

  "[#{label}](#{encode_href(href)})"
end

#visit_action_text_markdown(_node, child_values) (private)

Attachment markdown is wrapped in <action-text-markdown> by Content#to_markdown so it passes through without text escaping.

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 210

# Returns the joined children of an <action-text-markdown> wrapper as-is.
def visit_action_text_markdown(_node, child_values) = join_children(child_values)

#visit_b(node, child_values) (private)

Alias for #visit_strong.

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 107

# <b> elements are handled by the same method as <strong>.
alias_method :visit_b, :visit_strong

#visit_blockquote(_node, child_values) (private)

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 162

# Renders a blockquote: the joined children are stripped, every line is
# prefixed with "> ", and a blank line is appended.
def visit_blockquote(_node, child_values)
  body = join_children(child_values).strip
  quoted_lines = body.each_line.map { |line| "> " + line }
  quoted_lines.join + "\n\n"
end

#visit_br(_node, _child_values) (private)

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 200

# A <br> becomes a single newline; the node and child values are ignored.
def visit_br(_node, _child_values) = "\n"

#visit_code(node, child_values) (private)

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 126

# Renders a <code> element. Directly inside a <pre> the joined children are
# returned bare; anywhere else they are wrapped as an inline code span.
def visit_code(node, child_values)
  code = join_children(child_values)
  inside_pre = node.parent&.name == "pre"
  inside_pre ? code : inline_code(code)
end

#visit_del(_node, child_values) (private)

Alias for #visit_s.

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 124

# <del> elements are handled by the same method as <s>.
alias_method :visit_del, :visit_s

#visit_div(_node, child_values) (private)

Trix uses <div> as its default block element and represents newlines as <br>
tags (see piece_view.js and block_view.js in the Trix source). Unlike <p>, we don’t append paragraph-separating newlines here because the
children already provide spacing.

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 148

# Returns the joined children with no added paragraph separator.
def visit_div(_node, child_values) = join_children(child_values)

#visit_em(node, child_values) (private) Also known as: #visit_i

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 109

# Renders an italic element as an [:italic, text] pair. When one of the
# four nearest ancestors is already an italic tag (ITALIC_TAGS), the bare
# text is returned instead so the markers are not applied twice.
def visit_em(node, child_values)
  text = join_children(child_values)

  # lexxy redundantly wraps emphasized subtrees in `<i>`
  already_italic = ancestor_named?(node, ITALIC_TAGS, max_depth: 4)
  already_italic ? text : [ :italic, text ]
end

#visit_h1(node, child_values) (private)

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 155

# Renders an <h1> by delegating to visit__heading with level 1.
def visit_h1(node, child_values)
  visit__heading(node, child_values, 1)
end

#visit_h2(node, child_values) (private)

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 156

# Renders an <h2> by delegating to visit__heading with level 2.
def visit_h2(node, child_values)
  visit__heading(node, child_values, 2)
end

#visit_h3(node, child_values) (private)

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 157

# Renders an <h3> by delegating to visit__heading with level 3.
def visit_h3(node, child_values)
  visit__heading(node, child_values, 3)
end

#visit_h4(node, child_values) (private)

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 158

# Renders an <h4> by delegating to visit__heading with level 4.
def visit_h4(node, child_values)
  visit__heading(node, child_values, 4)
end

#visit_h5(node, child_values) (private)

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 159

# Renders an <h5> by delegating to visit__heading with level 5.
def visit_h5(node, child_values)
  visit__heading(node, child_values, 5)
end

#visit_h6(node, child_values) (private)

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 160

# Renders an <h6> by delegating to visit__heading with level 6.
def visit_h6(node, child_values)
  visit__heading(node, child_values, 6)
end

#visit_hr(_node, _child_values) (private)

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 204

# An <hr> becomes a "---" thematic break followed by a blank line.
def visit_hr(_node, _child_values) = "---\n\n"

#visit_i(node, child_values) (private)

Alias for #visit_em.

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 119

# <i> elements are handled by the same method as <em>.
alias_method :visit_i, :visit_em

#visit_li(_node, child_values) (private)

Alias for #visit__passthrough.

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 225

# <li> elements pass their joined children through unchanged.
alias_method :visit_li, :visit__passthrough

#visit_ol(node, child_values) (private)

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 172

# Renders an ordered list. Each item is numbered from its zero-based
# position ("1. ", "2. ", ...), and the list is followed by a blank line.
def visit_ol(node, child_values)
  numbering = ->(position) { "#{position + 1}. " }
  list_item_lines(node, child_values, prefix: numbering) + "\n\n"
end

#visit_p(_node, child_values) (private)

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 141

# Renders a paragraph: the joined children followed by a blank line.
def visit_p(_node, child_values)
  paragraph = join_children(child_values)
  paragraph + "\n\n"
end

#visit_pre(_node, child_values) (private)

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 135

# Renders a <pre> as a fenced code block. One leading and one trailing
# newline are removed from the joined children, and the fence is chosen by
# code_fence.
def visit_pre(_node, child_values)
  code = join_children(child_values)
  code = code.delete_prefix("\n").delete_suffix("\n")
  delimiter = code_fence(code)
  [ delimiter, code, delimiter ].join("\n") + "\n\n"
end

#visit_s(_node, child_values) (private) Also known as: #visit_del

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 121

# Renders struck-through text using "~~" markers.
#
# The markers are applied through wrap_emphasis, the same helper used for
# bold and italic, so leading and trailing whitespace ends up outside the
# markers: "<s> gone </s>" becomes " ~~gone~~ " rather than "~~ gone ~~",
# which Markdown renderers do not treat as strikethrough.
def visit_s(_node, child_values)
  wrap_emphasis(join_children(child_values), "~~")
end

#visit_script(_node, _child_values) (private)

Alias for #visit__unsupported.

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 218

# <script> elements produce no output.
alias_method :visit_script, :visit__unsupported

#visit_strong(node, child_values) (private) Also known as: #visit_b

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 97

# Renders a bold element as a [:bold, text] pair. When one of the four
# nearest ancestors is already a bold tag (BOLD_TAGS), the bare text is
# returned instead so the markers are not applied twice.
def visit_strong(node, child_values)
  text = join_children(child_values)

  # lexxy redundantly wraps bold subtrees in `<b>`
  already_bold = ancestor_named?(node, BOLD_TAGS, max_depth: 4)
  already_bold ? text : [ :bold, text ]
end

#visit_style(_node, _child_values) (private)

Alias for #visit__unsupported.

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 219

# <style> elements produce no output.
alias_method :visit_style, :visit__unsupported

#visit_summary(_node, child_values) (private)

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 196

# Renders a <summary> as a bold line followed by a blank line.
#
# The joined children are stripped first, because "** text **" (whitespace
# just inside the markers) is not recognised as bold by Markdown renderers.
# A summary with no visible text produces no output rather than a literal
# "****".
def visit_summary(_node, child_values)
  label = join_children(child_values).strip
  return "" if label.empty?

  "**#{label}**\n\n"
end

#visit_tbody(_node, child_values) (private)

Alias for #visit__passthrough.

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 229

# <tbody> elements pass their joined children through unchanged.
alias_method :visit_tbody, :visit__passthrough

#visit_td(_node, child_values) (private)

Alias for #visit__passthrough.

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 226

# <td> elements pass their joined children through unchanged.
alias_method :visit_td, :visit__passthrough

#visit_th(_node, child_values) (private)

Alias for #visit__passthrough.

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 227

# <th> elements pass their joined children through unchanged.
alias_method :visit_th, :visit__passthrough

#visit_thead(_node, child_values) (private)

Alias for #visit__passthrough.

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 228

# <thead> elements pass their joined children through unchanged.
alias_method :visit_thead, :visit__passthrough

#visit_tr(node, child_values) (private)

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 186

# Renders a table row. A row whose element children are all <th> cells is
# rendered as a header row (with delimiter line); any other row becomes a
# single pipe-delimited line of stripped cell values.
def visit_tr(node, child_values)
  # lexxy does not emit `thead`, so we need to infer header rows from `tr` contents
  header_row = node.element_children.all? { |cell| cell.name == "th" }
  return visit__table_header_row(node, child_values) if header_row

  cells = child_values_for_elements(node, child_values).map { |value| stringify(value).strip }
  "| " + cells.join(" | ") + " |\n"
end

#visit_ul(node, child_values) (private)

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 167

# Renders an unordered list with "- " bullets, followed by a blank line.
def visit_ul(node, child_values)
  list_item_lines(node, child_values, prefix: "- ") + "\n\n"
end

#wrap_emphasis(text, marker) (private)

Make sure <strong> hello </strong> becomes " **hello** " (whitespace moved outside the markers) and not "** hello **" (the latter is not valid markdown).

[ GitHub ]

  
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 305

# Wraps +text+ in +marker+ (e.g. "**" or "*"), keeping any leading and
# trailing whitespace outside the markers, since whitespace directly inside
# the markers stops Markdown renderers from recognising the emphasis.
#
# Empty or whitespace-only +text+ is returned unchanged: there is nothing to
# emphasise, and wrapping it would emit stray markers (and duplicate the
# whitespace, because the same run would count as both leading and trailing).
def wrap_emphasis(text, marker)
  inner = text.strip
  return text if inner.empty?

  leading = text[/\A\s*/]
  trailing = text[/\s*\z/]
  "#{leading}#{marker}#{inner}#{marker}#{trailing}"
end