Module: ActionText::MarkdownConversion
| Relationships & Source Files | |
| Defined in: | actiontext/lib/action_text/markdown_conversion.rb |
Overview
Converts an HTML fragment into a Markdown string. Used by Content#to_markdown and Fragment#to_markdown to produce Markdown representations of rich text.
Example: `<h1>Release Notes</h1>` => `# Release Notes`, a Markdown heading.
Note that this converter backslash-escapes Markdown metacharacters in text nodes, so literal text is never rendered as Markdown syntax.
Example: the literal text `# Release Notes` => `\# Release Notes`, not a heading.
Constant Summary
-
BOLD_TAGS =
private
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 67%w[b strong].freeze
-
ENCODE_HREF_CHARS =
private
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 71/[() <>\n\r\t]/ -
INLINE_ELEMENTS =
private
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 81%w[ action-text-markdown a abbr b bdi bdo cite code data del dfn em i kbd mark q rp rt ruby s samp small span strong sub sup time u var ].freeze
-
ITALIC_TAGS =
private
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 68%w[i em].freeze
-
LEADING_PRETTY_PRINT_WHITESPACE =
private
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 86/\A\s*\n\s*/ -
LIST_BULLET =
private
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 69/\A(-|\d+\.) / -
LIST_INDENT =
private
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 70" " -
MARKDOWN_METACHARACTERS =
private
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 72/ [\\`*_{}\[\]|~<>] # metacharacters that should be escaped generally | \A\#(?=[\s\#]|\z) # leading hash before space or another hash: ATX heading | \A=(?=[=\s]|\z) # leading equals before space or another equals: setext heading | \A- # leading hyphen: list item, thematic break, or setext heading | \A\+(?=\s|\z) # leading plus before space: list item | \A\d+\K\.(?=\s|\z) # leading "1." with trailing space: ordered list item (only the dot is matched) /x -
SKIP_ESCAPING_PARENTS =
private
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 80%w[ action-text-markdown code pre ].freeze
-
TRAILING_PRETTY_PRINT_WHITESPACE =
private
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 87/\s*\n\s*\z/
Instance Method Summary
-
#escape_markdown_text(text)
Backslash-escapes CommonMark metacharacters in
`text` so they are treated as literal characters by Markdown renderers. -
#markdown_link(title, url, image: false)
Returns a Markdown link:
[title](url). -
#node_to_markdown(node)
Converts a Nokogiri HTML
`node` into a Markdown string. - #ancestor_named?(node, names, max_depth:) ⇒ Boolean private
- #child_values_for_elements(node, child_values) private
- #code_fence(content) private
- #encode_href(href) private
- #format_list_item(lines, bullet) private
- #inline_code(content) private
- #inline_sibling?(sibling) ⇒ Boolean private
- #join_children(child_values) private
- #list_item_lines(list_node, child_values, prefix:) private
- #markdown_for_node(node, child_values) private
- #significant_whitespace?(node) ⇒ Boolean private
- #skip_markdown_escaping?(node) ⇒ Boolean private
- #stringify(value) private
- #strip_pretty_print_indentation(node) private
- #visit__heading(_node, child_values, level) private
-
#visit__passthrough(_node, child_values)
(also: #visit_li, #visit_td, #visit_th, #visit_thead, #visit_tbody)
private
These elements pass through their content (parent handlers use child_values directly).
- #visit__table_header_row(node, child_values) private
-
#visit__unsupported(_node, _child_values)
(also: #visit_script, #visit_style)
private
Avoid including content from elements that aren't meaningful for markdown output.
- #visit_a(node, child_values) private
-
#visit_action_text_markdown(_node, child_values)
private
Attachment markdown is wrapped in `<action-text-markdown>` by Content#to_markdown so it passes through without text escaping. -
#visit_b(node, child_values)
private
Alias for #visit_strong.
- #visit_blockquote(_node, child_values) private
- #visit_br(_node, _child_values) private
- #visit_code(node, child_values) private
-
#visit_del(_node, child_values)
private
Alias for #visit_s.
-
#visit_div(_node, child_values)
private
Trix uses `<div>`
as its default block element and represents newlines as `<br>`
tags (see piece_view.js and block_view.js in the Trix source).- #visit_em(node, child_values) (also: #visit_i) private
- #visit_h1(node, child_values) private
- #visit_h2(node, child_values) private
- #visit_h3(node, child_values) private
- #visit_h4(node, child_values) private
- #visit_h5(node, child_values) private
- #visit_h6(node, child_values) private
- #visit_hr(_node, _child_values) private
- #visit_i(node, child_values) private
Alias for #visit_em.
- #visit_li(_node, child_values) private
Alias for #visit__passthrough.
- #visit_ol(node, child_values) private
- #visit_p(_node, child_values) private
- #visit_pre(_node, child_values) private
- #visit_s(_node, child_values) (also: #visit_del) private
- #visit_script(_node, _child_values) private
Alias for #visit__unsupported.
- #visit_strong(node, child_values) (also: #visit_b) private
- #visit_style(_node, _child_values) private
Alias for #visit__unsupported.
- #visit_summary(_node, child_values) private
- #visit_tbody(_node, child_values) private
Alias for #visit__passthrough.
- #visit_td(_node, child_values) private
Alias for #visit__passthrough.
- #visit_th(_node, child_values) private
Alias for #visit__passthrough.
- #visit_thead(_node, child_values) private
Alias for #visit__passthrough.
- #visit_tr(node, child_values) private
- #visit_ul(node, child_values) private
- #wrap_emphasis(text, marker) private
Make sure
`<strong> hello </strong>` becomes ` **hello** ` and not `** hello **` (the latter is not valid markdown). Instance Method Details
#ancestor_named?(node, names, max_depth:) ⇒
Boolean(private)[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 361def ancestor_named?(node, names, max_depth:) current = node.parent max_depth.times do break unless current&.element? return true if current.name.in?(names) current = current.parent end false end
#child_values_for_elements(node, child_values) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 300def child_values_for_elements(node, child_values) node.children.zip(child_values).filter_map do |child, value| value if child.element? end end
#code_fence(content) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 328def code_fence(content) max_run = content.scan(/`{3,}/).map(&:length).max || 0 "`" * [3, max_run + 1].max end
#encode_href(href) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 371def encode_href(href) URI::RFC2396_PARSER.escape(href, ENCODE_HREF_CHARS) end
#escape_markdown_text(text)
[ GitHub ]Backslash-escapes CommonMark metacharacters in
`text` so they are treated as literal characters by Markdown renderers. MarkdownConversion.escape_markdown_text("**Important**") # => "\\*\\*Important\\*\\*"# File 'actiontext/lib/action_text/markdown_conversion.rb', line 62def escape_markdown_text(text) text.gsub(MARKDOWN_METACHARACTERS) { |c| "\\#{c}" } end
#format_list_item(lines, bullet) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 266def format_list_item(lines, bullet) first, *rest = lines leader = first.match?(LIST_BULLET) ? LIST_INDENT : bullet ([ leader + first ] + rest.map { |line| LIST_INDENT + line }).join("\n") end
#inline_code(content) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 333def inline_code(content) max_run = content.scan(/`+/).map(&:length).max || 0 fence = "`" * [1, max_run + 1].max if content.start_with?("`") || content.end_with?("`") "#{fence} #{content} #{fence}" else "#{fence}#{content}#{fence}" end end
#inline_sibling?(sibling) ⇒
Boolean(private)[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 357def inline_sibling?(sibling) sibling&.text? || sibling&.name&.in?(INLINE_ELEMENTS) end
#join_children(child_values) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 272def join_children(child_values) merged = [] child_values.each do |value| # Merge adjacent bold/italic runs which Lexxy emits if value.is_a?(Array) && (value[0] == :bold || value[0] == :italic) if merged.last.is_a?(Array) && merged.last[0] == value[0] merged.last[1] = merged.last[1] + value[1] else merged << [ value[0], value[1] ] end else merged << value end end parts = merged.map { |v| stringify(v) } result = +"" parts.each do |part| # Nested block elements (e.g., lists and blockquotes) need an initial newline injected if !result.empty? && !result.end_with?("\n") && part.end_with?("\n\n") result << "\n" end result << part end result end
#list_item_lines(list_node, child_values, prefix:) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 254def list_item_lines(list_node, child_values, prefix:) element_values = child_values_for_elements(list_node, child_values) element_values.each_with_index.filter_map do |value, index| text = stringify(value) lines = text.split("\n").reject(&:blank?) next if lines.empty? bullet = prefix.respond_to?(:call) ? prefix.call(index) : prefix format_list_item(lines, bullet) end.join("\n") end
#markdown_for_node(node, child_values) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 92def markdown_for_node(node, child_values) if node.text? if node.content.blank? && !significant_whitespace?(node) "" elsif skip_markdown_escaping?(node) node.content else escape_markdown_text(strip_pretty_print_indentation(node)) end elsif node.element? method_name = :"visit_#{node.name.tr("-", "_")}" if respond_to?(method_name, true) send(method_name, node, child_values) else join_children(child_values).strip end else join_children(child_values) end end
#markdown_link(title, url, image: false)
[ GitHub ]Returns a Markdown link:
`[title](url)`. Escapes metacharacters in
`title`, and percent-encodes characters in `url` that would break the link syntax. MarkdownConversion.markdown_link("photo", "https://example.com/photo_(large).png") # => "[photo](https://example.com/photo_%28large%29.png)" Pass image: true to produce an image link (
`![title](url)`).
MarkdownConversion.markdown_link("photo", "https://example.com/photo.png", image: true) # => "![photo](https://example.com/photo.png)" If the URI scheme is not allowed (per
Rails::HTML::Sanitizer.allowed_uri?), returns the escaped title wrapped in escaped brackets (`\[title\]`). MarkdownConversion.markdown_link("click", "javascript:alert(1)") # => "\\[click\\]"# File 'actiontext/lib/action_text/markdown_conversion.rb', line 49def markdown_link(title, url, image: false) if Rails::HTML::Sanitizer.allowed_uri?(url) "#{"!" if image}[#{escape_markdown_text(title)}](#{encode_href(url)})" else "\\[#{escape_markdown_text(title)}\\]" end end
#node_to_markdown(node)
[ GitHub ]Converts a Nokogiri HTML
`node` into a Markdown string. node = Nokogiri::HTML4.fragment("<p>Hello <strong>world</strong></p>") MarkdownConversion.node_to_markdown(node) # => "Hello **world**"# File 'actiontext/lib/action_text/markdown_conversion.rb', line 25def node_to_markdown(node) BottomUpReducer.new(node).reduce do |n, child_values| markdown_for_node(n, child_values) end.strip end
#significant_whitespace?(node) ⇒
Boolean(private)[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 352def significant_whitespace?(node) inline_sibling?(node.previous_sibling) && inline_sibling?(node.next_sibling) end
#skip_markdown_escaping?(node) ⇒
Boolean(private)[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 375def skip_markdown_escaping?(node) node.parent&.name.in?(SKIP_ESCAPING_PARENTS) end
#stringify(value) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 306def stringify(value) case value when Array case value[0] when :bold then wrap_emphasis(value[1], "**") when :italic then wrap_emphasis(value[1], "*") else value.join end else value.to_s end end
#strip_pretty_print_indentation(node) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 343def strip_pretty_print_indentation(node) content = node.content return content unless content.include?("\n") content .sub(LEADING_PRETTY_PRINT_WHITESPACE, inline_sibling?(node.previous_sibling) ? " " : "") .sub(TRAILING_PRETTY_PRINT_WHITESPACE, inline_sibling?(node.next_sibling) ? " " : "") end
#visit__heading(_node, child_values, level) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 168def visit__heading(_node, child_values, level) "#{"#" * level} #{join_children(child_values)}\n\n" end
#visit__passthrough(_node, child_values) (private) Also known as: #visit_li, #visit_td, #visit_th, #visit_thead, #visit_tbody
[ GitHub ]These elements pass through their content (parent handlers use child_values directly)
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 238def visit__passthrough(_node, child_values) join_children(child_values) end
#visit__table_header_row(node, child_values) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 247def visit__table_header_row(node, child_values) cells = child_values_for_elements(node, child_values).map { |v| stringify(v).strip } row = "| #{cells.join(" | ")} |\n" separator = "| #{Array.new(cells.size, "---").join(" | ")} |\n" "#{row}#{separator}" end
#visit__unsupported(_node, _child_values) (private) Also known as: #visit_script, #visit_style
[ GitHub ]Avoid including content from elements that aren't meaningful for markdown output
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 231def visit__unsupported(_node, _child_values) "" end
#visit_a(node, child_values) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 193def visit_a(node, child_values) inner = join_children(child_values) if (href = node["href"]) && Rails::HTML::Sanitizer.allowed_uri?(href) "[#{inner}](#{encode_href(href)})" else inner end end
#visit_action_text_markdown(_node, child_values) (private)
[ GitHub ]Attachment markdown is wrapped in `<action-text-markdown>` by Content#to_markdown so it passes through without text escaping. # File 'actiontext/lib/action_text/markdown_conversion.rb', line 226def visit_action_text_markdown(_node, child_values) join_children(child_values) end
#visit_b(node, child_values) (private)
[ GitHub ]Alias for #visit_strong.
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 123alias_method :visit_b, :visit_strong
#visit_blockquote(_node, child_values) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 178def visit_blockquote(_node, child_values) quoted = join_children(child_values).strip.lines.map { |line| "> #{line}" }.join "#{quoted}\n\n" end
#visit_br(_node, _child_values) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 216def visit_br(_node, _child_values) "\n" end
#visit_code(node, child_values) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 142def visit_code(node, child_values) inner = join_children(child_values) if node.parent&.name == "pre" inner else inline_code(inner) end end
#visit_del(_node, child_values) (private)
[ GitHub ]Alias for #visit_s.
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 140alias_method :visit_del, :visit_s
#visit_div(_node, child_values) (private)
[ GitHub ]Trix uses `<div>`
as its default block element and represents newlines as `<br>`
tags (see piece_view.js and block_view.js in the Trix source). Unlike `<p>`, we don't append paragraph-separating newlines here because the `<br>`
children already provide spacing.# File 'actiontext/lib/action_text/markdown_conversion.rb', line 164def visit_div(_node, child_values) join_children(child_values) end
#visit_em(node, child_values) (private) Also known as: #visit_i
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 125def visit_em(node, child_values) inner = join_children(child_values) # lexxy redundantly wraps emphasized subtrees in `<i>` if ancestor_named?(node, ITALIC_TAGS, max_depth: 4) inner else [ :italic, inner ] end end
#visit_h1(node, child_values) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 171def visit_h1(node, child_values) = visit__heading(node, child_values, 1)
#visit_h2(node, child_values) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 172def visit_h2(node, child_values) = visit__heading(node, child_values, 2)
#visit_h3(node, child_values) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 173def visit_h3(node, child_values) = visit__heading(node, child_values, 3)
#visit_h4(node, child_values) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 174def visit_h4(node, child_values) = visit__heading(node, child_values, 4)
#visit_h5(node, child_values) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 175def visit_h5(node, child_values) = visit__heading(node, child_values, 5)
#visit_h6(node, child_values) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 176def visit_h6(node, child_values) = visit__heading(node, child_values, 6)
#visit_hr(_node, _child_values) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 220def visit_hr(_node, _child_values) "---\n\n" end
#visit_i(node, child_values) (private)
[ GitHub ]Alias for #visit_em.
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 135alias_method :visit_i, :visit_em
#visit_li(_node, child_values) (private)
[ GitHub ]Alias for #visit__passthrough.
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 241alias_method :visit_li, :visit__passthrough
#visit_ol(node, child_values) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 188def visit_ol(node, child_values) items = list_item_lines(node, child_values, prefix: ->(i) { "#{i + 1}. " }) "#{items}\n\n" end
#visit_p(_node, child_values) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 157def visit_p(_node, child_values) "#{join_children(child_values)}\n\n" end
#visit_pre(_node, child_values) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 151def visit_pre(_node, child_values) inner = join_children(child_values).delete_prefix("\n").delete_suffix("\n") fence = code_fence(inner) "#{fence}\n#{inner}\n#{fence}\n\n" end
#visit_s(_node, child_values) (private) Also known as: #visit_del
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 137def visit_s(_node, child_values) "~~#{join_children(child_values)}~~" end
#visit_script(_node, _child_values) (private)
[ GitHub ]Alias for #visit__unsupported.
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 234alias_method :visit_script, :visit__unsupported
#visit_strong(node, child_values) (private) Also known as: #visit_b
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 113def visit_strong(node, child_values) inner = join_children(child_values) # lexxy redundantly wraps bold subtrees in `<b>` if ancestor_named?(node, BOLD_TAGS, max_depth: 4) inner else [ :bold, inner ] end end
#visit_style(_node, _child_values) (private)
[ GitHub ]Alias for #visit__unsupported.
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 235alias_method :visit_style, :visit__unsupported
#visit_summary(_node, child_values) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 212def visit_summary(_node, child_values) "**#{join_children(child_values)}**\n\n" end
#visit_tbody(_node, child_values) (private)
[ GitHub ]Alias for #visit__passthrough.
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 245alias_method :visit_tbody, :visit__passthrough
#visit_td(_node, child_values) (private)
[ GitHub ]Alias for #visit__passthrough.
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 242alias_method :visit_td, :visit__passthrough
#visit_th(_node, child_values) (private)
[ GitHub ]Alias for #visit__passthrough.
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 243alias_method :visit_th, :visit__passthrough
#visit_thead(_node, child_values) (private)
[ GitHub ]Alias for #visit__passthrough.
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 244alias_method :visit_thead, :visit__passthrough
#visit_tr(node, child_values) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 202def visit_tr(node, child_values) # lexxy does not emit `thead`, so we need to infer header rows from `tr` contents if node.element_children.all? { |cell| cell.name == "th" } visit__table_header_row(node, child_values) else cells = child_values_for_elements(node, child_values).map { |v| stringify(v).strip } "| #{cells.join(" | ")} |\n" end end
#visit_ul(node, child_values) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 183def visit_ul(node, child_values) items = list_item_lines(node, child_values, prefix: "- ") "#{items}\n\n" end
#wrap_emphasis(text, marker) (private)
[ GitHub ]Make sure
`<strong> hello </strong>` becomes ` **hello** ` and not `** hello **` (the latter is not valid markdown).# File 'actiontext/lib/action_text/markdown_conversion.rb', line 321def wrap_emphasis(text, marker) leading = text[/\A\s*/] trailing = text[/\s*\z/] inner = text.strip "#{leading}#{marker}#{inner}#{marker}#{trailing}" end