Module: ActionText::MarkdownConversion
| Relationships & Source Files | |
| Defined in: | actiontext/lib/action_text/markdown_conversion.rb |
Overview
Converts an HTML fragment into a Markdown string. Used by Content#to_markdown and Fragment#to_markdown to produce Markdown representations of rich text.
Example: <h1>Release Notes</h1> => # Release Notes, a markdown heading.
Note that this converter escapes Markdown metacharacters in text nodes, so literal text that merely looks like Markdown syntax is not rendered as Markdown.
Example: <p># Release Notes</p> => \# Release Notes, not a heading.
Constant Summary
-
BOLD_TAGS =
private
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 51
%w[b strong].freeze
-
ENCODE_HREF_CHARS =
private
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 55
/[() <>\n\r\t]/
-
INLINE_ELEMENTS =
private
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 65
%w[ action-text-markdown a abbr b bdi bdo cite code data del dfn em i kbd mark q rp rt ruby s samp small span strong sub sup time u var ].freeze
-
ITALIC_TAGS =
private
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 52
%w[i em].freeze
-
LEADING_PRETTY_PRINT_WHITESPACE =
private
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 70
/\A\s*\n\s*/
-
LIST_BULLET =
private
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 53
/\A(-|\d+\.) /
-
LIST_INDENT =
private
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 54
" "
-
MARKDOWN_METACHARACTERS =
private
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 56
/
  [\\`*_{}\[\]|~<>]       # metacharacters that should be escaped generally
  | \A\#(?=[\s\#]|\z)     # leading hash before space or another hash: ATX heading
  | \A=(?=[=\s]|\z)       # leading equals before space or another equals: setext heading
  | \A-                   # leading hyphen: list item, thematic break, or setext heading
  | \A\+(?=\s|\z)         # leading plus before space: list item
  | \A\d+\K\.(?=\s|\z)    # leading "1." with trailing space: ordered list item (only the dot is matched)
/x
-
SKIP_ESCAPING_PARENTS =
private
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 64
%w[ action-text-markdown code pre ].freeze
-
TRAILING_PRETTY_PRINT_WHITESPACE =
private
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 71
/\s*\n\s*\z/
Instance Method Summary
-
#escape_markdown_text(text)
Backslash-escapes CommonMark metacharacters in
text so they are treated as literal characters by Markdown renderers. -
#markdown_link(title, url)
Returns a Markdown link: [title](url).
-
#node_to_markdown(node)
Converts a Nokogiri HTML
node into a Markdown string. - #ancestor_named?(node, names, max_depth:) ⇒ Boolean private
- #child_values_for_elements(node, child_values) private
- #code_fence(content) private
- #encode_href(href) private
- #format_list_item(lines, bullet) private
- #inline_code(content) private
- #inline_sibling?(sibling) ⇒ Boolean private
- #join_children(child_values) private
- #list_item_lines(list_node, child_values, prefix:) private
- #markdown_for_node(node, child_values) private
- #significant_whitespace?(node) ⇒ Boolean private
- #skip_markdown_escaping?(node) ⇒ Boolean private
- #stringify(value) private
- #strip_pretty_print_indentation(node) private
- #visit__heading(_node, child_values, level) private
-
#visit__passthrough(_node, child_values)
(also: #visit_li, #visit_td, #visit_th, #visit_thead, #visit_tbody)
private
These elements pass through their content (parent handlers use child_values directly).
- #visit__table_header_row(node, child_values) private
-
#visit__unsupported(_node, _child_values)
(also: #visit_script, #visit_style)
private
Avoid including content from elements that aren’t meaningful for markdown output.
- #visit_a(node, child_values) private
-
#visit_action_text_markdown(_node, child_values)
private
Attachment markdown is wrapped in <action-text-markdown> by Content#to_markdown so it passes through without text escaping. -
#visit_b(node, child_values)
private
Alias for #visit_strong.
- #visit_blockquote(_node, child_values) private
- #visit_br(_node, _child_values) private
- #visit_code(node, child_values) private
-
#visit_del(_node, child_values)
private
Alias for #visit_s.
-
#visit_div(_node, child_values)
private
Trix uses <div> as its default block element and represents newlines as
<br> tags (see piece_view.js and block_view.js in the Trix source). - #visit_em(node, child_values) (also: #visit_i) private
- #visit_h1(node, child_values) private
- #visit_h2(node, child_values) private
- #visit_h3(node, child_values) private
- #visit_h4(node, child_values) private
- #visit_h5(node, child_values) private
- #visit_h6(node, child_values) private
- #visit_hr(_node, _child_values) private
-
#visit_i(node, child_values)
private
Alias for #visit_em.
-
#visit_li(_node, child_values)
private
Alias for #visit__passthrough.
- #visit_ol(node, child_values) private
- #visit_p(_node, child_values) private
- #visit_pre(_node, child_values) private
- #visit_s(_node, child_values) (also: #visit_del) private
-
#visit_script(_node, _child_values)
private
Alias for #visit__unsupported.
- #visit_strong(node, child_values) (also: #visit_b) private
-
#visit_style(_node, _child_values)
private
Alias for #visit__unsupported.
- #visit_summary(_node, child_values) private
-
#visit_tbody(_node, child_values)
private
Alias for #visit__passthrough.
-
#visit_td(_node, child_values)
private
Alias for #visit__passthrough.
-
#visit_th(_node, child_values)
private
Alias for #visit__passthrough.
-
#visit_thead(_node, child_values)
private
Alias for #visit__passthrough.
- #visit_tr(node, child_values) private
- #visit_ul(node, child_values) private
-
#wrap_emphasis(text, marker)
private
Make sure
<strong> hello </strong> becomes " **hello** " (whitespace moved outside the markers) and not "** hello **" (the latter is not valid markdown).
Instance Method Details
#ancestor_named?(node, names, max_depth:) ⇒ Boolean (private)
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 345
# Reports whether one of the nearest +max_depth+ ancestors of +node+ has a tag
# name listed in +names+. The walk stops at the first ancestor that is missing
# or is not an element.
def ancestor_named?(node, names, max_depth:)
  ancestor = node.parent
  remaining = max_depth

  while remaining > 0 && ancestor&.element?
    return true if ancestor.name.in?(names)

    ancestor = ancestor.parent
    remaining -= 1
  end

  false
end
#child_values_for_elements(node, child_values) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 284
# Returns the entries of +child_values+ whose corresponding child of +node+ is
# an element, dropping values that belong to text or other non-element nodes
# (falsy values are dropped as well).
def child_values_for_elements(node, child_values)
  node.children.each_with_index.filter_map do |child, position|
    child_values[position] if child.element?
  end
end
#code_fence(content) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 312
# Builds a backtick fence for +content+: three backticks by default, or one
# more than the longest run of three-or-more backticks found in the content.
def code_fence(content)
  longest_run = content.scan(/`{3,}/).max_by(&:length)
  fence_length = longest_run ? longest_run.length + 1 : 3
  "`" * fence_length
end
#encode_href(href) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 355
# Percent-encodes the characters of +href+ matched by ENCODE_HREF_CHARS using
# the RFC 2396 URI parser.
def encode_href(href)
  parser = URI::RFC2396_PARSER
  parser.escape(href, ENCODE_HREF_CHARS)
end
#escape_markdown_text(text)
Backslash-escapes CommonMark metacharacters in text so they are treated as literal characters by Markdown renderers.
MarkdownConversion.escape_markdown_text("**Important**")
# => "\\*\\*Important\\*\\*"
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 46
# Prefixes every match of MARKDOWN_METACHARACTERS in +text+ with a backslash
# and returns the resulting string.
def escape_markdown_text(text)
  text.gsub(MARKDOWN_METACHARACTERS) { |metacharacter| "\\" + metacharacter }
end
#format_list_item(lines, bullet) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 250
# Formats the +lines+ of one list item. The first line receives +bullet+,
# unless it already starts with a list marker, in which case it is indented
# instead. Every following line is indented by LIST_INDENT.
def format_list_item(lines, bullet)
  head, *continuation = lines
  prefix = head.match?(LIST_BULLET) ? LIST_INDENT : bullet

  formatted = [ prefix + head ]
  continuation.each { |line| formatted << LIST_INDENT + line }
  formatted.join("\n")
end
#inline_code(content) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 317
# Wraps +content+ in an inline code span. The fence is one backtick longer
# than the longest backtick run in the content, and the content is padded with
# spaces when it begins or ends with a backtick.
def inline_code(content)
  longest_run = content.scan(/`+/).map(&:length).max.to_i
  fence = "`" * (longest_run + 1)
  padding = content.start_with?("`") || content.end_with?("`") ? " " : ""
  "#{fence}#{padding}#{content}#{padding}#{fence}"
end
#inline_sibling?(sibling) ⇒ Boolean (private)
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 341
# Truthy when +sibling+ is a text node or an element whose name is listed in
# INLINE_ELEMENTS; nil when there is no sibling.
def inline_sibling?(sibling)
  return if sibling.nil?

  text_node = sibling.text?
  return text_node if text_node

  sibling.name&.in?(INLINE_ELEMENTS)
end
#join_children(child_values) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 256
# Concatenates the already-converted +child_values+ into a single string.
#
# Adjacent [:bold, text] (or [:italic, text]) pairs are first merged into one
# pair so that they are wrapped in a single set of markers, then every value is
# stringified and appended in order.
def join_children(child_values)
  runs = child_values.each_with_object([]) do |value, collected|
    emphasis = value.is_a?(Array) && (value[0] == :bold || value[0] == :italic)
    previous = collected.last

    if emphasis && previous.is_a?(Array) && previous[0] == value[0]
      # Merge adjacent bold/italic runs which Lexxy emits
      previous[1] = previous[1] + value[1]
    elsif emphasis
      # Store a copy so merging never mutates the caller's pair
      collected << [ value[0], value[1] ]
    else
      collected << value
    end
  end

  runs.each_with_object(+"") do |run, markdown|
    part = stringify(run)

    # Nested block elements (e.g., lists and blockquotes) need an initial newline injected
    starts_block = !markdown.empty? && !markdown.end_with?("\n") && part.end_with?("\n\n")
    markdown << "\n" if starts_block

    markdown << part
  end
end
#list_item_lines(list_node, child_values, prefix:) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 238
# Renders the element children of +list_node+ as list items joined by
# newlines. Blank lines inside an item are removed and items with no remaining
# lines are skipped. +prefix+ is either a bullet string or a callable that
# receives the item's index and returns the bullet.
def list_item_lines(list_node, child_values, prefix:)
  items = []

  child_values_for_elements(list_node, child_values).each_with_index do |value, index|
    lines = stringify(value).split("\n").reject(&:blank?)
    next if lines.empty?

    bullet = prefix.respond_to?(:call) ? prefix.call(index) : prefix
    items << format_list_item(lines, bullet)
  end

  items.join("\n")
end
#markdown_for_node(node, child_values) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 76
# Produces the Markdown value for a single +node+, given the values already
# computed for its children.
#
# Text nodes are dropped (insignificant whitespace), passed through verbatim
# (when escaping is skipped for their parent) or escaped. Elements are
# dispatched to a +visit_<name>+ method when one exists (dashes in the tag name
# become underscores); otherwise their joined children are returned stripped.
# Any other node type returns its joined children.
def markdown_for_node(node, child_values)
  if node.text?
    return "" if node.content.blank? && !significant_whitespace?(node)
    return node.content if skip_markdown_escaping?(node)

    return escape_markdown_text(strip_pretty_print_indentation(node))
  end

  return join_children(child_values) unless node.element?

  visitor = :"visit_#{node.name.tr("-", "_")}"
  return send(visitor, node, child_values) if respond_to?(visitor, true)

  join_children(child_values).strip
end
#markdown_link(title, url)
Returns a Markdown link: [title](url). Escapes brackets and backslashes in title, and percent-encodes characters in url that would break the link syntax.
MarkdownConversion.markdown_link("photo", "https://example.com/photo_(large).png")
# => "[photo](https://example.com/photo_%28large%29.png)"
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 37
# Builds a Markdown link of the form [title](url), escaping Markdown
# metacharacters in +title+ and percent-encoding +url+ via encode_href.
def markdown_link(title, url)
  label = escape_markdown_text(title)
  destination = encode_href(url)
  "[#{label}](#{destination})"
end
#node_to_markdown(node)
Converts a Nokogiri HTML node into a Markdown string.
node = Nokogiri::HTML4.fragment("<p>Hello <strong>world</strong></p>")
MarkdownConversion.node_to_markdown(node) # => "Hello **world**"
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 25
# Converts +node+ to Markdown by reducing it with BottomUpReducer, converting
# each visited node via markdown_for_node, and stripping the final result.
def node_to_markdown(node)
  reducer = BottomUpReducer.new(node)
  markdown = reducer.reduce { |current, child_values| markdown_for_node(current, child_values) }
  markdown.strip
end
#significant_whitespace?(node) ⇒ Boolean (private)
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 336
# Truthy when +node+ sits between two inline siblings, i.e. both its previous
# and its next sibling satisfy inline_sibling?.
def significant_whitespace?(node)
  inline_before = inline_sibling?(node.previous_sibling)
  return inline_before unless inline_before

  inline_sibling?(node.next_sibling)
end
#skip_markdown_escaping?(node) ⇒ Boolean (private)
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 359
# True when the direct parent of +node+ is one of SKIP_ESCAPING_PARENTS, in
# which case its text is emitted without Markdown escaping.
def skip_markdown_escaping?(node)
  parent_name = node.parent&.name
  SKIP_ESCAPING_PARENTS.include?(parent_name)
end
#stringify(value) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 290
# Turns an intermediate value into its final Markdown string. [:bold, text] and
# [:italic, text] pairs are wrapped in "**" / "*"; any other array is joined;
# everything else is converted with +to_s+.
def stringify(value)
  return value.to_s unless value.is_a?(Array)

  kind, text = value
  case kind
  when :bold then wrap_emphasis(text, "**")
  when :italic then wrap_emphasis(text, "*")
  else value.join
  end
end
#strip_pretty_print_indentation(node) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 327
# Returns the content of text +node+ with leading/trailing whitespace runs that
# contain a newline removed. Each removed run is replaced by a single space
# when the neighbouring sibling on that side is inline, and by nothing
# otherwise. Content without a newline is returned untouched.
def strip_pretty_print_indentation(node)
  text = node.content
  return text unless text.include?("\n")

  leading_replacement = inline_sibling?(node.previous_sibling) ? " " : ""
  without_leading = text.sub(LEADING_PRETTY_PRINT_WHITESPACE, leading_replacement)

  trailing_replacement = inline_sibling?(node.next_sibling) ? " " : ""
  without_leading.sub(TRAILING_PRETTY_PRINT_WHITESPACE, trailing_replacement)
end
#visit__heading(_node, child_values, level) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 152
# Renders an ATX heading: +level+ hash characters, a space, the joined
# children, and a blank line.
def visit__heading(_node, child_values, level)
  hashes = "#" * level
  title = join_children(child_values)
  "#{hashes} #{title}\n\n"
end
#visit__passthrough(_node, child_values) (private) Also known as: #visit_li, #visit_td, #visit_th, #visit_thead, #visit_tbody
These elements pass through their content (parent handlers use child_values directly)
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 222
# Returns the joined child values unchanged; the parent element's visitor is
# responsible for any surrounding Markdown structure.
def visit__passthrough(_node, child_values) = join_children(child_values)
#visit__table_header_row(node, child_values) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 231
# Renders a table header row followed by a separator row holding one "---"
# cell per header cell.
def visit__table_header_row(node, child_values)
  headers = child_values_for_elements(node, child_values).map { |value| stringify(value).strip }
  dividers = headers.map { "---" }

  [ headers, dividers ].map { |columns| "| #{columns.join(" | ")} |\n" }.join
end
#visit__unsupported(_node, _child_values) (private) Also known as: #visit_script, #visit_style
Avoid including content from elements that aren’t meaningful for markdown output
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 215
# Contributes nothing to the output: the element and its children are ignored.
def visit__unsupported(_node, _child_values) = ""
#visit_a(node, child_values) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 177
# Renders an anchor as [text](href) when it has an href accepted by
# Rails::HTML::Sanitizer.allowed_uri?; otherwise only the link text is kept.
def visit_a(node, child_values)
  label = join_children(child_values)
  href = node["href"]
  return label unless href && Rails::HTML::Sanitizer.allowed_uri?(href)

  "[#{label}](#{encode_href(href)})"
end
#visit_action_text_markdown(_node, child_values) (private)
Attachment markdown is wrapped in <action-text-markdown> by Content#to_markdown so it passes through without text escaping.
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 210
# Returns the joined child values of an <action-text-markdown> wrapper as-is.
def visit_action_text_markdown(_node, child_values) = join_children(child_values)
#visit_b(node, child_values) (private)
Alias for #visit_strong.
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 107
# <b> elements are handled by the same visitor as <strong>.
alias_method :visit_b, :visit_strong
#visit_blockquote(_node, child_values) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 162
# Renders a blockquote by prefixing every line of the stripped, joined children
# with "> " and appending a blank line.
def visit_blockquote(_node, child_values)
  body = join_children(child_values).strip
  quoted_lines = body.each_line.map { |line| "> #{line}" }
  "#{quoted_lines.join}\n\n"
end
#visit_br(_node, _child_values) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 200
# A <br> becomes a single newline.
def visit_br(_node, _child_values) = "\n"
#visit_code(node, child_values) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 126
# Renders <code>. When the direct parent is <pre> the content is returned
# untouched; otherwise it is wrapped as an inline code span.
def visit_code(node, child_values)
  code = join_children(child_values)
  inside_pre = node.parent&.name == "pre"
  inside_pre ? code : inline_code(code)
end
#visit_del(_node, child_values) (private)
Alias for #visit_s.
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 124
# <del> elements are handled by the same visitor as <s>.
alias_method :visit_del, :visit_s
#visit_div(_node, child_values) (private)
Trix uses <div> as its default block element and represents newlines as
<br> tags (see piece_view.js and block_view.js in the Trix source). Unlike <p>, we don’t append paragraph-separating newlines here because the
children already provide spacing.
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 148
# Returns the joined children of a <div> without appending any
# paragraph-separating newlines.
def visit_div(_node, child_values) = join_children(child_values)
#visit_em(node, child_values) (private) Also known as: #visit_i
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 109
# Renders emphasis as an [:italic, text] pair (wrapped in markers later by
# stringify). When an italic tag already appears among the nearest four
# ancestors the plain text is returned instead.
def visit_em(node, child_values)
  text = join_children(child_values)

  # lexxy redundantly wraps emphasized subtrees in `<i>`
  return text if ancestor_named?(node, ITALIC_TAGS, max_depth: 4)

  [ :italic, text ]
end
#visit_h1(node, child_values) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 155
# Renders <h1> as a level-1 heading.
def visit_h1(node, child_values)
  visit__heading(node, child_values, 1)
end
#visit_h2(node, child_values) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 156
# Renders <h2> as a level-2 heading.
def visit_h2(node, child_values)
  visit__heading(node, child_values, 2)
end
#visit_h3(node, child_values) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 157
# Renders <h3> as a level-3 heading.
def visit_h3(node, child_values)
  visit__heading(node, child_values, 3)
end
#visit_h4(node, child_values) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 158
# Renders <h4> as a level-4 heading.
def visit_h4(node, child_values)
  visit__heading(node, child_values, 4)
end
#visit_h5(node, child_values) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 159
# Renders <h5> as a level-5 heading.
def visit_h5(node, child_values)
  visit__heading(node, child_values, 5)
end
#visit_h6(node, child_values) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 160
# Renders <h6> as a level-6 heading.
def visit_h6(node, child_values)
  visit__heading(node, child_values, 6)
end
#visit_hr(_node, _child_values) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 204
# An <hr> becomes a "---" thematic break followed by a blank line.
def visit_hr(_node, _child_values) = "---\n\n"
#visit_i(node, child_values) (private)
Alias for #visit_em.
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 119
# <i> elements are handled by the same visitor as <em>.
alias_method :visit_i, :visit_em
#visit_li(_node, child_values) (private)
Alias for #visit__passthrough.
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 225
# <li> passes its joined children through; the enclosing list visitor adds the bullets.
alias_method :visit_li, :visit__passthrough
#visit_ol(node, child_values) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 172
# Renders an ordered list. Items are numbered from 1 by their position among
# the list's element children, and the list is followed by a blank line.
def visit_ol(node, child_values)
  numbering = lambda { |index| "#{index + 1}. " }
  "#{list_item_lines(node, child_values, prefix: numbering)}\n\n"
end
#visit_p(_node, child_values) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 141
# Renders a paragraph: the joined children followed by a blank line.
def visit_p(_node, child_values)
  paragraph = join_children(child_values)
  paragraph + "\n\n"
end
#visit_pre(_node, child_values) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 135
# Renders <pre> as a fenced code block. At most one leading and one trailing
# newline are removed from the content, and the fence length comes from
# code_fence.
def visit_pre(_node, child_values)
  code = join_children(child_values)
  code = code.delete_prefix("\n").delete_suffix("\n")
  fence = code_fence(code)

  [ fence, code, fence ].join("\n") + "\n\n"
end
#visit_s(_node, child_values) (private) Also known as: #visit_del
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 121
# Renders strikethrough text as ~~text~~.
#
# Whitespace at the edges of the content is kept outside the markers via
# wrap_emphasis, matching how bold and italic are emitted: "~~ text ~~" is not
# recognised as strikethrough because the delimiters must hug the text.
#
# Empty or whitespace-only content is returned unwrapped, since a bare "~~~~"
# would be read as the opening of a tilde-fenced code block.
def visit_s(_node, child_values)
  inner = join_children(child_values)
  return inner if inner.strip.empty?

  wrap_emphasis(inner, "~~")
end
#visit_script(_node, _child_values) (private)
Alias for #visit__unsupported.
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 218
# <script> elements are dropped from the output entirely.
alias_method :visit_script, :visit__unsupported
#visit_strong(node, child_values) (private) Also known as: #visit_b
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 97
# Renders strong text as a [:bold, text] pair (wrapped in markers later by
# stringify). When a bold tag already appears among the nearest four ancestors
# the plain text is returned instead.
def visit_strong(node, child_values)
  text = join_children(child_values)

  # lexxy redundantly wraps bold subtrees in `<b>`
  return text if ancestor_named?(node, BOLD_TAGS, max_depth: 4)

  [ :bold, text ]
end
#visit_style(_node, _child_values) (private)
Alias for #visit__unsupported.
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 219
# <style> elements are dropped from the output entirely.
alias_method :visit_style, :visit__unsupported
#visit_summary(_node, child_values) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 196
# Renders <summary> as a bold line followed by a blank line.
def visit_summary(_node, child_values)
  title = join_children(child_values)
  "**" + title + "**\n\n"
end
#visit_tbody(_node, child_values) (private)
Alias for #visit__passthrough.
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 229
# <tbody> passes its joined children (the rendered rows) through unchanged.
alias_method :visit_tbody, :visit__passthrough
#visit_td(_node, child_values) (private)
Alias for #visit__passthrough.
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 226
# <td> passes its joined children through; the row visitor adds the cell separators.
alias_method :visit_td, :visit__passthrough
#visit_th(_node, child_values) (private)
Alias for #visit__passthrough.
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 227
# <th> passes its joined children through; the row visitor adds the cell separators.
alias_method :visit_th, :visit__passthrough
#visit_thead(_node, child_values) (private)
Alias for #visit__passthrough.
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 228
# <thead> passes its joined children (the rendered rows) through unchanged.
alias_method :visit_thead, :visit__passthrough
#visit_tr(node, child_values) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 186
# Renders a table row as "| a | b |". A row whose element children are all
# <th> is rendered as a header row with its separator line.
def visit_tr(node, child_values)
  # lexxy does not emit `thead`, so we need to infer header rows from `tr` contents
  header_row = node.element_children.all? { |cell| cell.name == "th" }
  return visit__table_header_row(node, child_values) if header_row

  cells = child_values_for_elements(node, child_values).map { |value| stringify(value).strip }
  "| #{cells.join(" | ")} |\n"
end
#visit_ul(node, child_values) (private)
[ GitHub ]# File 'actiontext/lib/action_text/markdown_conversion.rb', line 167
# Renders an unordered list with "- " bullets, followed by a blank line.
def visit_ul(node, child_values)
  "#{list_item_lines(node, child_values, prefix: "- ")}\n\n"
end
#wrap_emphasis(text, marker) (private)
Make sure <strong> hello </strong> becomes " **hello** " (whitespace moved outside the markers) and not "** hello **" (the latter is not valid markdown).
# File 'actiontext/lib/action_text/markdown_conversion.rb', line 305
# Wraps +text+ in +marker+ (e.g. "**" or "*"), keeping any leading and trailing
# whitespace outside the markers so that " hello " becomes " **hello** " rather
# than "** hello **", which is not valid markdown.
#
# Empty or whitespace-only +text+ is returned unchanged. Without this guard the
# leading and trailing patterns would both match the same whitespace, so it
# would be duplicated around an empty pair of markers (" " => " **** "), and a
# bare "****" is parsed as a thematic break.
def wrap_emphasis(text, marker)
  inner = text.strip
  return text if inner.empty?

  leading = text[/\A\s*/]
  trailing = text[/\s*\z/]
  "#{leading}#{marker}#{inner}#{marker}#{trailing}"
end