Class: Reline::Unicode
Relationships & Source Files | |
Namespace Children | |
Classes:
| |
Inherits: | Object |
Defined in: | lib/reline/unicode.rb |
Constant Summary
-
# Matches a single CSI escape sequence: ESC "[", any run of digits and
# semicolons, then one of the listed final bytes.
CSI_REGEXP = /\e\[[\d;]*[ABCDEFGHJKSTfminsuhl]/

# Caret notation used when printing control characters, keyed by code point.
EscapedPairs = {
  0x00 => '^@',
  0x01 => '^A', # C-a
  0x02 => '^B',
  0x03 => '^C',
  0x04 => '^D',
  0x05 => '^E',
  0x06 => '^F',
  0x07 => '^G',
  0x08 => '^H', # Backspace
  0x09 => '^I',
  0x0A => '^J',
  0x0B => '^K',
  0x0C => '^L',
  0x0D => '^M', # Enter
  0x0E => '^N',
  0x0F => '^O',
  0x10 => '^P',
  0x11 => '^Q',
  0x12 => '^R',
  0x13 => '^S',
  0x14 => '^T',
  0x15 => '^U',
  0x16 => '^V',
  0x17 => '^W',
  0x18 => '^X',
  0x19 => '^Y',
  0x1A => '^Z', # C-z
  0x1B => '^[', # C-[ C-3
  0x1C => '^\\', # C-\
  0x1D => '^]', # C-]
  0x1E => '^^', # C-~ C-6
  0x1F => '^_', # C-_ C-7
  0x7F => '^?', # C-? C-8
}.freeze

# Marker that ends a region whose characters take no display width.
NON_PRINTING_END = "\2".freeze

# Marker that starts a region whose characters take no display width.
NON_PRINTING_START = "\1".freeze

# Matches a single OSC escape sequence: ESC "]", a numeric code, any number of
# ";"-separated non-empty parameters, terminated by BEL or ESC "\".
# The code and each parameter may be more than one character long; without the
# "+" quantifiers a sequence such as "\e]0;title\a" would not match and its
# characters would be counted as visible text by WIDTH_SCANNER.
OSC_REGEXP = /\e\]\d+(?:;[^;\a\e]+)*(?:\a|\e\\)/

# Tokenizer for width calculations. Anchored with \G so consecutive scans
# consume the string left to right. Exactly one capture group is set per match:
# 1 = NON_PRINTING_START, 2 = NON_PRINTING_END, 3 = CSI sequence,
# 4 = OSC sequence, 5 = any other grapheme cluster.
WIDTH_SCANNER = /\G(?:(#{NON_PRINTING_START})|(#{NON_PRINTING_END})|(#{CSI_REGEXP})|(#{OSC_REGEXP})|(\X))/o
Class Method Summary
- .calculate_width(str, allow_escape_code = false)
- .common_prefix(list, ignore_case: false)
- .ed_transpose_words(line, byte_pointer)
- .em_backward_word(line, byte_pointer)
- .em_big_backward_word(line, byte_pointer)
- .em_forward_word(line, byte_pointer)
- .em_forward_word_with_capitalization(line, byte_pointer)
- .escape_for_print(str)
- .get_mbchar_width(mbchar)
- .get_next_mbchar_size(line, byte_pointer)
- .get_prev_mbchar_size(line, byte_pointer)
- .safe_encode(str, encoding)
- .space_character?(s) ⇒ Boolean
-
.split_by_width(str, max_width)
This method is used by IRB.
- .split_line_by_width(str, max_width, encoding = str.encoding, offset: 0)
- .strip_non_printing_start_end(prompt)
- .take_mbchar_range(str, start_col, width, cover_begin: false, cover_end: false, padding: false)
-
.take_range(str, start_col, max_width)
Take a chunk of a String cut by width with escape sequences.
- .vi_backward_word(line, byte_pointer)
- .vi_big_backward_word(line, byte_pointer)
- .vi_big_forward_end_word(line, byte_pointer)
- .vi_big_forward_word(line, byte_pointer)
- .vi_first_print(line)
- .vi_forward_end_word(line, byte_pointer)
- .vi_forward_word(line, byte_pointer, drop_terminate_spaces = false)
- .word_character?(s) ⇒ Boolean
Class Method Details
.calculate_width(str, allow_escape_code = false)
[ GitHub ]# File 'lib/reline/unicode.rb', line 98
# Returns the display width of +str+, summing get_mbchar_width over its
# grapheme clusters (after encoding to UTF-8).
#
# When +allow_escape_code+ is true the string is tokenized with WIDTH_SCANNER:
# CSI/OSC sequences add nothing, and clusters between NON_PRINTING_START and
# NON_PRINTING_END markers are not counted.
def self.calculate_width(str, allow_escape_code = false)
  utf8 = str.encode(Encoding::UTF_8)
  unless allow_escape_code
    return utf8.grapheme_clusters.sum { |cluster| get_mbchar_width(cluster) }
  end

  total = 0
  hidden = false
  utf8.scan(WIDTH_SCANNER) do |start_marker, end_marker, _csi, _osc, cluster|
    if start_marker
      hidden = true
    elsif end_marker
      hidden = false
    elsif cluster && !hidden
      # Only one capture is set per match, so a cluster here is never an escape sequence.
      total += get_mbchar_width(cluster)
    end
  end
  total
end
.common_prefix(list, ignore_case: false)
[ GitHub ]# File 'lib/reline/unicode.rb', line 387
# Returns the longest leading run of grapheme clusters shared by every string
# in +list+, joined into a string. Returns '' for an empty list.
#
# With +ignore_case: true+ clusters are compared using String#casecmp?; the
# returned text is taken from the first element of +list+.
def self.common_prefix(list, ignore_case: false)
  return '' if list.empty?

  same = ignore_case ? ->(a, b) { a.casecmp?(b) } : ->(a, b) { a == b }
  shared = list.reduce(list.first.grapheme_clusters) do |prefix, candidate|
    clusters = candidate.grapheme_clusters
    # Past the end of candidate clusters[index] is nil, which ends the prefix.
    prefix.take_while.with_index { |cluster, index| same.call(cluster, clusters[index]) }
  end
  shared.join
end
.ed_transpose_words(line, byte_pointer)
[ GitHub ]# File 'lib/reline/unicode.rb', line 292
# Locates the two words to swap around the cursor at +byte_pointer+.
#
# Returns four byte offsets into +line+:
# [first_word_start, first_word_end, second_word_start, second_word_end].
# The second word is the one at or after the cursor, or the last word before
# it when only non-word characters follow the cursor.
def self.ed_transpose_words(line, byte_pointer)
  before_cursor = line.byteslice(0, byte_pointer).grapheme_clusters
  clusters = before_cursor + line.byteslice(byte_pointer..).grapheme_clusters
  total = clusters.size
  cursor = before_cursor.size

  # Moves an index left while the cluster just before it is (over_word: true)
  # or is not (over_word: false) a word character.
  rewind = lambda do |index, over_word|
    index -= 1 while index > 0 && (word_character?(clusters[index - 1]) ? over_word : !over_word)
    index
  end

  cursor += 1 until cursor >= total || word_character?(clusters[cursor])
  if cursor == total # 'aaa  bbb [cursor] '
    cursor = rewind.call(cursor, false)
    second_word_end = total
  else # 'aaa  [cursor]bbb'
    cursor += 1 while cursor < total && word_character?(clusters[cursor])
    second_word_end = cursor
  end

  second_word_start = rewind.call(cursor, true)
  first_word_end = rewind.call(second_word_start, false)
  first_word_start = rewind.call(first_word_end, true)

  # Convert cluster indexes into byte offsets.
  [first_word_start, first_word_end, second_word_start, second_word_end].map do |count|
    clusters.first(count).sum(&:bytesize)
  end
end
.em_backward_word(line, byte_pointer)
[ GitHub ]# File 'lib/reline/unicode.rb', line 278
# Returns the number of bytes between +byte_pointer+ and the start of the
# previous word: any non-word clusters directly before the cursor plus the
# run of word clusters before them.
def self.em_backward_word(line, byte_pointer)
  before_cursor = line.byteslice(0, byte_pointer).grapheme_clusters.reverse
  after_gap = before_cursor.drop_while { |cluster| !word_character?(cluster) }
  after_word = after_gap.drop_while { |cluster| word_character?(cluster) }
  # Everything that was dropped is what the motion travels over.
  before_cursor.first(before_cursor.size - after_word.size).sum(&:bytesize)
end
.em_big_backward_word(line, byte_pointer)
[ GitHub ]# File 'lib/reline/unicode.rb', line 285
# Returns the number of bytes between +byte_pointer+ and the start of the
# previous whitespace-delimited word: any spaces directly before the cursor
# plus the run of non-space clusters before them.
def self.em_big_backward_word(line, byte_pointer)
  before_cursor = line.byteslice(0, byte_pointer).grapheme_clusters.reverse
  after_gap = before_cursor.drop_while { |cluster| space_character?(cluster) }
  after_word = after_gap.drop_while { |cluster| !space_character?(cluster) }
  before_cursor.first(before_cursor.size - after_word.size).sum(&:bytesize)
end
.em_forward_word(line, byte_pointer)
[ GitHub ]# File 'lib/reline/unicode.rb', line 264
# Returns the number of bytes from +byte_pointer+ to the end of the next word:
# any non-word clusters at the cursor plus the run of word clusters after them.
def self.em_forward_word(line, byte_pointer)
  from_cursor = line.byteslice(byte_pointer..).grapheme_clusters
  after_gap = from_cursor.drop_while { |cluster| !word_character?(cluster) }
  after_word = after_gap.drop_while { |cluster| word_character?(cluster) }
  from_cursor.first(from_cursor.size - after_word.size).sum(&:bytesize)
end
.em_forward_word_with_capitalization(line, byte_pointer)
[ GitHub ]# File 'lib/reline/unicode.rb', line 271
# Like em_forward_word, but also returns the replacement text.
#
# Returns [byte_size, text] where byte_size covers the leading non-word
# clusters plus the following word, and text is that same span with the word
# part passed through String#capitalize.
def self.em_forward_word_with_capitalization(line, byte_pointer)
  from_cursor = line.byteslice(byte_pointer..).grapheme_clusters
  leading = from_cursor.take_while { |cluster| !word_character?(cluster) }
  word = from_cursor[leading.size..].take_while { |cluster| word_character?(cluster) }
  consumed = (leading + word).sum(&:bytesize)
  [consumed, leading.join + word.join.capitalize]
end
.escape_for_print(str)
[ GitHub ]# File 'lib/reline/unicode.rb', line 44
# Returns +str+ with control characters made printable.
#
# Newlines are kept as-is, a tab becomes a space, and any other character
# whose code point is a key of EscapedPairs is replaced by its caret notation.
# All remaining characters pass through unchanged.
def self.escape_for_print(str)
  printable = str.each_char.map do |char|
    if char == "\n"
      char
    elsif char == "\t"
      ' '
    else
      EscapedPairs.fetch(char.ord, char)
    end
  end
  printable.join
end
.get_mbchar_width(mbchar)
[ GitHub ]# File 'lib/reline/unicode.rb', line 75
# Returns the display width of one grapheme cluster, judged by its first
# code point.
#
# Code points up to 0x1F count as 2 (they are in EscapedPairs), and code
# points up to 0x7E count as 1. Anything else is looked up in the
# EastAsianWidth tables; a table value of -1 defers to Reline.ambiguous_width.
def self.get_mbchar_width(mbchar)
  codepoint = mbchar.ord
  return 2 if codepoint <= 0x1F # in EscapedPairs
  return 1 if codepoint <= 0x7E # printable ASCII chars

  utf8 = mbchar.encode(Encoding::UTF_8)
  codepoint = utf8.ord
  chunk = EastAsianWidth::CHUNK_LAST.bsearch_index { |last| codepoint <= last }
  width = EastAsianWidth::CHUNK_WIDTH[chunk]
  return Reline.ambiguous_width if width == -1
  return width unless width == 1 && utf8.size >= 2

  # Halfwidth Dakuten / Handakuten: only these two characters have the
  # Letter Modifier category and can be combined in a single grapheme
  # cluster, so a cluster carrying one of them is two columns wide.
  [0xFF9E, 0xFF9F].include?(utf8[1].ord) ? 2 : 1
end
.get_next_mbchar_size(line, byte_pointer)
[ GitHub ]# File 'lib/reline/unicode.rb', line 250
# Returns the byte size of the grapheme cluster that starts at +byte_pointer+
# in +line+, or 0 when nothing follows the pointer.
def self.get_next_mbchar_size(line, byte_pointer)
  following = line.byteslice(byte_pointer..-1).each_grapheme_cluster.first
  return 0 unless following

  following.bytesize
end
.get_prev_mbchar_size(line, byte_pointer)
[ GitHub ]# File 'lib/reline/unicode.rb', line 255
# Returns the byte size of the grapheme cluster that ends right before
# +byte_pointer+ in +line+, or 0 when the pointer is at the start.
def self.get_prev_mbchar_size(line, byte_pointer)
  return 0 if byte_pointer.zero?

  preceding = line.byteslice(0..(byte_pointer - 1)).grapheme_clusters.last
  return 0 unless preceding

  preceding.bytesize
end
.safe_encode(str, encoding)
[ GitHub ]# File 'lib/reline/unicode.rb', line 57
# Converts +str+ to +encoding+, replacing invalid or undefined characters.
#
# Reline only supports strings that can be converted to UTF-8. When neither
# side is UTF-8 and the result is not pure ASCII, each grapheme cluster of the
# result that cannot be encoded to UTF-8 is replaced with '?'.
def self.safe_encode(str, encoding)
  utf8 = Encoding::UTF_8
  result = str.encode(encoding, invalid: :replace, undef: :replace)
  utf8_safe = str.encoding == utf8 || result.encoding == utf8 || result.ascii_only?
  return result if utf8_safe

  # This is essentially the same as
  # `str.encode(utf8, **replace_options).encode(encoding, **replace_options)`
  # but avoids an unnecessary irreversible encoding conversion.
  result.gsub(/\X/) do |cluster|
    begin
      cluster.encode(utf8)
      cluster
    rescue Encoding::UndefinedConversionError
      '?'
    end
  end
end
.space_character?(s) ⇒ Boolean
# File 'lib/reline/unicode.rb', line 412
# Returns whether +s+ contains a whitespace character (regexp \s).
# Returns nil when +s+ is nil or false.
def self.space_character?(s)
  return unless s

  s.match?(/\s/)
end
.split_by_width(str, max_width)
This method is used by IRB.
# File 'lib/reline/unicode.rb', line 125
# This method is used by IRB.
#
# Splits +str+ with split_line_by_width and returns [lines, line_count].
def self.split_by_width(str, max_width)
  split_line_by_width(str, max_width).then { |rows| [rows, rows.size] }
end
.split_line_by_width(str, max_width, encoding = str.encoding, offset: 0)
[ GitHub ]# File 'lib/reline/unicode.rb', line 130
# Splits +str+ into rows that each fit within +max_width+ display columns.
#
# +encoding+ is the encoding of the row strings that are created, and
# +offset+ is the column at which the first row starts. CSI and OSC sequences
# seen outside a non-printing region are remembered and copied to the start of
# every new row; "\e[m" and "\e[0m" clear that memory. Returns an Array of
# Strings.
def self.split_line_by_width(str, max_width, encoding = str.encoding, offset: 0)
  rows = [String.new(encoding: encoding)]
  active_sequences = String.new(encoding: encoding)
  column = offset
  hidden = false

  str.encode(Encoding::UTF_8).scan(WIDTH_SCANNER) do |start_marker, end_marker, csi, osc, cluster|
    if start_marker
      hidden = true
    elsif end_marker
      hidden = false
    elsif csi
      rows.last << csi
      next if hidden

      if ["\e[m", "\e[0m"].include?(csi)
        active_sequences.clear
      else
        active_sequences << csi
      end
    elsif osc
      rows.last << osc
      active_sequences << osc unless hidden
    elsif cluster
      unless hidden
        cluster_width = get_mbchar_width(cluster)
        column += cluster_width
        if column > max_width
          # Start a new row that begins with the sequences still in effect.
          column = cluster_width
          rows << active_sequences.dup
        end
      end
      rows.last << cluster
    end
  end

  # A row filled exactly to max_width gets an empty row after it
  # (the cursor moves to the next line).
  rows << String.new(encoding: encoding) if column == max_width
  rows
end
.strip_non_printing_start_end(prompt)
[ GitHub ]# File 'lib/reline/unicode.rb', line 172
# Removes the \x01 / \x02 marker bytes from +prompt+ while keeping the text
# they enclose. An opening \x01 without a closing \x02 is treated as running
# to the end of the string.
def self.strip_non_printing_start_end(prompt)
  prompt.gsub(/\x01([^\x02]*)(?:\x02|\z)/) do
    Regexp.last_match(1)
  end
end
.take_mbchar_range(str, start_col, width, cover_begin: false, cover_end: false, padding: false)
[ GitHub ]# File 'lib/reline/unicode.rb', line 181
# Extracts the part of +str+ that lies between display columns +start_col+ and
# +start_col + width+.
#
# CSI/OSC sequences and clusters inside a non-printing region are copied into
# the result without affecting the column count, up to the point where
# scanning stops.
#
# Options for a character that straddles a boundary:
# cover_begin:: include a character that starts before +start_col+ but ends after it.
# cover_end::   include a character that starts before the end column but ends after it.
# padding::     substitute a space for a straddling character that is not
#               included, and pad the result with spaces up to the end column.
#
# Returns [chunk, chunk_start_col, chunk_width].
def self.take_mbchar_range(str, start_col, width, cover_begin: false, cover_end: false, padding: false)
  chunk = String.new(encoding: str.encoding)

  end_col = start_col + width
  total_width = 0
  rest = str.encode(Encoding::UTF_8)
  in_zero_width = false
  chunk_start_col = nil
  chunk_end_col = nil
  has_csi = false
  rest.scan(WIDTH_SCANNER) do |non_printing_start, non_printing_end, csi, osc, gc|
    case
    when non_printing_start
      in_zero_width = true
    when non_printing_end
      in_zero_width = false
    when csi
      has_csi = true
      chunk << csi
    when osc
      chunk << osc
    when gc
      if in_zero_width
        # Non-printing text is kept but does not advance the column.
        chunk << gc
        next
      end

      mbchar_width = get_mbchar_width(gc)
      # prev_width/total_width are the columns where this character starts/ends.
      prev_width = total_width
      total_width += mbchar_width

      if (cover_begin || padding ? total_width <= start_col : prev_width < start_col)
        # Current character hasn't reached start_col yet
        next
      elsif padding && !cover_begin && prev_width < start_col && start_col < total_width
        # Add preceding padding. This padding might have background color.
        chunk << ' '
        chunk_start_col ||= start_col
        chunk_end_col = total_width
        next
      elsif (cover_end ? prev_width < end_col : total_width <= end_col)
        # Current character is in the range
        chunk << gc
        chunk_start_col ||= prev_width
        chunk_end_col = total_width
        break if total_width >= end_col
      else
        # Current character exceeds end_col
        if padding && end_col < total_width
          # Add succeeding padding. This padding might have background color.
          chunk << ' '
          chunk_start_col ||= prev_width
          chunk_end_col = end_col
        end
        break
      end
    end
  end
  # Nothing was taken: report an empty range at start_col.
  chunk_start_col ||= start_col
  chunk_end_col ||= start_col
  if padding && chunk_end_col < end_col
    # Append padding. This padding should not include background color.
    chunk << "\e[0m" if has_csi
    chunk << ' ' * (end_col - chunk_end_col)
    chunk_end_col = end_col
  end
  [chunk, chunk_start_col, chunk_end_col - chunk_start_col]
end
.take_range(str, start_col, max_width)
Take a chunk of a String cut by width with escape sequences.
# File 'lib/reline/unicode.rb', line 177
# Take a chunk of a String cut by width with escape sequences.
#
# Returns only the chunk (first element) of take_mbchar_range called with its
# default options.
def self.take_range(str, start_col, max_width)
  chunk, _chunk_start, _chunk_width = take_mbchar_range(str, start_col, max_width)
  chunk
end
.vi_backward_word(line, byte_pointer)
[ GitHub ]# File 'lib/reline/unicode.rb', line 378
# Returns the number of bytes between +byte_pointer+ and the start of the
# previous vi word: any spaces directly before the cursor plus the preceding
# run of clusters of one class (all word characters, or all characters that
# are neither word nor space).
def self.vi_backward_word(line, byte_pointer)
  before_cursor = line.byteslice(0, byte_pointer).grapheme_clusters.reverse
  gap = before_cursor.take_while { |cluster| space_character?(cluster) }
  remaining = before_cursor.drop(gap.size)

  same_class =
    if word_character?(remaining.first)
      ->(cluster) { word_character?(cluster) }
    else
      ->(cluster) { !word_character?(cluster) && !space_character?(cluster) }
    end
  run = remaining.take_while(&same_class)
  (gap + run).sum(&:bytesize)
end
.vi_big_backward_word(line, byte_pointer)
[ GitHub ]# File 'lib/reline/unicode.rb', line 333
# Returns the number of bytes between +byte_pointer+ and the start of the
# previous whitespace-delimited word: any spaces directly before the cursor
# plus the run of non-space clusters before them.
def self.vi_big_backward_word(line, byte_pointer)
  bytes = 0
  seen_non_space = false
  line.byteslice(0, byte_pointer).grapheme_clusters.reverse_each do |cluster|
    if space_character?(cluster)
      # A space after the word has been entered marks the word's start.
      break if seen_non_space
    else
      seen_non_space = true
    end
    bytes += cluster.bytesize
  end
  bytes
end
.vi_big_forward_end_word(line, byte_pointer)
[ GitHub ]# File 'lib/reline/unicode.rb', line 323
# Returns the number of bytes from +byte_pointer+ to the last cluster of the
# next whitespace-delimited word. The cluster under the cursor is always
# stepped over; the final cluster of the target word is not included.
def self.vi_big_forward_end_word(line, byte_pointer)
  under_cursor, *following = line.byteslice(byte_pointer..).grapheme_clusters
  gap = following.take_while { |cluster| space_character?(cluster) }
  word = following.drop(gap.size).take_while { |cluster| !space_character?(cluster) }

  span = gap + word
  span.pop # stop on the last cluster instead of after it
  cursor_bytes = under_cursor ? under_cursor.bytesize : 0
  cursor_bytes + span.sum(&:bytesize)
end
.vi_big_forward_word(line, byte_pointer)
[ GitHub ]# File 'lib/reline/unicode.rb', line 316
# Returns the number of bytes from +byte_pointer+ to the start of the next
# whitespace-delimited word: the run of non-space clusters at the cursor plus
# the spaces that follow it.
def self.vi_big_forward_word(line, byte_pointer)
  from_cursor = line.byteslice(byte_pointer..).grapheme_clusters
  after_word = from_cursor.drop_while { |cluster| !space_character?(cluster) }
  after_gap = after_word.drop_while { |cluster| space_character?(cluster) }
  from_cursor.first(from_cursor.size - after_gap.size).sum(&:bytesize)
end
.vi_first_print(line)
[ GitHub ]# File 'lib/reline/unicode.rb', line 400
# Returns the byte size of the leading whitespace of +line+, i.e. the byte
# offset of its first non-space grapheme cluster.
def self.vi_first_print(line)
  indent = 0
  line.each_grapheme_cluster do |cluster|
    break unless space_character?(cluster)

    indent += cluster.bytesize
  end
  indent
end
.vi_forward_end_word(line, byte_pointer)
[ GitHub ]# File 'lib/reline/unicode.rb', line 360
# Returns the number of bytes from +byte_pointer+ to the last cluster of the
# next vi word.
#
# Returns 0 when nothing follows the pointer and the size of the only cluster
# when exactly one follows. Otherwise the cluster under the cursor is stepped
# over, spaces are skipped when the cursor cluster or the one after it is a
# space, and the following run of same-class clusters is covered except for
# its final cluster.
def self.vi_forward_end_word(line, byte_pointer)
  from_cursor = line.byteslice(byte_pointer..).grapheme_clusters
  case from_cursor.size
  when 0 then return 0
  when 1 then return from_cursor.first.bytesize
  end

  under_cursor, *following = from_cursor
  consumed = under_cursor.bytesize
  if space_character?(under_cursor) || space_character?(following.first)
    gap = following.take_while { |cluster| space_character?(cluster) }
    consumed += gap.sum(&:bytesize)
    following = following.drop(gap.size)
  end

  same_class =
    if word_character?(following.first)
      ->(cluster) { word_character?(cluster) }
    else
      ->(cluster) { !word_character?(cluster) && !space_character?(cluster) }
    end
  run = following.take_while(&same_class)
  run.pop # stop on the last cluster instead of after it
  consumed + run.sum(&:bytesize)
end
.vi_forward_word(line, byte_pointer, drop_terminate_spaces = false)
[ GitHub ]# File 'lib/reline/unicode.rb', line 340
# Returns the number of bytes from +byte_pointer+ to the start of the next vi
# word: the run of clusters sharing the class of the cluster under the cursor
# (word, space, or neither), plus the spaces after that run.
#
# With +drop_terminate_spaces+ true the trailing spaces are not counted.
# Returns 0 when nothing follows the pointer.
def self.vi_forward_word(line, byte_pointer, drop_terminate_spaces = false)
  from_cursor = line.byteslice(byte_pointer..).grapheme_clusters
  return 0 if from_cursor.empty?

  under_cursor = from_cursor.first
  same_class =
    if word_character?(under_cursor)
      ->(cluster) { word_character?(cluster) }
    elsif space_character?(under_cursor)
      ->(cluster) { space_character?(cluster) }
    else
      ->(cluster) { !word_character?(cluster) && !space_character?(cluster) }
    end

  run = from_cursor.take_while(&same_class)
  run_bytes = run.sum(&:bytesize)
  return run_bytes if drop_terminate_spaces

  trailing = from_cursor.drop(run.size).take_while { |cluster| space_character?(cluster) }
  run_bytes + trailing.sum(&:bytesize)
end
.word_character?(s) ⇒ Boolean
# File 'lib/reline/unicode.rb', line 406
# Returns whether +s+, encoded to UTF-8, contains a character matching
# \p{Word}. Returns nil when +s+ is nil or false, and false when +s+ cannot
# be converted to UTF-8 (Encoding::UndefinedConversionError).
def self.word_character?(s)
  return unless s

  s.encode(Encoding::UTF_8).match?(/\p{Word}/)
rescue Encoding::UndefinedConversionError
  false
end