Class: MatchData
Overview
MatchData
encapsulates the result of matching a ::Regexp
against a string. It is returned by Regexp#match and String#match, and is also stored in a global variable returned by Regexp.last_match.
Usage:
url = 'https://docs.ruby-lang.org/en/2.5.0/MatchData.html'
m = url.match(/(\d\.?)+/) # => #<MatchData "2.5.0" 1:"0">
m.string # => "https://docs.ruby-lang.org/en/2.5.0/MatchData.html"
m.regexp # => /(\d\.?)+/
# entire matched substring:
m[0] # => "2.5.0"
# Working with unnamed captures
m = url.match(%r{([^/]+)/([^/]+)\.html$})
m.captures # => ["2.5.0", "MatchData"]
m[1] # => "2.5.0"
m.values_at(1, 2) # => ["2.5.0", "MatchData"]
# Working with named captures
m = url.match(%r{(?<version>[^/]+)/(?<module>[^/]+)\.html$})
m.captures # => ["2.5.0", "MatchData"]
m.named_captures # => {"version"=>"2.5.0", "module"=>"MatchData"}
m[:version] # => "2.5.0"
m.values_at(:version, :module)
# => ["2.5.0", "MatchData"]
# Numerical indexes are working, too
m[1] # => "2.5.0"
m.values_at(1, 2) # => ["2.5.0", "MatchData"]
Global variables equivalence
Parts of the last MatchData
(returned by Regexp.last_match) are also aliased as global variables:
- $~ is Regexp.last_match;
- $& is Regexp.last_match[0];
- $1, $2, and so on are Regexp.last_match[i] (captures by number);
- $` is Regexp.last_match.pre_match;
- $' is Regexp.last_match.post_match;
- $+ is Regexp.last_match[-1] (the last capture).
See also “Special global variables” section in ::Regexp
documentation.
Instance Method Summary
-
#==(mtch2) ⇒ Boolean
(also: #eql?)
Equality—Two matchdata are equal if their target strings, patterns, and matched positions are identical.
-
#[](i) ⇒ String?
Match Reference –
MatchData
acts as an array, and may be accessed using the normal array indexing techniques. -
#begin(n) ⇒ Integer
Returns the offset of the start of the nth element of the match array in the string.
-
#captures ⇒ Array
Returns the array of captures; equivalent to
mtch.to_a[1..-1]
. -
#end(n) ⇒ Integer
Returns the offset of the character immediately following the end of the nth element of the match array in the string.
-
#eql?(mtch2) ⇒ Boolean
Alias for #==.
-
#hash ⇒ Integer
Produce a hash based on the target string, regexp and matched positions of this matchdata.
-
#inspect ⇒ String
Returns a printable version of mtch.
-
#length ⇒ Integer
(also: #size)
Returns the number of elements in the match array.
-
#named_captures ⇒ Hash
Returns a
::Hash
using named capture. -
#names ⇒ Array, ...
Returns a list of names of captures as an array of strings.
-
#offset(n) ⇒ Array
Returns a two-element array containing the beginning and ending offsets of the nth match.
-
#post_match ⇒ String
Returns the portion of the original string after the current match.
-
#pre_match ⇒ String
Returns the portion of the original string before the current match.
-
#regexp ⇒ Regexp
Returns the regexp.
-
#size ⇒ Integer
Alias for #length.
-
#string ⇒ String
Returns a frozen copy of the string passed in to
match
. -
#to_a ⇒ Array
Returns the array of matches.
-
#to_s ⇒ String
Returns the entire matched string.
-
#values_at(index, ...) ⇒ Array
Uses each index to access the matching values, returning an array of the corresponding matches.
- #initialize_copy(orig) Internal use only
Instance Method Details
#==(mtch2) ⇒ Boolean
#eql?(mtch2) ⇒ Boolean
Also known as: #eql?
Equality—Two matchdata are equal if their target strings, patterns, and matched positions are identical.
# File 're.c', line 3110
/*
 * MatchData#== (also #eql?).
 *
 * Two match objects compare equal when their target strings, their
 * patterns and their matched positions are all identical.
 *
 * Returns Qtrue or Qfalse.
 */
static VALUE
match_equal(VALUE match1, VALUE match2)
{
    const struct re_registers *lhs, *rhs;

    /* The very same object is trivially equal to itself. */
    if (match1 == match2) {
        return Qtrue;
    }
    /* Anything that is not a T_MATCH object can never be equal. */
    if (!RB_TYPE_P(match2, T_MATCH)) {
        return Qfalse;
    }
    /* A match whose regexp slot is unset never compares equal. */
    if (!RMATCH(match1)->regexp || !RMATCH(match2)->regexp) {
        return Qfalse;
    }
    if (!rb_str_equal(RMATCH(match1)->str, RMATCH(match2)->str)) {
        return Qfalse;
    }
    if (!rb_reg_equal(match_regexp(match1), match_regexp(match2))) {
        return Qfalse;
    }

    /* Finally compare the register sets: count first, then raw offsets. */
    lhs = RMATCH_REGS(match1);
    rhs = RMATCH_REGS(match2);
    if (lhs->num_regs != rhs->num_regs) {
        return Qfalse;
    }
    if (memcmp(lhs->beg, rhs->beg, lhs->num_regs * sizeof(*lhs->beg)) != 0 ||
        memcmp(lhs->end, rhs->end, lhs->num_regs * sizeof(*lhs->end)) != 0) {
        return Qfalse;
    }
    return Qtrue;
}
#[](i) ⇒ String?
Match Reference – MatchData
acts as an array, and may be accessed using the normal array indexing techniques. mtch[0]
is equivalent to the special variable $&
, and returns the entire matched string. mtch[1]
, mtch[2]
, and so on return the values of the matched backreferences (portions of the pattern between parentheses).
m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m #=> #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8">
m[0] #=> "HX1138"
m[1, 2] #=> ["H", "X"]
m[1..3] #=> ["H", "X", "113"]
m[-3, 2] #=> ["X", "113"]
m = /(?<foo>a+)b/.match("ccaaab")
m #=> #<MatchData "aaab" foo:"aaa">
m["foo"] #=> "aaa"
m[:foo] #=> "aaa"
# File 're.c', line 2039
/*
 * MatchData#[]: array-style access to the match.
 *
 * One argument:
 *   - a Fixnum selects the nth group through rb_reg_nth_match();
 *   - anything else is first tried as a capture name with
 *     namev_to_backref_number(); when that yields a negative number the
 *     argument is handed to match_ary_aref() instead.
 *
 * Two arguments (start, length): returns the corresponding sub-sequence of
 * groups through match_ary_subseq().  Returns nil when the length is
 * negative or when the start lies outside the match array.
 *
 * The length is clamped to the number of groups available from the start
 * position.  The clamp is applied for negative starts as well, and is
 * written as `len > num_regs - beg` so that a very large length cannot
 * overflow the signed addition `beg + len`.
 */
static VALUE
match_aref(int argc, VALUE *argv, VALUE match)
{
    VALUE idx, length;

    match_check(match);
    rb_scan_args(argc, argv, "11", &idx, &length);

    if (NIL_P(length)) {
        if (FIXNUM_P(idx)) {
            return rb_reg_nth_match(FIX2INT(idx), match);
        }
        else {
            int num = namev_to_backref_number(RMATCH_REGS(match), RMATCH(match)->regexp, idx);
            if (num >= 0) {
                return rb_reg_nth_match(num, match);
            }
            else {
                return match_ary_aref(match, idx, Qnil);
            }
        }
    }
    else {
        long beg = NUM2LONG(idx);
        long len = NUM2LONG(length);
        long num_regs = RMATCH_REGS(match)->num_regs;

        if (len < 0) {
            return Qnil;
        }
        if (beg < 0) {
            /* Negative start counts from the end of the match array. */
            beg += num_regs;
            if (beg < 0) return Qnil;
        }
        else if (beg > num_regs) {
            return Qnil;
        }
        /* Here 0 <= beg <= num_regs, so the subtraction cannot overflow. */
        if (len > num_regs - beg) {
            len = num_regs - beg;
        }
        return match_ary_subseq(match, beg, len, Qnil);
    }
}
#begin(n) ⇒ Integer
Returns the offset of the start of the nth element of the match array in the string. n can be a string or symbol to reference a named capture.
m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.begin(0) #=> 1
m.begin(2) #=> 2
m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
p m.begin(:foo) #=> 0
p m.begin(:bar) #=> 2
# File 're.c', line 1243
/*
 * MatchData#begin(n): character offset of the start of the nth element of
 * the match array.  n is resolved to a group number by
 * match_backref_number().
 *
 * Raises IndexError when the resolved number is outside the register set.
 * Returns nil when BEG() of the group is negative.
 */
static VALUE
match_begin(VALUE match, VALUE n)
{
    int nth = match_backref_number(match, n);
    /* NOTE(review): BEG() presumably refers to a local named `regs` -- keep the name. */
    struct re_registers *regs = RMATCH_REGS(match);

    match_check(match);

    if (nth < 0 || nth >= regs->num_regs) {
        rb_raise(rb_eIndexError, "index %d out of matches", nth);
    }
    if (BEG(nth) < 0) {
        return Qnil;
    }

    /* Make sure the character-offset table is populated before reading it. */
    update_char_offset(match);
    return INT2FIX(RMATCH(match)->rmatch->char_offset[nth].beg);
}
#captures ⇒ Array
Returns the array of captures; equivalent to mtch.to_a[1..-1]
.
f1,f2,f3,f4 = /(.)(.)(\d+)(\d)/.match("THX1138.").captures
f1 #=> "H"
f2 #=> "X"
f3 #=> "113"
f4 #=> "8"
# File 're.c', line 1930
/*
 * MatchData#captures: the array of captures, i.e. the match array starting
 * at element 1 (the whole-match element 0 is skipped).
 */
static VALUE
match_captures(VALUE match)
{
    const int first_capture = 1;

    return match_array(match, first_capture);
}
#end(n) ⇒ Integer
Returns the offset of the character immediately following the end of the nth element of the match array in the string. n can be a string or symbol to reference a named capture.
m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.end(0) #=> 7
m.end(2) #=> 3
m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
p m.end(:foo) #=> 1
p m.end(:bar) #=> 3
# File 're.c', line 1278
/*
 * MatchData#end(n): character offset immediately following the end of the
 * nth element of the match array.  n is resolved to a group number by
 * match_backref_number().
 *
 * Raises IndexError when the resolved number is outside the register set.
 * Returns nil when BEG() of the group is negative.
 */
static VALUE
match_end(VALUE match, VALUE n)
{
    int nth = match_backref_number(match, n);
    /* NOTE(review): BEG() presumably refers to a local named `regs` -- keep the name. */
    struct re_registers *regs = RMATCH_REGS(match);

    match_check(match);

    if (nth < 0 || nth >= regs->num_regs) {
        rb_raise(rb_eIndexError, "index %d out of matches", nth);
    }
    if (BEG(nth) < 0) {
        return Qnil;
    }

    /* Make sure the character-offset table is populated before reading it. */
    update_char_offset(match);
    return INT2FIX(RMATCH(match)->rmatch->char_offset[nth].end);
}
#==(mtch2) ⇒ Boolean
#eql?(mtch2) ⇒ Boolean
Boolean
#eql?(mtch2) ⇒ Boolean
Alias for #==.
#hash ⇒ Integer
Produce a hash based on the target string, regexp and matched positions of this matchdata.
See also Object#hash.
# File 're.c', line 3084
/*
 * MatchData#hash: hash value derived from the target string, the regexp
 * and the matched positions (register count plus the raw beg/end arrays).
 */
static VALUE
match_hash(VALUE match)
{
    const struct re_registers *positions;
    st_index_t h;

    match_check(match);

    /* Seed with the target string, then mix in the pattern. */
    h = rb_hash_start(rb_str_hash(RMATCH(match)->str));
    h = rb_hash_uint(h, reg_hash(match_regexp(match)));

    /* Mix in the register count and both offset arrays. */
    positions = RMATCH_REGS(match);
    h = rb_hash_uint(h, positions->num_regs);
    h = rb_hash_uint(h, rb_memhash(positions->beg,
                                   positions->num_regs * sizeof(*positions->beg)));
    h = rb_hash_uint(h, rb_memhash(positions->end,
                                   positions->num_regs * sizeof(*positions->end)));

    h = rb_hash_end(h);
    return ST2FIX(h);
}
#initialize_copy(orig)
# File 're.c', line 1056
/*
 * MatchData#initialize_copy (internal use only): make obj a copy of orig.
 *
 * Returns obj unchanged when OBJ_INIT_COPY() reports nothing to do.
 * Otherwise the str and regexp references are copied from orig, the
 * register set is copied with rb_reg_region_copy() (rb_memerror() is called
 * if that returns non-zero), and the character-offset cache is handled:
 *   - if orig's cache is not marked updated, obj's cache is marked
 *     not-updated as well;
 *   - otherwise obj's char_offset buffer is grown to regs.num_regs entries
 *     when it has fewer allocated, the entries are copied from orig, and
 *     obj's cache is marked updated.
 * RB_GC_GUARD(orig) follows the MEMCPY that reads through orig's rmatch.
 */
static VALUE match_init_copy(VALUE obj, VALUE orig) { struct rmatch *rm; if (!OBJ_INIT_COPY(obj, orig)) return obj; RMATCH(obj)->str = RMATCH(orig)->str; RMATCH(obj)->regexp = RMATCH(orig)->regexp; rm = RMATCH(obj)->rmatch; if (rb_reg_region_copy(&rm->regs, RMATCH_REGS(orig))) rb_memerror(); if (!RMATCH(orig)->rmatch->char_offset_updated) { rm->char_offset_updated = 0; } else { if (rm->char_offset_num_allocated < rm->regs.num_regs) { REALLOC_N(rm->char_offset, struct rmatch_offset, rm->regs.num_regs); rm->char_offset_num_allocated = rm->regs.num_regs; } MEMCPY(rm->char_offset, RMATCH(orig)->rmatch->char_offset, struct rmatch_offset, rm->regs.num_regs); rm->char_offset_updated = 1; RB_GC_GUARD(orig); } return obj; }
#inspect ⇒ String
Returns a printable version of mtch.
puts /.$/.match("foo").inspect
#=> #<MatchData "o">
puts /(.)(.)(.)/.match("foo").inspect
#=> #<MatchData "foo" 1:"f" 2:"o" 3:"o">
puts /(.)(.)?(.)/.match("fo").inspect
#=> #<MatchData "fo" 1:"f" 2:nil 3:"o">
puts /(?<foo>.)(?<bar>.)(?<baz>.)/.match("hoge").inspect
#=> #<MatchData "hog" foo:"h" bar:"o" baz:"g">
# File 're.c', line 2273
/*
 * MatchData#inspect: printable representation of the match.
 *
 * Special cases:
 *   - regexp slot is 0:   returns "#<ClassName:address>";
 *   - regexp slot is nil: returns "#<ClassName: " followed by group 0 and ">".
 *
 * Otherwise builds "#<ClassName" followed, for every register i, by a space
 * and the inspected value of group i ("nil" when the group is nil).  For
 * i > 0 the value is prefixed with "label:", where the label is names[i]
 * when that entry has a name and the decimal index i otherwise.
 *
 * names[] is a zero-filled, alloca'd table of num_regs entries that is
 * passed to onig_foreach_name() together with match_inspect_name_iter
 * (presumably the callback fills in the named groups -- confirm against
 * its definition).
 */
static VALUE match_inspect(VALUE match) { VALUE cname = rb_class_path(rb_obj_class(match)); VALUE str; int i; struct re_registers *regs = RMATCH_REGS(match); int num_regs = regs->num_regs; struct backref_name_tag *names; VALUE regexp = RMATCH(match)->regexp; if (regexp == 0) { return rb_sprintf("#<%"PRIsVALUE":%p>", cname, (void*)match); } else if (NIL_P(regexp)) { return rb_sprintf("#<%"PRIsVALUE": %"PRIsVALUE">", cname, rb_reg_nth_match(0, match)); } names = ALLOCA_N(struct backref_name_tag, num_regs); MEMZERO(names, struct backref_name_tag, num_regs); onig_foreach_name(RREGEXP_PTR(regexp), match_inspect_name_iter, names); str = rb_str_buf_new2("#<"); rb_str_append(str, cname); for (i = 0; i < num_regs; i++) { VALUE v; rb_str_buf_cat2(str, " "); if (0 < i) { if (names[i].name) rb_str_buf_cat(str, (const char *)names[i].name, names[i].len); else { rb_str_catf(str, "%d", i); } rb_str_buf_cat2(str, ":"); } v = rb_reg_nth_match(i, match); if (v == Qnil) rb_str_buf_cat2(str, "nil"); else rb_str_buf_append(str, rb_str_inspect(v)); } rb_str_buf_cat2(str, ">"); return str; }
#length ⇒ Integer
Also known as: #size
Returns the number of elements in the match array.
m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.length #=> 5
m.size #=> 5
# File 're.c', line 1147
/*
 * MatchData#length (also #size): number of elements in the match array,
 * i.e. the register count of the match.
 */
static VALUE
match_size(VALUE match)
{
    const struct re_registers *positions;

    match_check(match);
    positions = RMATCH_REGS(match);
    return INT2FIX(positions->num_regs);
}
#named_captures ⇒ Hash
Returns a ::Hash
using named capture.
Each key of the hash is the name of a named capture. Each value is the string of the last successful capture of the corresponding group.
m = /(?<a>.)(?<b>.)/.match("01")
m.named_captures #=> {"a" => "0", "b" => "1"}
m = /(?<a>.)(?<b>.)?/.match("0")
m.named_captures #=> {"a" => "0", "b" => nil}
m = /(?<a>.)(?<a>.)/.match("01")
m.named_captures #=> {"a" => "1"}
m = /(?<a>x)|(?<a>y)/.match("x")
m.named_captures #=> {"a" => "x"}
# File 're.c', line 2199
/*
 * MatchData#named_captures: a new Hash built from the named captures.
 *
 * When the match has a nil regexp the hash is returned empty; otherwise
 * onig_foreach_name() is run with match_named_captures_iter and a memo
 * carrying the hash and the match.
 */
static VALUE
match_named_captures(VALUE match)
{
    VALUE captures;
    VALUE regexp;
    struct MEMO *memo;

    match_check(match);

    captures = rb_hash_new();
    regexp = RMATCH(match)->regexp;
    if (!NIL_P(regexp)) {
        memo = MEMO_NEW(captures, match, 0);
        onig_foreach_name(RREGEXP(regexp)->ptr, match_named_captures_iter, (void*)memo);
    }
    return captures;
}
#names ⇒ Array, ...
Returns a list of names of captures as an array of strings. It is the same as mtch.regexp.names.
/(?<foo>.)(?<bar>.)(?<baz>.)/.match("hoge").names
#=> ["foo", "bar", "baz"]
m = /(?<x>.)(?<y>.)?/.match("a") #=> #<MatchData "a" x:"a" y:nil>
m.names #=> ["x", "y"]
# File 're.c', line 1126
/*
 * MatchData#names: the capture names as an array.
 *
 * Returns a new empty array when the match has a nil regexp, otherwise
 * the result of rb_reg_names() on the regexp.
 */
static VALUE
match_names(VALUE match)
{
    VALUE regexp;

    match_check(match);
    regexp = RMATCH(match)->regexp;
    return NIL_P(regexp) ? rb_ary_new_capa(0) : rb_reg_names(regexp);
}
#offset(n) ⇒ Array
Returns a two-element array containing the beginning and ending offsets of the nth match. n can be a string or symbol to reference a named capture.
m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.offset(0) #=> [1, 7]
m.offset(4) #=> [6, 7]
m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
p m.offset(:foo) #=> [0, 1]
p m.offset(:bar) #=> [2, 3]
# File 're.c', line 1207
/*
 * MatchData#offset(n): two-element array holding the beginning and ending
 * character offsets of the nth match.  n is resolved to a group number by
 * match_backref_number().
 *
 * Raises IndexError when the resolved number is outside the register set.
 * Returns [nil, nil] when BEG() of the group is negative.
 */
static VALUE
match_offset(VALUE match, VALUE n)
{
    int nth = match_backref_number(match, n);
    /* NOTE(review): BEG() presumably refers to a local named `regs` -- keep the name. */
    struct re_registers *regs = RMATCH_REGS(match);

    match_check(match);

    if (nth < 0 || nth >= regs->num_regs) {
        rb_raise(rb_eIndexError, "index %d out of matches", nth);
    }
    if (BEG(nth) < 0) {
        return rb_assoc_new(Qnil, Qnil);
    }

    /* Make sure the character-offset table is populated before reading it. */
    update_char_offset(match);
    return rb_assoc_new(
        INT2FIX(RMATCH(match)->rmatch->char_offset[nth].beg),
        INT2FIX(RMATCH(match)->rmatch->char_offset[nth].end));
}
#post_match ⇒ String
Returns the portion of the original string after the current match. Equivalent to the special variable $'
.
m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
m.post_match #=> ": The Movie"
# File 're.c', line 1802
/*
 * MatchData#post_match: the portion of the target string after the
 * current match (from END(0) to the end of the string).
 *
 * Returns nil when match is nil or when BEG(0) is -1.  The result is
 * tainted when the match object is tainted.
 */
VALUE
rb_reg_match_post(VALUE match)
{
    VALUE target, tail;
    long tail_start;
    /* NOTE(review): BEG()/END() presumably refer to a local named `regs`. */
    struct re_registers *regs;

    if (NIL_P(match)) {
        return Qnil;
    }
    match_check(match);

    regs = RMATCH_REGS(match);
    if (BEG(0) == -1) {
        return Qnil;
    }

    target = RMATCH(match)->str;
    tail_start = END(0);
    tail = rb_str_subseq(target, tail_start, RSTRING_LEN(target) - tail_start);
    if (OBJ_TAINTED(match)) {
        OBJ_TAINT(tail);
    }
    return tail;
}
#pre_match ⇒ String
Returns the portion of the original string before the current match. Equivalent to the special variable $`
.
m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.pre_match #=> "T"
# File 're.c', line 1775
/*
 * MatchData#pre_match: the portion of the target string before the
 * current match (from offset 0 up to BEG(0)).
 *
 * Returns nil when match is nil or when BEG(0) is -1.  The result is
 * tainted when the match object is tainted.
 */
VALUE
rb_reg_match_pre(VALUE match)
{
    VALUE head;
    /* NOTE(review): BEG() presumably refers to a local named `regs`. */
    struct re_registers *regs;

    if (NIL_P(match)) {
        return Qnil;
    }
    match_check(match);

    regs = RMATCH_REGS(match);
    if (BEG(0) == -1) {
        return Qnil;
    }

    head = rb_str_subseq(RMATCH(match)->str, 0, BEG(0));
    if (OBJ_TAINTED(match)) {
        OBJ_TAINT(head);
    }
    return head;
}
#regexp ⇒ Regexp
Returns the regexp.
m = /a.*b/.match("abc")
m.regexp #=> /a.*b/
# File 're.c', line 1098
/*
 * MatchData#regexp: the regexp of this match.
 *
 * When the stored regexp is nil, one is compiled from the quoted text of
 * group 0, stored back into the match object, and returned.
 */
static VALUE
match_regexp(VALUE match)
{
    VALUE regexp;
    VALUE matched;

    match_check(match);

    regexp = RMATCH(match)->regexp;
    if (!NIL_P(regexp)) {
        return regexp;
    }

    /* No pattern recorded: build a literal one from the matched text and cache it. */
    matched = rb_reg_nth_match(0, match);
    regexp = rb_reg_regcomp(rb_reg_quote(matched));
    RMATCH(match)->regexp = regexp;
    return regexp;
}
#size ⇒ Integer
Alias for #length.
#string ⇒ String
Returns a frozen copy of the string passed in to match
.
m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.string #=> "THX1138."
# File 're.c', line 2227
/*
 * MatchData#string: the target string stored in the match object.
 */
static VALUE
match_string(VALUE match)
{
    VALUE target;

    match_check(match);
    target = RMATCH(match)->str;  /* str is frozen */
    return target;
}
#to_a ⇒ Array
Returns the array of matches.
m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.to_a #=> ["HX1138", "H", "X", "113", "8"]
Because to_a is called when expanding *variable, there’s a useful assignment shortcut for extracting matched fields. This is slightly slower than accessing the fields directly (as an intermediate array is generated).
all,f1,f2,f3 = * /(.)(.)(\d+)(\d)/.match("THX1138.")
all #=> "HX1138"
f1 #=> "H"
f2 #=> "X"
f3 #=> "113"
# File 're.c', line 1911
/*
 * MatchData#to_a: the array of matches, starting at element 0 (the whole
 * matched string).
 */
static VALUE
match_to_a(VALUE match)
{
    const int first_element = 0;

    return match_array(match, first_element);
}
#to_s ⇒ String
Returns the entire matched string.
m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.to_s #=> "HX1138"
# File 're.c', line 2136
/*
 * MatchData#to_s: the entire matched string.
 *
 * A nil result from rb_reg_last_match() is replaced by a new empty string.
 * The result is tainted when either the match object or its target string
 * is tainted.
 */
static VALUE
match_to_s(VALUE match)
{
    VALUE matched = rb_reg_last_match(match);

    match_check(match);

    if (NIL_P(matched)) {
        matched = rb_str_new(0,0);
    }
    if (OBJ_TAINTED(match)) {
        OBJ_TAINT(matched);
    }
    if (OBJ_TAINTED(RMATCH(match)->str)) {
        OBJ_TAINT(matched);
    }
    return matched;
}
#values_at(index, ...) ⇒ Array
Uses each index to access the matching values, returning an array of the corresponding matches.
m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
m.to_a #=> ["HX1138", "H", "X", "113", "8"]
m.values_at(0, 2, -2) #=> ["HX1138", "X", "113"]
m = /(?<a>\d+) *(?<op>[+\-*\/]) *(?<b>\d+)/.match("1 + 2")
m.to_a #=> ["1 + 2", "1", "+", "2"]
m.values_at(:a, :b, :op) #=> ["1", "2", "+"]
# File 're.c', line 2099
/*
 * MatchData#values_at(index, ...): uses each index to access the matching
 * values and returns them in an array.
 *
 * For each argument:
 *   - a Fixnum selects the group directly;
 *   - anything else is tried as a capture name with
 *     namev_to_backref_number(); when that yields a negative number the
 *     argument is handed to match_ary_aref() together with the result
 *     array instead.
 */
static VALUE
match_values_at(int argc, VALUE *argv, VALUE match)
{
    VALUE result;
    int k;

    match_check(match);
    result = rb_ary_new2(argc);

    for (k = 0; k < argc; k++) {
        VALUE arg = argv[k];
        int num;

        if (FIXNUM_P(arg)) {
            num = FIX2INT(arg);
        }
        else {
            num = namev_to_backref_number(RMATCH_REGS(match), RMATCH(match)->regexp, arg);
            if (num < 0) {
                /* Not a capture name: let match_ary_aref() fill `result`. */
                match_ary_aref(match, arg, result);
                continue;
            }
        }
        rb_ary_push(result, rb_reg_nth_match(num, match));
    }
    return result;
}