MatchData
is the type of the special variable $~
,
and is the type of the object returned by Regexp#match
and
Regexp.last_match
. It encapsulates all the results of a
pattern match, results normally accessed through the special variables
$&
, $'
, $`
, $1
,
$2
, and so on.
Equality—Two matchdata are equal if their target strings, patterns, and matched positions are identical.
static VALUE match_equal(VALUE match1, VALUE match2) { const struct re_registers *regs1, *regs2; if (match1 == match2) return Qtrue; if (TYPE(match2) != T_MATCH) return Qfalse; if (!rb_str_equal(RMATCH(match1)->str, RMATCH(match2)->str)) return Qfalse; if (!rb_reg_equal(RMATCH(match1)->regexp, RMATCH(match2)->regexp)) return Qfalse; regs1 = RMATCH_REGS(match1); regs2 = RMATCH_REGS(match2); if (regs1->num_regs != regs2->num_regs) return Qfalse; if (memcmp(regs1->beg, regs2->beg, regs1->num_regs * sizeof(*regs1->beg))) return Qfalse; if (memcmp(regs1->end, regs2->end, regs1->num_regs * sizeof(*regs1->end))) return Qfalse; return Qtrue; }
Match Reference—MatchData
acts as an array, and may be
accessed using the normal array indexing techniques. mtch is equivalent to the special variable
$&
, and returns the entire matched string. mtch, mtch, and so
on return the values of the matched backreferences (portions of the pattern
between parentheses).
m = /(.)(.)(\d+)(\d)/.match("THX1138.") m #=> #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8"> m[0] #=> "HX1138" m[1, 2] #=> ["H", "X"] m[1..3] #=> ["H", "X", "113"] m[-3, 2] #=> ["X", "113"] m = /(?<foo>a+)b/.match("ccaaab") m #=> #<MatchData "aaab" foo:"aaa"> m["foo"] #=> "aaa" m[:foo] #=> "aaa"
static VALUE match_aref(int argc, VALUE *argv, VALUE match) { VALUE idx, rest; match_check(match); rb_scan_args(argc, argv, "11", &idx, &rest); if (NIL_P(rest)) { if (FIXNUM_P(idx)) { if (FIX2INT(idx) >= 0) { return rb_reg_nth_match(FIX2INT(idx), match); } } else { const char *p; int num; switch (TYPE(idx)) { case T_SYMBOL: p = rb_id2name(SYM2ID(idx)); goto name_to_backref; break; case T_STRING: p = StringValuePtr(idx); name_to_backref: num = name_to_backref_number(RMATCH_REGS(match), RMATCH(match)->regexp, p, p + strlen(p)); return rb_reg_nth_match(num, match); break; default: break; } } } return rb_ary_aref(argc, argv, match_to_a(match)); }
Returns the offset of the start of the nth element of the match array in the string. n can be a string or symbol to reference a named capture.
m = /(.)(.)(\d+)(\d)/.match("THX1138.") m.begin(0) #=> 1 m.begin(2) #=> 2 m = /(?<foo>.)(.)(?<bar>.)/.match("hoge") p m.begin(:foo) #=> 0 p m.begin(:bar) #=> 2
static VALUE match_begin(VALUE match, VALUE n) { int i = match_backref_number(match, n); struct re_registers *regs = RMATCH_REGS(match); match_check(match); if (i < 0 || regs->num_regs <= i) rb_raise(rb_eIndexError, "index %d out of matches", i); if (BEG(i) < 0) return Qnil; update_char_offset(match); return INT2FIX(RMATCH(match)->rmatch->char_offset[i].beg); }
Returns the array of captures; equivalent to mtch.to_a[1..-1]
.
f1,f2,f3,f4 = /(.)(.)(\d+)(\d)/.match("THX1138.").captures f1 #=> "H" f2 #=> "X" f3 #=> "113" f4 #=> "8"
static VALUE match_captures(VALUE match) { return match_array(match, 1); }
Returns the offset of the character immediately following the end of the nth element of the match array in the string. n can be a string or symbol to reference a named capture.
m = /(.)(.)(\d+)(\d)/.match("THX1138.") m.end(0) #=> 7 m.end(2) #=> 3 m = /(?<foo>.)(.)(?<bar>.)/.match("hoge") p m.end(:foo) #=> 1 p m.end(:bar) #=> 3
static VALUE match_end(VALUE match, VALUE n) { int i = match_backref_number(match, n); struct re_registers *regs = RMATCH_REGS(match); match_check(match); if (i < 0 || regs->num_regs <= i) rb_raise(rb_eIndexError, "index %d out of matches", i); if (BEG(i) < 0) return Qnil; update_char_offset(match); return INT2FIX(RMATCH(match)->rmatch->char_offset[i].end); }
Equality—Two matchdata are equal if their target strings, patterns, and matched positions are identical.
static VALUE match_equal(VALUE match1, VALUE match2) { const struct re_registers *regs1, *regs2; if (match1 == match2) return Qtrue; if (TYPE(match2) != T_MATCH) return Qfalse; if (!rb_str_equal(RMATCH(match1)->str, RMATCH(match2)->str)) return Qfalse; if (!rb_reg_equal(RMATCH(match1)->regexp, RMATCH(match2)->regexp)) return Qfalse; regs1 = RMATCH_REGS(match1); regs2 = RMATCH_REGS(match2); if (regs1->num_regs != regs2->num_regs) return Qfalse; if (memcmp(regs1->beg, regs2->beg, regs1->num_regs * sizeof(*regs1->beg))) return Qfalse; if (memcmp(regs1->end, regs2->end, regs1->num_regs * sizeof(*regs1->end))) return Qfalse; return Qtrue; }
Produce a hash based on the target string, regexp and matched positions of this matchdata.
static VALUE match_hash(VALUE match) { const struct re_registers *regs; st_index_t hashval = rb_hash_start(rb_str_hash(RMATCH(match)->str)); rb_hash_uint(hashval, reg_hash(RMATCH(match)->regexp)); regs = RMATCH_REGS(match); hashval = rb_hash_uint(hashval, regs->num_regs); hashval = rb_hash_uint(hashval, rb_memhash(regs->beg, regs->num_regs * sizeof(*regs->beg))); hashval = rb_hash_uint(hashval, rb_memhash(regs->end, regs->num_regs * sizeof(*regs->end))); hashval = rb_hash_end(hashval); return LONG2FIX(hashval); }
Returns a printable version of mtch.
puts /.$/.match("foo").inspect #=> #<MatchData "o"> puts /(.)(.)(.)/.match("foo").inspect #=> #<MatchData "foo" 1:"f" 2:"o" 3:"o"> puts /(.)(.)?(.)/.match("fo").inspect #=> #<MatchData "fo" 1:"f" 2:nil 3:"o"> puts /(?<foo>.)(?<bar>.)(?<baz>.)/.match("hoge").inspect #=> #<MatchData "hog" foo:"h" bar:"o" baz:"g">
static VALUE match_inspect(VALUE match) { const char *cname = rb_obj_classname(match); VALUE str; int i; struct re_registers *regs = RMATCH_REGS(match); int num_regs = regs->num_regs; struct backref_name_tag *names; VALUE regexp = RMATCH(match)->regexp; if (regexp == 0) { return rb_sprintf("#<%s:%p>", cname, (void*)match); } names = ALLOCA_N(struct backref_name_tag, num_regs); MEMZERO(names, struct backref_name_tag, num_regs); onig_foreach_name(RREGEXP(regexp)->ptr, match_inspect_name_iter, names); str = rb_str_buf_new2("#<"); rb_str_buf_cat2(str, cname); for (i = 0; i < num_regs; i++) { VALUE v; rb_str_buf_cat2(str, " "); if (0 < i) { if (names[i].name) rb_str_buf_cat(str, (const char *)names[i].name, names[i].len); else { rb_str_catf(str, "%d", i); } rb_str_buf_cat2(str, ":"); } v = rb_reg_nth_match(i, match); if (v == Qnil) rb_str_buf_cat2(str, "nil"); else rb_str_buf_append(str, rb_str_inspect(v)); } rb_str_buf_cat2(str, ">"); return str; }
Returns the number of elements in the match array.
m = /(.)(.)(\d+)(\d)/.match("THX1138.") m.length #=> 5 m.size #=> 5
static VALUE match_size(VALUE match) { match_check(match); return INT2FIX(RMATCH_REGS(match)->num_regs); }
Returns a list of names of captures as an array of strings. It is same as mtch.regexp.names.
/(?<foo>.)(?<bar>.)(?<baz>.)/.match("hoge").names #=> ["foo", "bar", "baz"] m = /(?<x>.)(?<y>.)?/.match("a") #=> #<MatchData "a" x:"a" y:nil> m.names #=> ["x", "y"]
static VALUE match_names(VALUE match) { match_check(match); return rb_reg_names(RMATCH(match)->regexp); }
Returns a two-element array containing the beginning and ending offsets of the nth match. n can be a string or symbol to reference a named capture.
m = /(.)(.)(\d+)(\d)/.match("THX1138.") m.offset(0) #=> [1, 7] m.offset(4) #=> [6, 7] m = /(?<foo>.)(.)(?<bar>.)/.match("hoge") p m.offset(:foo) #=> [0, 1] p m.offset(:bar) #=> [2, 3]
static VALUE match_offset(VALUE match, VALUE n) { int i = match_backref_number(match, n); struct re_registers *regs = RMATCH_REGS(match); match_check(match); if (i < 0 || regs->num_regs <= i) rb_raise(rb_eIndexError, "index %d out of matches", i); if (BEG(i) < 0) return rb_assoc_new(Qnil, Qnil); update_char_offset(match); return rb_assoc_new(INT2FIX(RMATCH(match)->rmatch->char_offset[i].beg), INT2FIX(RMATCH(match)->rmatch->char_offset[i].end)); }
Returns the portion of the original string after the current match.
Equivalent to the special variable $'
.
m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie") m.post_match #=> ": The Movie"
VALUE rb_reg_match_post(VALUE match) { VALUE str; long pos; struct re_registers *regs; if (NIL_P(match)) return Qnil; match_check(match); regs = RMATCH_REGS(match); if (BEG(0) == -1) return Qnil; str = RMATCH(match)->str; pos = END(0); str = rb_str_subseq(str, pos, RSTRING_LEN(str) - pos); if (OBJ_TAINTED(match)) OBJ_TAINT(str); return str; }
Returns the portion of the original string before the current match.
Equivalent to the special variable $`
.
m = /(.)(.)(\d+)(\d)/.match("THX1138.") m.pre_match #=> "T"
VALUE rb_reg_match_pre(VALUE match) { VALUE str; struct re_registers *regs; if (NIL_P(match)) return Qnil; match_check(match); regs = RMATCH_REGS(match); if (BEG(0) == -1) return Qnil; str = rb_str_subseq(RMATCH(match)->str, 0, BEG(0)); if (OBJ_TAINTED(match)) OBJ_TAINT(str); return str; }
Returns the regexp.
m = /a.*b/.match("abc") m.regexp #=> /a.*b/
static VALUE match_regexp(VALUE match) { match_check(match); return RMATCH(match)->regexp; }
Returns the number of elements in the match array.
m = /(.)(.)(\d+)(\d)/.match("THX1138.") m.length #=> 5 m.size #=> 5
static VALUE match_size(VALUE match) { match_check(match); return INT2FIX(RMATCH_REGS(match)->num_regs); }
Returns a frozen copy of the string passed in to match
.
m = /(.)(.)(\d+)(\d)/.match("THX1138.") m.string #=> "THX1138."
static VALUE match_string(VALUE match) { match_check(match); return RMATCH(match)->str; /* str is frozen */ }
Returns the array of matches.
m = /(.)(.)(\d+)(\d)/.match("THX1138.") m.to_a #=> ["HX1138", "H", "X", "113", "8"]
Because to_a
is called when expanding
*
variable, there’s a useful assignment shortcut for
extracting matched fields. This is slightly slower than accessing the
fields directly (as an intermediate array is generated).
all,f1,f2,f3 = *(/(.)(.)(\d+)(\d)/.match("THX1138.")) all #=> "HX1138" f1 #=> "H" f2 #=> "X" f3 #=> "113"
static VALUE match_to_a(VALUE match) { return match_array(match, 0); }
Returns the entire matched string.
m = /(.)(.)(\d+)(\d)/.match("THX1138.") m.to_s #=> "HX1138"
static VALUE match_to_s(VALUE match) { VALUE str = rb_reg_last_match(match); match_check(match); if (NIL_P(str)) str = rb_str_new(0,0); if (OBJ_TAINTED(match)) OBJ_TAINT(str); if (OBJ_TAINTED(RMATCH(match)->str)) OBJ_TAINT(str); return str; }
Uses each index to access the matching values, returning an array of the corresponding matches.
m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie") m.to_a #=> ["HX1138", "H", "X", "113", "8"] m.values_at(0, 2, -2) #=> ["HX1138", "X", "113"]
static VALUE match_values_at(int argc, VALUE *argv, VALUE match) { struct re_registers *regs; match_check(match); regs = RMATCH_REGS(match); return rb_get_values_at(match, regs->num_regs, argc, argv, match_entry); }
Commenting is here to help enhance the documentation. For example, sample code, or clarification of the documentation.
If you have questions about Ruby or the documentation, please post to one of the Ruby mailing lists. You will get better, faster, help that way.
If you wish to post a correction of the docs, please do so, but also file bug report so that it can be corrected for the next release. Thank you.