MatchData is the type of the special variable $~,
and is the type of the object returned by Regexp#match and
Regexp.last_match. It encapsulates all the results of a
pattern match, results normally accessed through the special variables
$&, $', $`, $1,
$2, and so on.
Equality—Two matchdata are equal if their target strings, patterns, and matched positions are identical.
static VALUE
match_equal(VALUE match1, VALUE match2)
{
const struct re_registers *regs1, *regs2;
if (match1 == match2) return Qtrue;
if (TYPE(match2) != T_MATCH) return Qfalse;
if (!rb_str_equal(RMATCH(match1)->str, RMATCH(match2)->str)) return Qfalse;
if (!rb_reg_equal(RMATCH(match1)->regexp, RMATCH(match2)->regexp)) return Qfalse;
regs1 = RMATCH_REGS(match1);
regs2 = RMATCH_REGS(match2);
if (regs1->num_regs != regs2->num_regs) return Qfalse;
if (memcmp(regs1->beg, regs2->beg, regs1->num_regs * sizeof(*regs1->beg))) return Qfalse;
if (memcmp(regs1->end, regs2->end, regs1->num_regs * sizeof(*regs1->end))) return Qfalse;
return Qtrue;
}
Match Reference—MatchData acts as an array, and may be
accessed using the normal array indexing techniques. mtch is equivalent to the special variable
$&, and returns the entire matched string. mtch, mtch, and so
on return the values of the matched backreferences (portions of the pattern
between parentheses).
m = /(.)(.)(\d+)(\d)/.match("THX1138.") m #=> #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8"> m[0] #=> "HX1138" m[1, 2] #=> ["H", "X"] m[1..3] #=> ["H", "X", "113"] m[-3, 2] #=> ["X", "113"] m = /(?<foo>a+)b/.match("ccaaab") m #=> #<MatchData "aaab" foo:"aaa"> m["foo"] #=> "aaa" m[:foo] #=> "aaa"
static VALUE
match_aref(int argc, VALUE *argv, VALUE match)
{
VALUE idx, rest;
match_check(match);
rb_scan_args(argc, argv, "11", &idx, &rest);
if (NIL_P(rest)) {
if (FIXNUM_P(idx)) {
if (FIX2INT(idx) >= 0) {
return rb_reg_nth_match(FIX2INT(idx), match);
}
}
else {
const char *p;
int num;
switch (TYPE(idx)) {
case T_SYMBOL:
p = rb_id2name(SYM2ID(idx));
goto name_to_backref;
break;
case T_STRING:
p = StringValuePtr(idx);
name_to_backref:
num = name_to_backref_number(RMATCH_REGS(match),
RMATCH(match)->regexp, p, p + strlen(p));
return rb_reg_nth_match(num, match);
break;
default:
break;
}
}
}
return rb_ary_aref(argc, argv, match_to_a(match));
}
Returns the offset of the start of the nth element of the match array in the string. n can be a string or symbol to reference a named capture.
m = /(.)(.)(\d+)(\d)/.match("THX1138.") m.begin(0) #=> 1 m.begin(2) #=> 2 m = /(?<foo>.)(.)(?<bar>.)/.match("hoge") p m.begin(:foo) #=> 0 p m.begin(:bar) #=> 2
static VALUE
match_begin(VALUE match, VALUE n)
{
int i = match_backref_number(match, n);
struct re_registers *regs = RMATCH_REGS(match);
match_check(match);
if (i < 0 || regs->num_regs <= i)
rb_raise(rb_eIndexError, "index %d out of matches", i);
if (BEG(i) < 0)
return Qnil;
update_char_offset(match);
return INT2FIX(RMATCH(match)->rmatch->char_offset[i].beg);
}
Returns the array of captures; equivalent to mtch.to_a[1..-1].
f1,f2,f3,f4 = /(.)(.)(\d+)(\d)/.match("THX1138.").captures f1 #=> "H" f2 #=> "X" f3 #=> "113" f4 #=> "8"
static VALUE
match_captures(VALUE match)
{
return match_array(match, 1);
}
Returns the offset of the character immediately following the end of the nth element of the match array in the string. n can be a string or symbol to reference a named capture.
m = /(.)(.)(\d+)(\d)/.match("THX1138.") m.end(0) #=> 7 m.end(2) #=> 3 m = /(?<foo>.)(.)(?<bar>.)/.match("hoge") p m.end(:foo) #=> 1 p m.end(:bar) #=> 3
static VALUE
match_end(VALUE match, VALUE n)
{
int i = match_backref_number(match, n);
struct re_registers *regs = RMATCH_REGS(match);
match_check(match);
if (i < 0 || regs->num_regs <= i)
rb_raise(rb_eIndexError, "index %d out of matches", i);
if (BEG(i) < 0)
return Qnil;
update_char_offset(match);
return INT2FIX(RMATCH(match)->rmatch->char_offset[i].end);
}
Equality—Two matchdata are equal if their target strings, patterns, and matched positions are identical.
static VALUE
match_equal(VALUE match1, VALUE match2)
{
const struct re_registers *regs1, *regs2;
if (match1 == match2) return Qtrue;
if (TYPE(match2) != T_MATCH) return Qfalse;
if (!rb_str_equal(RMATCH(match1)->str, RMATCH(match2)->str)) return Qfalse;
if (!rb_reg_equal(RMATCH(match1)->regexp, RMATCH(match2)->regexp)) return Qfalse;
regs1 = RMATCH_REGS(match1);
regs2 = RMATCH_REGS(match2);
if (regs1->num_regs != regs2->num_regs) return Qfalse;
if (memcmp(regs1->beg, regs2->beg, regs1->num_regs * sizeof(*regs1->beg))) return Qfalse;
if (memcmp(regs1->end, regs2->end, regs1->num_regs * sizeof(*regs1->end))) return Qfalse;
return Qtrue;
}
Produce a hash based on the target string, regexp and matched positions of this matchdata.
static VALUE
match_hash(VALUE match)
{
const struct re_registers *regs;
st_index_t hashval = rb_hash_start(rb_str_hash(RMATCH(match)->str));
rb_hash_uint(hashval, reg_hash(RMATCH(match)->regexp));
regs = RMATCH_REGS(match);
hashval = rb_hash_uint(hashval, regs->num_regs);
hashval = rb_hash_uint(hashval, rb_memhash(regs->beg, regs->num_regs * sizeof(*regs->beg)));
hashval = rb_hash_uint(hashval, rb_memhash(regs->end, regs->num_regs * sizeof(*regs->end)));
hashval = rb_hash_end(hashval);
return LONG2FIX(hashval);
}
Returns a printable version of mtch.
puts /.$/.match("foo").inspect #=> #<MatchData "o"> puts /(.)(.)(.)/.match("foo").inspect #=> #<MatchData "foo" 1:"f" 2:"o" 3:"o"> puts /(.)(.)?(.)/.match("fo").inspect #=> #<MatchData "fo" 1:"f" 2:nil 3:"o"> puts /(?<foo>.)(?<bar>.)(?<baz>.)/.match("hoge").inspect #=> #<MatchData "hog" foo:"h" bar:"o" baz:"g">
static VALUE
match_inspect(VALUE match)
{
const char *cname = rb_obj_classname(match);
VALUE str;
int i;
struct re_registers *regs = RMATCH_REGS(match);
int num_regs = regs->num_regs;
struct backref_name_tag *names;
VALUE regexp = RMATCH(match)->regexp;
if (regexp == 0) {
return rb_sprintf("#<%s:%p>", cname, (void*)match);
}
names = ALLOCA_N(struct backref_name_tag, num_regs);
MEMZERO(names, struct backref_name_tag, num_regs);
onig_foreach_name(RREGEXP(regexp)->ptr,
match_inspect_name_iter, names);
str = rb_str_buf_new2("#<");
rb_str_buf_cat2(str, cname);
for (i = 0; i < num_regs; i++) {
VALUE v;
rb_str_buf_cat2(str, " ");
if (0 < i) {
if (names[i].name)
rb_str_buf_cat(str, (const char *)names[i].name, names[i].len);
else {
rb_str_catf(str, "%d", i);
}
rb_str_buf_cat2(str, ":");
}
v = rb_reg_nth_match(i, match);
if (v == Qnil)
rb_str_buf_cat2(str, "nil");
else
rb_str_buf_append(str, rb_str_inspect(v));
}
rb_str_buf_cat2(str, ">");
return str;
}
Returns the number of elements in the match array.
m = /(.)(.)(\d+)(\d)/.match("THX1138.") m.length #=> 5 m.size #=> 5
static VALUE
match_size(VALUE match)
{
match_check(match);
return INT2FIX(RMATCH_REGS(match)->num_regs);
}
Returns a list of names of captures as an array of strings. It is same as mtch.regexp.names.
/(?<foo>.)(?<bar>.)(?<baz>.)/.match("hoge").names #=> ["foo", "bar", "baz"] m = /(?<x>.)(?<y>.)?/.match("a") #=> #<MatchData "a" x:"a" y:nil> m.names #=> ["x", "y"]
static VALUE
match_names(VALUE match)
{
match_check(match);
return rb_reg_names(RMATCH(match)->regexp);
}
Returns a two-element array containing the beginning and ending offsets of the nth match. n can be a string or symbol to reference a named capture.
m = /(.)(.)(\d+)(\d)/.match("THX1138.") m.offset(0) #=> [1, 7] m.offset(4) #=> [6, 7] m = /(?<foo>.)(.)(?<bar>.)/.match("hoge") p m.offset(:foo) #=> [0, 1] p m.offset(:bar) #=> [2, 3]
static VALUE
match_offset(VALUE match, VALUE n)
{
int i = match_backref_number(match, n);
struct re_registers *regs = RMATCH_REGS(match);
match_check(match);
if (i < 0 || regs->num_regs <= i)
rb_raise(rb_eIndexError, "index %d out of matches", i);
if (BEG(i) < 0)
return rb_assoc_new(Qnil, Qnil);
update_char_offset(match);
return rb_assoc_new(INT2FIX(RMATCH(match)->rmatch->char_offset[i].beg),
INT2FIX(RMATCH(match)->rmatch->char_offset[i].end));
}
Returns the portion of the original string after the current match.
Equivalent to the special variable $'.
m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie") m.post_match #=> ": The Movie"
VALUE
rb_reg_match_post(VALUE match)
{
VALUE str;
long pos;
struct re_registers *regs;
if (NIL_P(match)) return Qnil;
match_check(match);
regs = RMATCH_REGS(match);
if (BEG(0) == -1) return Qnil;
str = RMATCH(match)->str;
pos = END(0);
str = rb_str_subseq(str, pos, RSTRING_LEN(str) - pos);
if (OBJ_TAINTED(match)) OBJ_TAINT(str);
return str;
}
Returns the portion of the original string before the current match.
Equivalent to the special variable $`.
m = /(.)(.)(\d+)(\d)/.match("THX1138.") m.pre_match #=> "T"
VALUE
rb_reg_match_pre(VALUE match)
{
VALUE str;
struct re_registers *regs;
if (NIL_P(match)) return Qnil;
match_check(match);
regs = RMATCH_REGS(match);
if (BEG(0) == -1) return Qnil;
str = rb_str_subseq(RMATCH(match)->str, 0, BEG(0));
if (OBJ_TAINTED(match)) OBJ_TAINT(str);
return str;
}
Returns the regexp.
m = /a.*b/.match("abc") m.regexp #=> /a.*b/
static VALUE
match_regexp(VALUE match)
{
match_check(match);
return RMATCH(match)->regexp;
}
Returns the number of elements in the match array.
m = /(.)(.)(\d+)(\d)/.match("THX1138.") m.length #=> 5 m.size #=> 5
static VALUE
match_size(VALUE match)
{
match_check(match);
return INT2FIX(RMATCH_REGS(match)->num_regs);
}
Returns a frozen copy of the string passed in to match.
m = /(.)(.)(\d+)(\d)/.match("THX1138.") m.string #=> "THX1138."
static VALUE
match_string(VALUE match)
{
match_check(match);
return RMATCH(match)->str; /* str is frozen */
}
Returns the array of matches.
m = /(.)(.)(\d+)(\d)/.match("THX1138.") m.to_a #=> ["HX1138", "H", "X", "113", "8"]
Because to_a is called when expanding
*variable, there’s a useful assignment shortcut for
extracting matched fields. This is slightly slower than accessing the
fields directly (as an intermediate array is generated).
all,f1,f2,f3 = *(/(.)(.)(\d+)(\d)/.match("THX1138.")) all #=> "HX1138" f1 #=> "H" f2 #=> "X" f3 #=> "113"
static VALUE
match_to_a(VALUE match)
{
return match_array(match, 0);
}
Returns the entire matched string.
m = /(.)(.)(\d+)(\d)/.match("THX1138.") m.to_s #=> "HX1138"
static VALUE
match_to_s(VALUE match)
{
VALUE str = rb_reg_last_match(match);
match_check(match);
if (NIL_P(str)) str = rb_str_new(0,0);
if (OBJ_TAINTED(match)) OBJ_TAINT(str);
if (OBJ_TAINTED(RMATCH(match)->str)) OBJ_TAINT(str);
return str;
}
Uses each index to access the matching values, returning an array of the corresponding matches.
m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie") m.to_a #=> ["HX1138", "H", "X", "113", "8"] m.values_at(0, 2, -2) #=> ["HX1138", "X", "113"]
static VALUE
match_values_at(int argc, VALUE *argv, VALUE match)
{
struct re_registers *regs;
match_check(match);
regs = RMATCH_REGS(match);
return rb_get_values_at(match, regs->num_regs, argc, argv, match_entry);
}
Commenting is here to help enhance the documentation. For example, sample code, or clarification of the documentation.
If you have questions about Ruby or the documentation, please post to one of the Ruby mailing lists. You will get better, faster, help that way.
If you wish to post a correction of the docs, please do so, but also file bug report so that it can be corrected for the next release. Thank you.