In Files

MatchData

MatchData is the type of the special variable $~, and is the type of the object returned by Regexp#match and Regexp.last_match. It encapsulates all the results of a pattern match, results normally accessed through the special variables $&, $', $`, $1, $2, and so on.

Public Instance Methods

mtch == mtch2 → true or false click to toggle source

Equality—Two matchdata are equal if their target strings, patterns, and matched positions are identical.

 
               static VALUE
match_equal(VALUE match1, VALUE match2)
{
    const struct re_registers *regs1, *regs2;
    if (match1 == match2) return Qtrue;
    if (TYPE(match2) != T_MATCH) return Qfalse;
    if (!rb_str_equal(RMATCH(match1)->str, RMATCH(match2)->str)) return Qfalse;
    if (!rb_reg_equal(RMATCH(match1)->regexp, RMATCH(match2)->regexp)) return Qfalse;
    regs1 = RMATCH_REGS(match1);
    regs2 = RMATCH_REGS(match2);
    if (regs1->num_regs != regs2->num_regs) return Qfalse;
    if (memcmp(regs1->beg, regs2->beg, regs1->num_regs * sizeof(*regs1->beg))) return Qfalse;
    if (memcmp(regs1->end, regs2->end, regs1->num_regs * sizeof(*regs1->end))) return Qfalse;
    return Qtrue;
}
            
mtch[i] → str or nil click to toggle source
mtch[start, length] → array
mtch[range] → array
mtch[name] → str or nil

Match Reference—MatchData acts as an array, and may be accessed using the normal array indexing techniques. mtch is equivalent to the special variable $&, and returns the entire matched string. mtch, mtch, and so on return the values of the matched backreferences (portions of the pattern between parentheses).

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m          #=> #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8">
m[0]       #=> "HX1138"
m[1, 2]    #=> ["H", "X"]
m[1..3]    #=> ["H", "X", "113"]
m[-3, 2]   #=> ["X", "113"]

m = /(?<foo>a+)b/.match("ccaaab")
m          #=> #<MatchData "aaab" foo:"aaa">
m["foo"]   #=> "aaa"
m[:foo]    #=> "aaa"
 
               static VALUE
match_aref(int argc, VALUE *argv, VALUE match)
{
    VALUE idx, rest;

    match_check(match);
    rb_scan_args(argc, argv, "11", &idx, &rest);

    if (NIL_P(rest)) {
        if (FIXNUM_P(idx)) {
            if (FIX2INT(idx) >= 0) {
                return rb_reg_nth_match(FIX2INT(idx), match);
            }
        }
        else {
            const char *p;
            int num;

            switch (TYPE(idx)) {
              case T_SYMBOL:
                p = rb_id2name(SYM2ID(idx));
                goto name_to_backref;
                break;
              case T_STRING:
                p = StringValuePtr(idx);

              name_to_backref:
                num = name_to_backref_number(RMATCH_REGS(match),
                                             RMATCH(match)->regexp, p, p + strlen(p));
                return rb_reg_nth_match(num, match);
                break;

              default:
                break;
            }
        }
    }

    return rb_ary_aref(argc, argv, match_to_a(match));
}
            
begin(n) → integer click to toggle source

Returns the offset of the start of the nth element of the match array in the string. n can be a string or symbol to reference a named capture.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.begin(0)       #=> 1
m.begin(2)       #=> 2

m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
p m.begin(:foo)  #=> 0
p m.begin(:bar)  #=> 2
 
               static VALUE
match_begin(VALUE match, VALUE n)
{
    int i = match_backref_number(match, n);
    struct re_registers *regs = RMATCH_REGS(match);

    match_check(match);
    if (i < 0 || regs->num_regs <= i)
        rb_raise(rb_eIndexError, "index %d out of matches", i);

    if (BEG(i) < 0)
        return Qnil;

    update_char_offset(match);
    return INT2FIX(RMATCH(match)->rmatch->char_offset[i].beg);
}
            
captures → array click to toggle source

Returns the array of captures; equivalent to mtch.to_a[1..-1].

f1,f2,f3,f4 = /(.)(.)(\d+)(\d)/.match("THX1138.").captures
f1    #=> "H"
f2    #=> "X"
f3    #=> "113"
f4    #=> "8"
 
               static VALUE
match_captures(VALUE match)
{
    return match_array(match, 1);
}
            
end(n) → integer click to toggle source

Returns the offset of the character immediately following the end of the nth element of the match array in the string. n can be a string or symbol to reference a named capture.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.end(0)         #=> 7
m.end(2)         #=> 3

m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
p m.end(:foo)    #=> 1
p m.end(:bar)    #=> 3
 
               static VALUE
match_end(VALUE match, VALUE n)
{
    int i = match_backref_number(match, n);
    struct re_registers *regs = RMATCH_REGS(match);

    match_check(match);
    if (i < 0 || regs->num_regs <= i)
        rb_raise(rb_eIndexError, "index %d out of matches", i);

    if (BEG(i) < 0)
        return Qnil;

    update_char_offset(match);
    return INT2FIX(RMATCH(match)->rmatch->char_offset[i].end);
}
            
mtch == mtch2 → true or false click to toggle source

Equality—Two matchdata are equal if their target strings, patterns, and matched positions are identical.

 
               static VALUE
match_equal(VALUE match1, VALUE match2)
{
    const struct re_registers *regs1, *regs2;
    if (match1 == match2) return Qtrue;
    if (TYPE(match2) != T_MATCH) return Qfalse;
    if (!rb_str_equal(RMATCH(match1)->str, RMATCH(match2)->str)) return Qfalse;
    if (!rb_reg_equal(RMATCH(match1)->regexp, RMATCH(match2)->regexp)) return Qfalse;
    regs1 = RMATCH_REGS(match1);
    regs2 = RMATCH_REGS(match2);
    if (regs1->num_regs != regs2->num_regs) return Qfalse;
    if (memcmp(regs1->beg, regs2->beg, regs1->num_regs * sizeof(*regs1->beg))) return Qfalse;
    if (memcmp(regs1->end, regs2->end, regs1->num_regs * sizeof(*regs1->end))) return Qfalse;
    return Qtrue;
}
            
hash → integer click to toggle source

Produce a hash based on the target string, regexp and matched positions of this matchdata.

 
               static VALUE
match_hash(VALUE match)
{
    const struct re_registers *regs;
    st_index_t hashval = rb_hash_start(rb_str_hash(RMATCH(match)->str));

    rb_hash_uint(hashval, reg_hash(RMATCH(match)->regexp));
    regs = RMATCH_REGS(match);
    hashval = rb_hash_uint(hashval, regs->num_regs);
    hashval = rb_hash_uint(hashval, rb_memhash(regs->beg, regs->num_regs * sizeof(*regs->beg)));
    hashval = rb_hash_uint(hashval, rb_memhash(regs->end, regs->num_regs * sizeof(*regs->end)));
    hashval = rb_hash_end(hashval);
    return LONG2FIX(hashval);
}
            
inspect → str click to toggle source

Returns a printable version of mtch.

puts /.$/.match("foo").inspect
#=> #<MatchData "o">

puts /(.)(.)(.)/.match("foo").inspect
#=> #<MatchData "foo" 1:"f" 2:"o" 3:"o">

puts /(.)(.)?(.)/.match("fo").inspect
#=> #<MatchData "fo" 1:"f" 2:nil 3:"o">

puts /(?<foo>.)(?<bar>.)(?<baz>.)/.match("hoge").inspect
#=> #<MatchData "hog" foo:"h" bar:"o" baz:"g">
 
               static VALUE
match_inspect(VALUE match)
{
    const char *cname = rb_obj_classname(match);
    VALUE str;
    int i;
    struct re_registers *regs = RMATCH_REGS(match);
    int num_regs = regs->num_regs;
    struct backref_name_tag *names;
    VALUE regexp = RMATCH(match)->regexp;

    if (regexp == 0) {
        return rb_sprintf("#<%s:%p>", cname, (void*)match);
    }

    names = ALLOCA_N(struct backref_name_tag, num_regs);
    MEMZERO(names, struct backref_name_tag, num_regs);

    onig_foreach_name(RREGEXP(regexp)->ptr,
            match_inspect_name_iter, names);

    str = rb_str_buf_new2("#<");
    rb_str_buf_cat2(str, cname);

    for (i = 0; i < num_regs; i++) {
        VALUE v;
        rb_str_buf_cat2(str, " ");
        if (0 < i) {
            if (names[i].name)
                rb_str_buf_cat(str, (const char *)names[i].name, names[i].len);
            else {
                rb_str_catf(str, "%d", i);
            }
            rb_str_buf_cat2(str, ":");
        }
        v = rb_reg_nth_match(i, match);
        if (v == Qnil)
            rb_str_buf_cat2(str, "nil");
        else
            rb_str_buf_append(str, rb_str_inspect(v));
    }
    rb_str_buf_cat2(str, ">");

    return str;
}
            
length → integer click to toggle source
size → integer

Returns the number of elements in the match array.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.length   #=> 5
m.size     #=> 5
 
               static VALUE
match_size(VALUE match)
{
    match_check(match);
    return INT2FIX(RMATCH_REGS(match)->num_regs);
}
            
names → [name1, name2, ...] click to toggle source

Returns a list of names of captures as an array of strings. It is same as mtch.regexp.names.

/(?<foo>.)(?<bar>.)(?<baz>.)/.match("hoge").names
#=> ["foo", "bar", "baz"]

m = /(?<x>.)(?<y>.)?/.match("a") #=> #<MatchData "a" x:"a" y:nil>
m.names                          #=> ["x", "y"]
 
               static VALUE
match_names(VALUE match)
{
    match_check(match);
    return rb_reg_names(RMATCH(match)->regexp);
}
            
offset(n) → array click to toggle source

Returns a two-element array containing the beginning and ending offsets of the nth match. n can be a string or symbol to reference a named capture.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.offset(0)      #=> [1, 7]
m.offset(4)      #=> [6, 7]

m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
p m.offset(:foo) #=> [0, 1]
p m.offset(:bar) #=> [2, 3]
 
               static VALUE
match_offset(VALUE match, VALUE n)
{
    int i = match_backref_number(match, n);
    struct re_registers *regs = RMATCH_REGS(match);

    match_check(match);
    if (i < 0 || regs->num_regs <= i)
        rb_raise(rb_eIndexError, "index %d out of matches", i);

    if (BEG(i) < 0)
        return rb_assoc_new(Qnil, Qnil);

    update_char_offset(match);
    return rb_assoc_new(INT2FIX(RMATCH(match)->rmatch->char_offset[i].beg),
                        INT2FIX(RMATCH(match)->rmatch->char_offset[i].end));
}
            
post_match → str click to toggle source

Returns the portion of the original string after the current match. Equivalent to the special variable $'.

m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
m.post_match   #=> ": The Movie"
 
               VALUE
rb_reg_match_post(VALUE match)
{
    VALUE str;
    long pos;
    struct re_registers *regs;

    if (NIL_P(match)) return Qnil;
    match_check(match);
    regs = RMATCH_REGS(match);
    if (BEG(0) == -1) return Qnil;
    str = RMATCH(match)->str;
    pos = END(0);
    str = rb_str_subseq(str, pos, RSTRING_LEN(str) - pos);
    if (OBJ_TAINTED(match)) OBJ_TAINT(str);
    return str;
}
            
pre_match → str click to toggle source

Returns the portion of the original string before the current match. Equivalent to the special variable $`.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.pre_match   #=> "T"
 
               VALUE
rb_reg_match_pre(VALUE match)
{
    VALUE str;
    struct re_registers *regs;

    if (NIL_P(match)) return Qnil;
    match_check(match);
    regs = RMATCH_REGS(match);
    if (BEG(0) == -1) return Qnil;
    str = rb_str_subseq(RMATCH(match)->str, 0, BEG(0));
    if (OBJ_TAINTED(match)) OBJ_TAINT(str);
    return str;
}
            
regexp → regexp click to toggle source

Returns the regexp.

m = /a.*b/.match("abc")
m.regexp #=> /a.*b/
 
               static VALUE
match_regexp(VALUE match)
{
    match_check(match);
    return RMATCH(match)->regexp;
}
            
length → integer click to toggle source
size → integer

Returns the number of elements in the match array.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.length   #=> 5
m.size     #=> 5
 
               static VALUE
match_size(VALUE match)
{
    match_check(match);
    return INT2FIX(RMATCH_REGS(match)->num_regs);
}
            
string → str click to toggle source

Returns a frozen copy of the string passed in to match.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.string   #=> "THX1138."
 
               static VALUE
match_string(VALUE match)
{
    match_check(match);
    return RMATCH(match)->str;  /* str is frozen */
}
            
to_a → anArray click to toggle source

Returns the array of matches.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.to_a   #=> ["HX1138", "H", "X", "113", "8"]

Because to_a is called when expanding *variable, there’s a useful assignment shortcut for extracting matched fields. This is slightly slower than accessing the fields directly (as an intermediate array is generated).

all,f1,f2,f3 = *(/(.)(.)(\d+)(\d)/.match("THX1138."))
all   #=> "HX1138"
f1    #=> "H"
f2    #=> "X"
f3    #=> "113"
 
               static VALUE
match_to_a(VALUE match)
{
    return match_array(match, 0);
}
            
to_s → str click to toggle source

Returns the entire matched string.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.to_s   #=> "HX1138"
 
               static VALUE
match_to_s(VALUE match)
{
    VALUE str = rb_reg_last_match(match);

    match_check(match);
    if (NIL_P(str)) str = rb_str_new(0,0);
    if (OBJ_TAINTED(match)) OBJ_TAINT(str);
    if (OBJ_TAINTED(RMATCH(match)->str)) OBJ_TAINT(str);
    return str;
}
            
values_at([index]*) → array click to toggle source

Uses each index to access the matching values, returning an array of the corresponding matches.

m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
m.to_a               #=> ["HX1138", "H", "X", "113", "8"]
m.values_at(0, 2, -2)   #=> ["HX1138", "X", "113"]
 
               static VALUE
match_values_at(int argc, VALUE *argv, VALUE match)
{
    struct re_registers *regs;

    match_check(match);
    regs = RMATCH_REGS(match);
    return rb_get_values_at(match, regs->num_regs, argc, argv, match_entry);
}
            

Commenting is here to help enhance the documentation. For example, sample code, or clarification of the documentation.

If you have questions about Ruby or the documentation, please post to one of the Ruby mailing lists. You will get better, faster, help that way.

If you wish to post a correction of the docs, please do so, but also file bug report so that it can be corrected for the next release. Thank you.

blog comments powered by Disqus