In Files

  • re.c

MatchData

MatchData is the type of the special variable $~, and is the type of the object returned by Regexp#match and Regexp#last_match. It encapsulates all the results of a pattern match, results normally accessed through the special variables $&, $', $`, $1, $2, and so on. Matchdata is also known as MatchingData.

Public Instance Methods

mtch[i] => obj click to toggle source
mtch[start, length] => array
mtch[range] => array

Match Reference—MatchData acts as an array, and may be accessed using the normal array indexing techniques. mtch is equivalent to the special variable $&, and returns the entire matched string. mtch, mtch, and so on return the values of the matched backreferences (portions of the pattern between parentheses).

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m[0]       #=> "HX1138"
m[1, 2]    #=> ["H", "X"]
m[1..3]    #=> ["H", "X", "113"]
m[-3, 2]   #=> ["X", "113"]
 
               static VALUE
match_aref(argc, argv, match)
    int argc;
    VALUE *argv;
    VALUE match;
{
    VALUE idx, rest;

    rb_scan_args(argc, argv, "11", &idx, &rest);

    if (!NIL_P(rest) || !FIXNUM_P(idx) || FIX2INT(idx) < 0) {
        return rb_ary_aref(argc, argv, match_to_a(match));
    }
    return rb_reg_nth_match(FIX2INT(idx), match);
}
            
begin(n) => integer click to toggle source

Returns the offset of the start of the nth element of the match array in the string.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.begin(0)   #=> 1
m.begin(2)   #=> 2
 
               static VALUE
match_begin(match, n)
    VALUE match, n;
{
    int i = NUM2INT(n);

    match_check(match);
    if (i < 0 || RMATCH(match)->regs->num_regs <= i)
        rb_raise(rb_eIndexError, "index %d out of matches", i);

    if (RMATCH(match)->regs->beg[i] < 0)
        return Qnil;

    return INT2FIX(RMATCH(match)->regs->beg[i]);
}
            
captures => array click to toggle source

Returns the array of captures; equivalent to mtch.to_a[1..-1].

f1,f2,f3,f4 = /(.)(.)(\d+)(\d)/.match("THX1138.").captures
f1    #=> "H"
f2    #=> "X"
f3    #=> "113"
f4    #=> "8"
 
               static VALUE
match_captures(match)
    VALUE match;
{
    return match_array(match, 1);
}
            
end(n) => integer click to toggle source

Returns the offset of the character immediately following the end of the nth element of the match array in the string.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.end(0)   #=> 7
m.end(2)   #=> 3
 
               static VALUE
match_end(match, n)
    VALUE match, n;
{
    int i = NUM2INT(n);

    match_check(match);
    if (i < 0 || RMATCH(match)->regs->num_regs <= i)
        rb_raise(rb_eIndexError, "index %d out of matches", i);

    if (RMATCH(match)->regs->beg[i] < 0)
        return Qnil;

    return INT2FIX(RMATCH(match)->regs->end[i]);
}
            
to_s => string click to toggle source

Returns a string representing obj. The default to_s prints the object's class and an encoding of the object id. As a special case, the top-level object that is the initial execution context of Ruby programs returns "main.''

 
               VALUE
rb_any_to_s(obj)
    VALUE obj;
{
    char *cname = rb_obj_classname(obj);
    size_t len;
    VALUE str;

    len = strlen(cname)+6+16;
    str = rb_str_new(0, len); /* 6:tags 16:addr */
    snprintf(RSTRING(str)->ptr, len+1, "#<%s:0x%lx>", cname, obj);
    RSTRING(str)->len = strlen(RSTRING(str)->ptr);
    if (OBJ_TAINTED(obj)) OBJ_TAINT(str);

    return str;
}
            
length => integer click to toggle source

Returns the number of elements in the match array.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.length   #=> 5
m.size     #=> 5
 
               static VALUE
match_size(match)
    VALUE match;
{
    match_check(match);
    return INT2FIX(RMATCH(match)->regs->num_regs);
}
            
offset(n) => array click to toggle source

Returns a two-element array containing the beginning and ending offsets of the nth match.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.offset(0)   #=> [1, 7]
m.offset(4)   #=> [6, 7]
 
               static VALUE
match_offset(match, n)
    VALUE match, n;
{
    int i = NUM2INT(n);

    match_check(match);
    if (i < 0 || RMATCH(match)->regs->num_regs <= i)
        rb_raise(rb_eIndexError, "index %d out of matches", i);

    if (RMATCH(match)->regs->beg[i] < 0)
        return rb_assoc_new(Qnil, Qnil);

    return rb_assoc_new(INT2FIX(RMATCH(match)->regs->beg[i]),
                        INT2FIX(RMATCH(match)->regs->end[i]));
}
            
post_match => str click to toggle source

Returns the portion of the original string after the current match. Equivalent to the special variable $'.

m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
m.post_match   #=> ": The Movie"
 
               VALUE
rb_reg_match_post(match)
    VALUE match;
{
    VALUE str;
    long pos;

    if (NIL_P(match)) return Qnil;
    match_check(match);
    if (RMATCH(match)->BEG(0) == -1) return Qnil;
    str = RMATCH(match)->str;
    pos = RMATCH(match)->END(0);
    str = rb_str_substr(str, pos, RSTRING(str)->len - pos);
    if (OBJ_TAINTED(match)) OBJ_TAINT(str);
    return str;
}
            
pre_match => str click to toggle source

Returns the portion of the original string before the current match. Equivalent to the special variable $`.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.pre_match   #=> "T"
 
               VALUE
rb_reg_match_pre(match)
    VALUE match;
{
    VALUE str;

    if (NIL_P(match)) return Qnil;
    match_check(match);
    if (RMATCH(match)->BEG(0) == -1) return Qnil;
    str = rb_str_substr(RMATCH(match)->str, 0, RMATCH(match)->BEG(0));
    if (OBJ_TAINTED(match)) OBJ_TAINT(str);
    return str;
}
            
select([index]*) => array click to toggle source

Uses each index to access the matching values, returning an array of the corresponding matches.

m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
m.to_a               #=> ["HX1138", "H", "X", "113", "8"]
m.select(0, 2, -2)   #=> ["HX1138", "X", "113"]
 
               static VALUE
match_select(argc, argv, match)
    int argc;
    VALUE *argv;
    VALUE match;
{
    if (argc > 0) {
        rb_raise(rb_eArgError, "wrong number of arguments (%d for 0)", argc);
    }
    else {
        struct re_registers *regs;
        VALUE target;
        VALUE result = rb_ary_new();
        int i;
        int taint = OBJ_TAINTED(match);

        match_check(match);
        regs = RMATCH(match)->regs;
        target = RMATCH(match)->str;

        for (i=0; i<regs->num_regs; i++) {
            VALUE str = rb_str_substr(target, regs->beg[i], regs->end[i]-regs->beg[i]);
            if (taint) OBJ_TAINT(str);
            if (RTEST(rb_yield(str))) {
                rb_ary_push(result, str);
            }
        }
        return result;
    }
}
            
size => integer click to toggle source

Returns the number of elements in the match array.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.length   #=> 5
m.size     #=> 5
 
               static VALUE
match_size(match)
    VALUE match;
{
    match_check(match);
    return INT2FIX(RMATCH(match)->regs->num_regs);
}
            
string => str click to toggle source

Returns a frozen copy of the string passed in to match.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.string   #=> "THX1138."
 
               static VALUE
match_string(match)
    VALUE match;
{
    match_check(match);
    return RMATCH(match)->str;  /* str is frozen */
}
            
to_a => anArray click to toggle source

Returns the array of matches.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.to_a   #=> ["HX1138", "H", "X", "113", "8"]

Because to_a is called when expanding *variable, there's a useful assignment shortcut for extracting matched fields. This is slightly slower than accessing the fields directly (as an intermediate array is generated).

all,f1,f2,f3 = *(/(.)(.)(\d+)(\d)/.match("THX1138."))
all   #=> "HX1138"
f1    #=> "H"
f2    #=> "X"
f3    #=> "113"
 
               static VALUE
match_to_a(match)
    VALUE match;
{
    return match_array(match, 0);
}
            
to_s => str click to toggle source

Returns the entire matched string.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.to_s   #=> "HX1138"
 
               static VALUE
match_to_s(match)
    VALUE match;
{
    VALUE str = rb_reg_last_match(match);

    if (NIL_P(str)) str = rb_str_new(0,0);
    if (OBJ_TAINTED(match)) OBJ_TAINT(str);
    if (OBJ_TAINTED(RMATCH(match)->str)) OBJ_TAINT(str);
    return str;
}
            
select([index]*) => array click to toggle source

Uses each index to access the matching values, returning an array of the corresponding matches.

m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
m.to_a               #=> ["HX1138", "H", "X", "113", "8"]
m.select(0, 2, -2)   #=> ["HX1138", "X", "113"]
 
               static VALUE
match_values_at(argc, argv, match)
    int argc;
    VALUE *argv;
    VALUE match;
{
    match_check(match);
    return rb_values_at(match, RMATCH(match)->regs->num_regs, argc, argv, match_entry);
}