In Files

  • re.c

MatchData

MatchData is the type of the special variable $~, and is the type of the object returned by Regexp#match and Regexp.last_match. It encapsulates all the results of a pattern match, results normally accessed through the special variables $&, $', $`, $1, $2, and so on.

Public Instance Methods

mtch[i] => str or nil click to toggle source
mtch[start, length] => array
mtch[range] => array
mtch[name] => str or nil

Match Reference—MatchData acts as an array, and may be accessed using the normal array indexing techniques. mtch is equivalent to the special variable $&, and returns the entire matched string. mtch, mtch, and so on return the values of the matched backreferences (portions of the pattern between parentheses).

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m          #=> #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8">
m[0]       #=> "HX1138"
m[1, 2]    #=> ["H", "X"]
m[1..3]    #=> ["H", "X", "113"]
m[-3, 2]   #=> ["X", "113"]

m = /(?<foo>a+)b/.match("ccaaab")
m          #=> #<MatchData "aaab" foo:"aaa">
m["foo"]   #=> "aaa"
m[:foo]    #=> "aaa"
 
               static VALUE
match_aref(int argc, VALUE *argv, VALUE match)
{
    VALUE idx, rest;

    match_check(match);
    rb_scan_args(argc, argv, "11", &idx, &rest);

    if (NIL_P(rest)) {
      if (FIXNUM_P(idx)) {
        if (FIX2INT(idx) >= 0) {
          return rb_reg_nth_match(FIX2INT(idx), match);
        }
      }
      else {
        const char *p;
        int num;

        switch (TYPE(idx)) {
          case T_SYMBOL:
            p = rb_id2name(SYM2ID(idx));
            goto name_to_backref;
            break;
          case T_STRING:
            p = StringValuePtr(idx);

          name_to_backref:
            num = name_to_backref_number(RMATCH_REGS(match),
                       RMATCH(match)->regexp, p, p + strlen(p));
            return rb_reg_nth_match(num, match);
            break;

          default:
            break;
        }
      }
    }

    return rb_ary_aref(argc, argv, match_to_a(match));
}
            
begin(n) => integer click to toggle source

Returns the offset of the start of the nth element of the match array in the string. n can be a string or symbol to reference a named capture.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.begin(0)       #=> 1
m.begin(2)       #=> 2

m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
p m.begin(:foo)  #=> 0
p m.begin(:bar)  #=> 2
 
               static VALUE
match_begin(VALUE match, VALUE n)
{
    int i = match_backref_number(match, n);
    struct re_registers *regs = RMATCH_REGS(match);

    match_check(match);
    if (i < 0 || regs->num_regs <= i)
        rb_raise(rb_eIndexError, "index %d out of matches", i);

    if (BEG(i) < 0)
        return Qnil;

    update_char_offset(match);
    return INT2FIX(RMATCH(match)->rmatch->char_offset[i].beg);
}
            
captures => array click to toggle source

Returns the array of captures; equivalent to mtch.to_a[1..-1].

f1,f2,f3,f4 = /(.)(.)(\d+)(\d)/.match("THX1138.").captures
f1    #=> "H"
f2    #=> "X"
f3    #=> "113"
f4    #=> "8"
 
               static VALUE
match_captures(VALUE match)
{
    return match_array(match, 1);
}
            
end(n) => integer click to toggle source

Returns the offset of the character immediately following the end of the nth element of the match array in the string. n can be a string or symbol to reference a named capture.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.end(0)         #=> 7
m.end(2)         #=> 3

m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
p m.end(:foo)    #=> 1
p m.end(:bar)    #=> 3
 
               static VALUE
match_end(VALUE match, VALUE n)
{
    int i = match_backref_number(match, n);
    struct re_registers *regs = RMATCH_REGS(match);

    match_check(match);
    if (i < 0 || regs->num_regs <= i)
        rb_raise(rb_eIndexError, "index %d out of matches", i);

    if (BEG(i) < 0)
        return Qnil;

    update_char_offset(match);
    return INT2FIX(RMATCH(match)->rmatch->char_offset[i].end);
}
            
inspect => str click to toggle source

Returns a printable version of mtch.

puts /.$/.match("foo").inspect
#=> #<MatchData "o">

puts /(.)(.)(.)/.match("foo").inspect
#=> #<MatchData "foo" 1:"f" 2:"o" 3:"o">

puts /(.)(.)?(.)/.match("fo").inspect
#=> #<MatchData "fo" 1:"f" 2:nil 3:"o">

puts /(?<foo>.)(?<bar>.)(?<baz>.)/.match("hoge").inspect
#=> #<MatchData "hog" foo:"h" bar:"o" baz:"g">
 
               static VALUE
match_inspect(VALUE match)
{
    const char *cname = rb_obj_classname(match);
    VALUE str;
    int i;
    struct re_registers *regs = RMATCH_REGS(match);
    int num_regs = regs->num_regs;
    struct backref_name_tag *names;
    VALUE regexp = RMATCH(match)->regexp;

    if (regexp == 0) {
        return rb_sprintf("#<%s:%p>", cname, (void*)match);
    }

    names = ALLOCA_N(struct backref_name_tag, num_regs);
    MEMZERO(names, struct backref_name_tag, num_regs);

    onig_foreach_name(RREGEXP(regexp)->ptr,
            match_inspect_name_iter, names);

    str = rb_str_buf_new2("#<");
    rb_str_buf_cat2(str, cname);

    for (i = 0; i < num_regs; i++) {
        VALUE v;
        rb_str_buf_cat2(str, " ");
        if (0 < i) {
            if (names[i].name)
                rb_str_buf_cat(str, (const char *)names[i].name, names[i].len);
            else {
                rb_str_catf(str, "%d", i);
            }
            rb_str_buf_cat2(str, ":");
        }
        v = rb_reg_nth_match(i, match);
        if (v == Qnil)
            rb_str_buf_cat2(str, "nil");
        else
            rb_str_buf_append(str, rb_str_inspect(v));
    }
    rb_str_buf_cat2(str, ">");

    return str;
}
            
length => integer click to toggle source

Returns the number of elements in the match array.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.length   #=> 5
m.size     #=> 5
 
               static VALUE
match_size(VALUE match)
{
    match_check(match);
    return INT2FIX(RMATCH_REGS(match)->num_regs);
}
            
names => [name1, name2, ...] click to toggle source

Returns a list of names of captures as an array of strings. It is same as mtch.regexp.names.

/(?<foo>.)(?<bar>.)(?<baz>.)/.match("hoge").names
#=> ["foo", "bar", "baz"]

m = /(?<x>.)(?<y>.)?/.match("a") #=> #<MatchData "a" x:"a" y:nil>
m.names                          #=> ["x", "y"]
 
               static VALUE
match_names(VALUE match)
{
    match_check(match);
    return rb_reg_names(RMATCH(match)->regexp);
}
            
offset(n) => array click to toggle source

Returns a two-element array containing the beginning and ending offsets of the nth match. n can be a string or symbol to reference a named capture.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.offset(0)      #=> [1, 7]
m.offset(4)      #=> [6, 7]

m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
p m.offset(:foo) #=> [0, 1]
p m.offset(:bar) #=> [2, 3]
 
               static VALUE
match_offset(VALUE match, VALUE n)
{
    int i = match_backref_number(match, n);
    struct re_registers *regs = RMATCH_REGS(match);

    match_check(match);
    if (i < 0 || regs->num_regs <= i)
        rb_raise(rb_eIndexError, "index %d out of matches", i);

    if (BEG(i) < 0)
        return rb_assoc_new(Qnil, Qnil);

    update_char_offset(match);
    return rb_assoc_new(INT2FIX(RMATCH(match)->rmatch->char_offset[i].beg),
                        INT2FIX(RMATCH(match)->rmatch->char_offset[i].end));
}
            
post_match => str click to toggle source

Returns the portion of the original string after the current match. Equivalent to the special variable $'.

m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
m.post_match   #=> ": The Movie"
 
               VALUE
rb_reg_match_post(VALUE match)
{
    VALUE str;
    long pos;
    struct re_registers *regs;

    if (NIL_P(match)) return Qnil;
    match_check(match);
    regs = RMATCH_REGS(match);
    if (BEG(0) == -1) return Qnil;
    str = RMATCH(match)->str;
    pos = END(0);
    str = rb_str_subseq(str, pos, RSTRING_LEN(str) - pos);
    if (OBJ_TAINTED(match)) OBJ_TAINT(str);
    return str;
}
            
pre_match => str click to toggle source

Returns the portion of the original string before the current match. Equivalent to the special variable $`.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.pre_match   #=> "T"
 
               VALUE
rb_reg_match_pre(VALUE match)
{
    VALUE str;
    struct re_registers *regs;

    if (NIL_P(match)) return Qnil;
    match_check(match);
    regs = RMATCH_REGS(match);
    if (BEG(0) == -1) return Qnil;
    str = rb_str_subseq(RMATCH(match)->str, 0, BEG(0));
    if (OBJ_TAINTED(match)) OBJ_TAINT(str);
    return str;
}
            
regexp => regexp click to toggle source

Returns the regexp.

m = /a.*b/.match("abc")
m.regexp #=> /a.*b/
 
               static VALUE
match_regexp(VALUE match)
{
    match_check(match);
    return RMATCH(match)->regexp;
}
            
size => integer click to toggle source

Returns the number of elements in the match array.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.length   #=> 5
m.size     #=> 5
 
               static VALUE
match_size(VALUE match)
{
    match_check(match);
    return INT2FIX(RMATCH_REGS(match)->num_regs);
}
            
string => str click to toggle source

Returns a frozen copy of the string passed in to match.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.string   #=> "THX1138."
 
               static VALUE
match_string(VALUE match)
{
    match_check(match);
    return RMATCH(match)->str;  /* str is frozen */
}
            
to_a => anArray click to toggle source

Returns the array of matches.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.to_a   #=> ["HX1138", "H", "X", "113", "8"]

Because to_a is called when expanding *variable, there's a useful assignment shortcut for extracting matched fields. This is slightly slower than accessing the fields directly (as an intermediate array is generated).

all,f1,f2,f3 = *(/(.)(.)(\d+)(\d)/.match("THX1138."))
all   #=> "HX1138"
f1    #=> "H"
f2    #=> "X"
f3    #=> "113"
 
               static VALUE
match_to_a(VALUE match)
{
    return match_array(match, 0);
}
            
to_s => str click to toggle source

Returns the entire matched string.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.to_s   #=> "HX1138"
 
               static VALUE
match_to_s(VALUE match)
{
    VALUE str = rb_reg_last_match(match);

    match_check(match);
    if (NIL_P(str)) str = rb_str_new(0,0);
    if (OBJ_TAINTED(match)) OBJ_TAINT(str);
    if (OBJ_TAINTED(RMATCH(match)->str)) OBJ_TAINT(str);
    return str;
}
            
values_at([index]*) => array click to toggle source

Uses each index to access the matching values, returning an array of the corresponding matches.

m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
m.to_a               #=> ["HX1138", "H", "X", "113", "8"]
m.values_at(0, 2, -2)   #=> ["HX1138", "X", "113"]
 
               static VALUE
match_values_at(int argc, VALUE *argv, VALUE match)
{
    struct re_registers *regs = RMATCH_REGS(match);
    match_check(match);
    return rb_get_values_at(match, regs->num_regs, argc, argv, match_entry);
}