In Files

  • rdoc/parser/ruby.rb

Files

Class/Module Index [+]

Quicksearch

RDoc::RubyLex

Lexical analyzer for Ruby source

Attributes

continue[R]
exception_on_syntax_error[RW]
indent[R]
lex_state[R]
read_auto_clean_up[RW]
skip_space[RW]

Public Class Methods

debug?() click to toggle source
 
               # File rdoc/parser/ruby.rb, line 446
# Tells callers whether lexer debug tracing is switched on.
# Always returns +false+ here; the print/printf tracing statements in this
# file are guarded by this method.
def self.debug?
  false
end
            
new(content, options) click to toggle source
 
               # File rdoc/parser/ruby.rb, line 450
##
# Builds a lexer over +content+.
#
# The rule table is registered first (#lex_init), then +content+ is wrapped
# in a BufferedReader configured with +options+, and finally every piece of
# position, quoting and state tracking is reset to its start-of-input value.

def initialize(content, options)
  lex_init

  @options = options
  @reader  = BufferedReader.new(content, @options)

  # Position bookkeeping.
  @line_no      = 1
  @exp_line_no  = @line_no
  @base_char_no = 0
  @indent       = 0

  # No literal is open and nothing has been lexed yet.
  @ltype      = nil
  @quoted     = nil
  @lex_state  = EXPR_BEG
  @space_seen = false

  @continue = false
  @line     = ""

  # Switches that back the read/write attributes of the same names.
  @skip_space                = false
  @read_auto_clean_up        = false
  @exception_on_syntax_error = true
end
            

Public Instance Methods

char_no() click to toggle source
 
               # File rdoc/parser/ruby.rb, line 484
# Current column, as reported by the underlying reader.
def char_no
  @reader.column
end
            
get_read() click to toggle source
 
               # File rdoc/parser/ruby.rb, line 488
# Delegates to the reader's +get_read+ and returns its result.
# #lex compares that result with "" and otherwise hands it back to its
# caller as the text of the line just lexed.
def get_read
  @reader.get_read
end
            
getc() click to toggle source
 
               # File rdoc/parser/ruby.rb, line 492
# Reads the next character from the reader.
# Callers in this file treat a +nil+ result as "no more input".
def getc
  @reader.getc
end
            
getc_of_rests() click to toggle source
 
               # File rdoc/parser/ruby.rb, line 496
# Delegates to the reader's +getc_already_read+ and returns its result.
def getc_of_rests
  @reader.getc_already_read
end
            
gets() click to toggle source
 
               # File rdoc/parser/ruby.rb, line 500
##
# Reads one line, character by character, through #getc.
#
# Carriage returns are dropped; the terminating "\n" (when present) is kept.
# Returns +nil+ when no character is available at all.

def gets
  ch = getc
  return unless ch

  line = ""
  until ch.nil?
    line.concat ch unless ch == "\r"
    break if ch == "\n"
    ch = getc
  end
  line
end
            
identify_comment() click to toggle source
 
               # File rdoc/parser/ruby.rb, line 1256
##
# Lexes a "#" comment (the "#" itself has already been consumed) and returns
# a TkCOMMENT token carrying the comment text.
#
# A backslash directly followed by a newline is folded into a single space
# and the comment continues on the next line; any other backslash is kept
# verbatim.  The newline that ends the comment is pushed back for the caller
# and +@ltype+ is cleared at that point.
#
# If the input ends right after a backslash the comment simply ends there
# (previously +nil+ was appended to the text, raising TypeError).

def identify_comment
  @ltype = "#"
  comment = "#"
  while ch = getc
    if ch == "\\"
      ch = getc
      if ch == "\n"
        ch = " "
      else
        comment << "\\"
        break if ch.nil? # input ended right after the backslash
      end
    elsif ch == "\n"
      @ltype = nil
      ungetc
      break
    end
    comment << ch
  end
  Token(TkCOMMENT).set_text(comment)
end
            
identify_gvar() click to toggle source
 
               # File rdoc/parser/ruby.rb, line 951
##
# Lexes what follows a "$" that has already been consumed.
#
# Depending on the next character this yields a TkGVAR (punctuation
# globals and "$-x"), a TkBACK_REF ($&, $`, $', $+), a TkNTH_REF ($1, $23,
# ...), hands over to #identify_identifier for word-like names, or falls
# back to a bare "$" token.  The lexer state is set to EXPR_END up front.

def identify_gvar
  @lex_state = EXPR_END
  name = "$"
  c = getc

  tk =
    case c
    when /[~_*$?!@\/\;,=:<>".]/
      Token(TkGVAR, name << c)

    when "-"
      Token(TkGVAR, name << "-" << getc)

    when "&", "`", "'", "+"
      Token(TkBACK_REF, name << c)

    when /[1-9]/
      name << c
      digit = getc
      while digit =~ /[0-9]/
        name << digit
        digit = getc
      end
      ungetc # the first non-digit belongs to the next token
      Token(TkNTH_REF)

    when /\w/
      # Rewind past both the character and the "$" so that the identifier
      # lexer sees the whole name.
      2.times { ungetc }
      return identify_identifier

    else
      ungetc
      Token("$")
    end

  tk.set_text(name)
end
            
identify_here_document() click to toggle source
 
               # File rdoc/parser/ruby.rb, line 1061
##
# Lexes a here document; the "<<" introducer has already been consumed.
#
# Reads the optional "-" flag and the (optionally quoted) terminator, keeps
# the remainder of the introducing line aside, collects body lines until the
# terminator is met, then asks the reader to re-read the saved remainder.
# With the "-" flag every line is stripped before it is compared or stored.
# Returns a token of the type mapped from the quote character, whose text is
# the dumped body.

def identify_here_document
  first = getc
  dash = (first == "-")
  first = getc if dash

  if first =~ /['"`]/
    # Quoted terminator: everything up to the matching quote.
    lt = first
    terminator = ""
    while (c = getc) && c != lt
      terminator.concat c
    end
  else
    # Bare terminator: a run of word characters, treated as double-quoted.
    lt = '"'
    terminator = first.dup
    while (c = getc) && c =~ /\w/
      terminator.concat c
    end
    ungetc
  end

  saved_ltype = @ltype
  @ltype = lt

  # Set aside the rest of the introducing line (honouring backslash pairs).
  rest_of_line = ""
  while (c = getc)
    rest_of_line << c
    case c
    when "\\" then rest_of_line << getc
    when "\n" then break
    end
  end

  body = ""
  while (line = gets)
    line.chomp!
    line.strip! if dash
    break if line == terminator
    body << line.chomp << "\n"
  end

  @reader.divert_read_from(rest_of_line)

  @ltype = saved_ltype
  @lex_state = EXPR_END
  Token(Ltype2Token[lt], body).set_text(body.dump)
end
            
identify_identifier() click to toggle source
 
               # File rdoc/parser/ruby.rb, line 986
##
# Lexes an identifier-like word starting at the current position: a global
# ("$x"), an instance/class variable ("@x", "@@x"), a reserved word, a
# constant, a "foo?"/"foo!" style name or a plain identifier.
#
# Returns the matching token with its text set, and moves +@lex_state+ (and
# +@indent+ for block-opening/closing keywords) along the way.
#
# A trailing "!" or "?" is attached only to plain names and only when it is
# not followed by "=", so "foo!=bar" is "foo", "!=", "bar" rather than
# "foo!" followed by "=bar".

def identify_identifier
  token = ""
  token.concat getc if peek(0) =~ /[$@]/
  token.concat getc if peek(0) == "@"

  while (ch = getc) =~ /\w/
    print ":", ch, ":" if RDoc::RubyLex.debug?
    token.concat ch
  end

  # +ch+ is the first character that is not part of the word.
  if (ch == "!" || ch == "?") && token =~ /\A\w/ && peek(0) != "="
    token.concat ch
  else
    ungetc
  end

  # $stderr.puts "identifier - #{token}, state = #@lex_state"

  case token
  when /^\$/
    return Token(TkGVAR, token).set_text(token)
  when /^\@/
    @lex_state = EXPR_END
    return Token(TkIVAR, token).set_text(token)
  end

  if @lex_state != EXPR_DOT
    print token, "\n" if RDoc::RubyLex.debug?

    token_c, *trans = TkReading2Token[token]
    if token_c
      # reserved word?

      if @lex_state != EXPR_BEG && @lex_state != EXPR_FNAME && trans[1]
        # modifiers
        token_c = TkSymbol2Token[trans[1]]
        @lex_state = trans[0]
      elsif @lex_state != EXPR_FNAME
        if ENINDENT_CLAUSE.include?(token)
          @indent += 1
        elsif DEINDENT_CLAUSE.include?(token)
          @indent -= 1
        end
        @lex_state = trans[0]
      else
        # A keyword used as a method name (e.g. after "def").
        @lex_state = EXPR_END
      end
      return Token(token_c, token).set_text(token)
    end
  end

  if @lex_state == EXPR_FNAME
    @lex_state = EXPR_END
    # Setter names: "def foo=".
    token.concat getc if peek(0) == '='
  elsif @lex_state == EXPR_BEG || @lex_state == EXPR_DOT
    @lex_state = EXPR_ARG
  else
    @lex_state = EXPR_END
  end

  if token[0, 1] =~ /[A-Z]/
    Token(TkCONSTANT, token).set_text(token)
  elsif token[token.size - 1, 1] =~ /[!?]/
    Token(TkFID, token).set_text(token)
  else
    Token(TkIDENTIFIER, token).set_text(token)
  end
end
            
identify_number(start) click to toggle source
 
               # File rdoc/parser/ruby.rb, line 1129
##
# Lexes a numeric literal and returns a TkINTEGER or TkFLOAT token whose
# text is the literal as written.
#
# +start+ is what the caller already consumed: a sign ("+" or "-"), the empty
# string, or the first digit.  For a sign or "" the first digit is read here.
#
# * A leading "0" starts a hex literal when followed by "x"/"X" (digits
#   0-9, a-f, A-F and "_"), otherwise an octal-style run of 0-7 and "_".
# * Otherwise digits and "_" are collected, with at most one "." (only when a
#   digit follows it) and one exponent ("e"/"E" with optional sign).
#
# The token only becomes a TkFLOAT once a fraction or exponent has really
# been consumed, so "1.times" yields the integer "1" and leaves ".times".

def identify_number(start)
  str = start.dup

  if start == "+" || start == "-" || start == ""
    start = getc
    str << start
  end

  @lex_state = EXPR_END

  if start == "0"
    if peek(0) =~ /[xX]/
      str << getc
      match = /[0-9a-fA-F_]/
    else
      match = /[0-7_]/
    end
    while ch = getc
      if ch !~ match
        ungetc
        break
      end
      str << ch
    end
    return Token(TkINTEGER).set_text(str)
  end

  type = TkINTEGER
  allow_point = true
  allow_e = true
  while ch = getc
    case ch
    when /[0-9_]/
      str << ch

    # `flag && "x"` is false when the flag is off, so the branch cannot match.
    when allow_point && "."
      if peek(0) !~ /[0-9]/
        # Not a fraction (method call, range, ...): give the dot back.
        ungetc
        break
      end
      type = TkFLOAT
      str << ch
      allow_point = false

    when allow_e && "e", allow_e && "E"
      str << ch
      type = TkFLOAT
      str << getc if peek(0) =~ /[+-]/
      allow_e = false
      allow_point = false

    else
      ungetc
      break
    end
  end
  Token(type).set_text(str)
end
            
identify_quotation(initial_char) click to toggle source
 
               # File rdoc/parser/ruby.rb, line 1110
  ##
  # Lexes a "%" literal; +initial_char+ is the text consumed so far ("%").
  #
  # The next character either names the literal type (looked up in
  # PERCENT_LTYPE, after which the delimiter is read) or, when it is a
  # non-word character, is itself the delimiter of a double-quoted style
  # literal.  Anything else raises SyntaxError.  The closing delimiter comes
  # from PERCENT_PAREN for bracket-like openers and is the opener itself
  # otherwise; the rest of the work is done by #identify_string.

  def identify_quotation(initial_char)
    ch = getc
    lt = PERCENT_LTYPE[ch]

    if lt
      initial_char += ch
      ch = getc
    elsif ch =~ /\W/
      lt = "\""
    else
      fail SyntaxError, "unknown type of %string ('#{ch}')"
    end

    @quoted = PERCENT_PAREN[ch] || ch
    identify_string(lt, @quoted, ch, initial_char)
  end
            
identify_string(ltype, quoted = ltype, opener=nil, initial_char = nil) click to toggle source
 
               # File rdoc/parser/ruby.rb, line 1191
##
# Lexes the body of a quoted literal whose opening delimiter has already
# been consumed, and returns the token for it.
#
# ltype::        literal type key (the quote character, "/" for a regexp,
#                "]" for a %w list); selects the token class.
# quoted::       the closing delimiter.
# opener::       the opening delimiter when it can nest (e.g. "{"), else nil.
# initial_char:: text already consumed in front of the opener (e.g. "%q").
#
# Nested opener/closer pairs are balanced, "#{...}" is skipped as a unit
# (except in single-quoted and %w literals) and marks the literal as dynamic,
# and a backslash makes the following escape sequence part of the text, so an
# escaped closing delimiter does not end the literal.  For regexps every
# trailing option letter is consumed.
#
# +@ltype+, +@quoted+ are always cleared and +@lex_state+ is always set to
# EXPR_END on the way out, even when an exception escapes.

def identify_string(ltype, quoted = ltype, opener=nil, initial_char = nil)
  @ltype = ltype
  @quoted = quoted
  subtype = nil

  str = ""
  str << initial_char if initial_char
  str << (opener||quoted)

  nest = 0
  begin
    while ch = getc
      str << ch
      if @quoted == ch
        break if nest == 0
        nest -= 1
      elsif opener == ch
        nest += 1
      elsif @ltype != "'" && @ltype != "]" && ch == "#"
        ch = getc
        if ch == "{"
          subtype = true
          str << ch << skip_inner_expression
        else
          ungetc(ch)
        end
      elsif ch == "\\"
        str << read_escape
      end
    end

    if @ltype == "/"
      # Regexp options and encoding flags, e.g. /foo/mi.
      str << getc while peek(0) =~ /[imxounes]/
    end

    token_class = subtype ? DLtype2Token[ltype] : Ltype2Token[ltype]
    Token(token_class, str).set_text(str)
  ensure
    @ltype = nil
    @quoted = nil
    @lex_state = EXPR_END
  end
end
            
lex() click to toggle source
 
               # File rdoc/parser/ruby.rb, line 523
  ##
  # Pulls tokens from #token until a TkNL or TkEND_OF_SCRIPT arrives while no
  # continuation is pending, or until #token returns +nil+.
  #
  # Returns the value of #get_read, or +nil+ when the token stream ran dry
  # or when end-of-script was reached with an empty read buffer.

  def lex
    tk = token
    until tk.nil? || ((TkNL === tk || TkEND_OF_SCRIPT === tk) && !@continue)
      tk = token
    end

    line = get_read
    return nil if tk.nil? || (line == "" && TkEND_OF_SCRIPT === tk)

    line
  end

  ##
  # Returns the next token matched by the rule table.
  #
  # The token position is recorded first.  A SyntaxError raised while
  # matching is re-raised as RDoc::Error when +@exception_on_syntax_error+ is
  # set, and otherwise replaced by a TkError at the current position.
  # Whitespace tokens are skipped while +@skip_space+ is set, and the read
  # buffer is drained afterwards when +@read_auto_clean_up+ is set.

  def token
    set_token_position(line_no, char_no)

    tk = nil
    while true
      begin
        tk = @OP.match(self)
        @space_seen = TkSPACE === tk
      rescue SyntaxError => e
        raise RDoc::Error, "syntax error: #{e.message}" if
          @exception_on_syntax_error

        tk = TkError.new(line_no, char_no)
      end
      break unless @skip_space && TkSPACE === tk
    end

    get_read if @read_auto_clean_up
    tk
  end

  # Keywords that make #identify_identifier increase @indent.
  ENINDENT_CLAUSE = [
    "case", "class", "def", "do", "for", "if",
    "module", "unless", "until", "while", "begin" #, "when"
  ]
  # Keywords that make #identify_identifier decrease @indent.
  DEINDENT_CLAUSE = ["end" #, "when"
  ]

  # Maps the type letter of a %-literal (%q, %Q, %x, %r, %w) to the literal
  # type key handed to #identify_string.
  PERCENT_LTYPE = {
    "q" => "\'",
    "Q" => "\"",
    "x" => "\`",
    "r" => "/",
    "w" => "]"
  }

  # Maps a bracket-like opening delimiter of a %-literal to its closer.
  # Delimiters not listed here close with the same character.
  PERCENT_PAREN = {
    "{" => "}",
    "[" => "]",
    "<" => ">",
    "(" => ")"
  }

  # Token class for a literal without "#{...}" interpolation, keyed by
  # literal type; unknown keys fall back to TkSTRING (see the default below).
  Ltype2Token = {
    "\'" => TkSTRING,
    "\"" => TkSTRING,
    "\`" => TkXSTRING,
    "/" => TkREGEXP,
    "]" => TkDSTRING
  }
  Ltype2Token.default = TkSTRING

  # Token class for a literal in which #identify_string saw "#{...}".
  DLtype2Token = {
    "\"" => TkDSTRING,
    "\`" => TkDXSTRING,
    "/" => TkDREGEXP,
  }

  ##
  # Creates the rule table (+@OP+) and registers the first half of the
  # lexing rules: end-of-script markers, whitespace, comments, =begin/=end
  # blocks, newlines, comparison and assignment operators, here documents,
  # quoted strings, "?", logical operators, op-assign, unary +@/-@, signed
  # numbers and dots.  The remaining rules are added by #lex_int2, which is
  # called last.
  #
  # Every rule block returns the token for the text it matched.

  def lex_init()
    @OP = IRB::SLex.new
    @OP.def_rules("\0", "\004", "\032") do |chars, io|
      Token(TkEND_OF_SCRIPT).set_text(chars)
    end

    # A run of horizontal whitespace becomes a single TkSPACE.
    @OP.def_rules(" ", "\t", "\f", "\r", "\13") do |chars, io|
      @space_seen = true
      while (ch = getc) =~ /[ \t\f\r\13]/
        chars << ch
      end
      ungetc
      Token(TkSPACE).set_text(chars)
    end

    @OP.def_rule("#") do
      |op, io|
      identify_comment
    end

    # =begin ... =end block; the guard requires @prev_char_no == 0 and a
    # whitespace character right after "=begin".
    # NOTE(review): if the input ends before a line starting with "=end",
    # getc returns nil and `line << ch` raises - confirm the desired handling.
    @OP.def_rule("=begin", proc{@prev_char_no == 0 && peek(0) =~ /\s/}) do
      |op, io|
      str = op
      @ltype = "="


      begin
        line = ""
        begin
          ch = getc
          line << ch
        end until ch == "\n"
        str << line
      end until line =~ /^=end/

      ungetc

      @ltype = nil

      if str =~ /\A=begin\s+rdoc/i
        # "=begin rdoc" blocks are documentation: strip the markers and
        # return the body as a comment.
        str.sub!(/\A=begin.*\n/, '')
        str.sub!(/^=end.*/m, '')
        Token(TkCOMMENT).set_text(str)
      else
        Token(TkRD_COMMENT)#.set_text(str)
      end
    end

    # A newline continues the statement when the lexer is still expecting an
    # operand, a method name or a name after a dot.
    @OP.def_rule("\n") do
      print "\\n\n" if RDoc::RubyLex.debug?
      case @lex_state
      when EXPR_BEG, EXPR_FNAME, EXPR_DOT
        @continue = true
      else
        @continue = false
        @lex_state = EXPR_BEG
      end
      Token(TkNL).set_text("\n")
    end

    @OP.def_rules("*", "**",
                  "!", "!=", "!~",
                  "=", "==", "===",
                  "=~", "<=>",
                  "<", "<=",
                  ">", ">=", ">>") do
      |op, io|
      @lex_state = EXPR_BEG
      Token(op).set_text(op)
    end

    # "<<" is a here document when the state allows an operand and the next
    # character can start a terminator; the class includes "'" because
    # #identify_here_document handles single-quoted terminators as well.
    @OP.def_rules("<<") do
      |op, io|
      tk = nil
      if @lex_state != EXPR_END && @lex_state != EXPR_CLASS &&
        (@lex_state != EXPR_ARG || @space_seen)
        c = peek(0)
        if /[-\w_\"'\\`]/ =~ c
          tk = identify_here_document
        end
      end
      if !tk
        @lex_state = EXPR_BEG
        tk = Token(op).set_text(op)
      end
      tk
    end

    @OP.def_rules("'", '"') do
      |op, io|
      identify_string(op)
    end

    @OP.def_rules("`") do
      |op, io|
      if @lex_state == EXPR_FNAME
        Token(op).set_text(op)
      else
        identify_string(op)
      end
    end

    # "?" is the ternary operator after a complete expression, otherwise a
    # character literal such as ?a or ?\n (returned as TkINTEGER).
    @OP.def_rules('?') do
      |op, io|
      if @lex_state == EXPR_END
        @lex_state = EXPR_BEG
        Token(TkQUESTION).set_text(op)
      else
        ch = getc
        if @lex_state == EXPR_ARG && ch !~ /\s/
          ungetc
          @lex_state = EXPR_BEG
          Token(TkQUESTION).set_text(op)
        else
          str = op
          str << ch
          if ch == "\\"
            str << read_escape
          end
          @lex_state = EXPR_END
          Token(TkINTEGER).set_text(str)
        end
      end
    end

    @OP.def_rules("&", "&&", "|", "||") do
      |op, io|
      @lex_state = EXPR_BEG
      Token(op).set_text(op)
    end

    # Op-assign: the token value is the operator without the trailing "=".
    @OP.def_rules("+=", "-=", "*=", "**=",
                  "&=", "|=", "^=", "<<=", ">>=", "||=", "&&=") do
      |op, io|
      @lex_state = EXPR_BEG
      op =~ /^(.*)=$/
      Token(TkOPASGN, $1).set_text(op)
    end

    @OP.def_rule("+@", proc{@lex_state == EXPR_FNAME}) do |op, io|
      Token(TkUPLUS).set_text(op)
    end

    @OP.def_rule("-@", proc{@lex_state == EXPR_FNAME}) do |op, io|
      Token(TkUMINUS).set_text(op)
    end

    # "+"/"-" directly in front of a digit is the sign of a number when the
    # state allows an operand; otherwise it is a binary operator.
    @OP.def_rules("+", "-") do
      |op, io|
      catch(:RET) do
        if @lex_state == EXPR_ARG
          if @space_seen and peek(0) =~ /[0-9]/
            throw :RET, identify_number(op)
          else
            @lex_state = EXPR_BEG
          end
        elsif @lex_state != EXPR_END and peek(0) =~ /[0-9]/
          throw :RET, identify_number(op)
        else
          @lex_state = EXPR_BEG
        end
        Token(op).set_text(op)
      end
    end

    @OP.def_rule(".") do
      @lex_state = EXPR_BEG
      if peek(0) =~ /[0-9]/
        ungetc
        identify_number("")
      else
        # for obj.if
        @lex_state = EXPR_DOT
        Token(TkDOT).set_text(".")
      end
    end

    @OP.def_rules("..", "...") do
      |op, io|
      @lex_state = EXPR_BEG
      Token(op).set_text(op)
    end

    lex_int2
  end

  ##
  # Registers the second half of the lexing rules on +@OP+: closing
  # brackets, ":" and "::", "/" (division vs. regexp), "^", "," and ";",
  # "~", opening brackets, backslash line continuation, "%" (modulo vs.
  # %-literal), "$" and "@" variables, the __END__ marker and the catch-all
  # rule for numbers and identifiers.
  #
  # Every rule block returns the token for the text it matched.

  def lex_int2
    @OP.def_rules("]", "}", ")") do
      |op, io|
      @lex_state = EXPR_END
      @indent -= 1
      Token(op).set_text(op)
    end

    # ":" is a plain colon after a complete expression or before whitespace,
    # otherwise it starts a symbol.
    @OP.def_rule(":") do
      if @lex_state == EXPR_END || peek(0) =~ /\s/
        @lex_state = EXPR_BEG
        tk = Token(TkCOLON)
      else
        @lex_state = EXPR_FNAME
        tk = Token(TkSYMBEG)
      end
      tk.set_text(":")
    end

    @OP.def_rule("::") do
      if @lex_state == EXPR_BEG or @lex_state == EXPR_ARG && @space_seen
        @lex_state = EXPR_BEG
        tk = Token(TkCOLON3)
      else
        @lex_state = EXPR_DOT
        tk = Token(TkCOLON2)
      end
      tk.set_text("::")
    end

    # "/" starts a regexp where an operand is expected, is "/=" before "=",
    # and is the division operator otherwise.
    @OP.def_rule("/") do
      |op, io|
      if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
        identify_string(op)
      elsif peek(0) == '='
        getc
        @lex_state = EXPR_BEG
        Token(TkOPASGN, :/).set_text("/=") #")
      elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/
        identify_string(op)
      else
        @lex_state = EXPR_BEG
        Token("/").set_text(op)
      end
    end

    @OP.def_rules("^") do
      @lex_state = EXPR_BEG
      Token("^").set_text("^")
    end

    @OP.def_rules(",", ";") do
      |op, io|
      @lex_state = EXPR_BEG
      Token(op).set_text(op)
    end

    @OP.def_rule("~") do
      @lex_state = EXPR_BEG
      Token("~").set_text("~")
    end

    # "~@" only counts as one token where a method name is expected.  The
    # guard must compare the state; assigning here would always succeed and
    # overwrite @lex_state.
    @OP.def_rule("~@", proc{@lex_state == EXPR_FNAME}) do
      @lex_state = EXPR_BEG
      Token("~").set_text("~@")
    end

    @OP.def_rule("(") do
      @indent += 1
      if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
        @lex_state = EXPR_BEG
        tk = Token(TkfLPAREN)
      else
        @lex_state = EXPR_BEG
        tk = Token(TkLPAREN)
      end
      tk.set_text("(")
    end

    @OP.def_rule("[]", proc{@lex_state == EXPR_FNAME}) do
      Token("[]").set_text("[]")
    end

    @OP.def_rule("[]=", proc{@lex_state == EXPR_FNAME}) do
      Token("[]=").set_text("[]=")
    end

    @OP.def_rule("[") do
      @indent += 1
      if @lex_state == EXPR_FNAME
        t = Token(TkfLBRACK)
      else
        if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
          t = Token(TkLBRACK)
        elsif @lex_state == EXPR_ARG && @space_seen
          t = Token(TkLBRACK)
        else
          t = Token(TkfLBRACK)
        end
        @lex_state = EXPR_BEG
      end
      t.set_text("[")
    end

    @OP.def_rule("{") do
      @indent += 1
      if @lex_state != EXPR_END && @lex_state != EXPR_ARG
        t = Token(TkLBRACE)
      else
        t = Token(TkfLBRACE)
      end
      @lex_state = EXPR_BEG
      t.set_text("{")
    end

    # A backslash directly before a newline joins the two lines and is
    # reported as whitespace; any other backslash is a token of its own.
    @OP.def_rule("\\") do
      if getc == "\n"
        @space_seen = true
        @continue = true
        Token(TkSPACE).set_text("\\\n")
      else
        ungetc
        Token("\\").set_text("\\")
      end
    end

    # "%" starts a %-literal where an operand is expected, is "%=" before
    # "=", and is the modulo operator otherwise.
    @OP.def_rule('%') do
      |op, io|
      if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
        identify_quotation('%')
      elsif peek(0) == '='
        getc
        Token(TkOPASGN, "%").set_text("%=")
      elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/
        identify_quotation('%')
      else
        @lex_state = EXPR_BEG
        Token("%").set_text("%")
      end
    end

    @OP.def_rule('$') do
      identify_gvar
    end

    @OP.def_rule('@') do
      if peek(0) =~ /[@\w_]/
        ungetc
        identify_identifier
      else
        Token("@").set_text("@")
      end
    end

    # __END__ alone on a line ends lexing by throwing :eof.
    @OP.def_rule("__END__", proc{@prev_char_no == 0 && peek(0) =~ /[\r\n]/}) do
      throw :eof
    end

    # Catch-all: anything starting with a digit is a number, anything
    # starting with a word character is an identifier; otherwise nil.
    @OP.def_rule("") do
      |op, io|
      printf "MATCH: start %s: %s\n", op, io.inspect if RDoc::RubyLex.debug?
      if peek(0) =~ /[0-9]/
        t = identify_number("")
      elsif peek(0) =~ /[\w_]/
        t = identify_identifier
      end
      printf "MATCH: end %s: %s\n", op, io.inspect if RDoc::RubyLex.debug?
      t
    end
  end

  ##
  # Lexes what follows a "$" that has already been consumed.
  #
  # Depending on the next character this yields a TkGVAR (punctuation
  # globals and "$-x"), a TkBACK_REF ($&, $`, $', $+), a TkNTH_REF ($1, $23,
  # ...), hands over to #identify_identifier for word-like names, or falls
  # back to a bare "$" token.  The lexer state is set to EXPR_END up front.

  def identify_gvar
    @lex_state = EXPR_END
    name = "$"
    c = getc

    tk =
      case c
      when /[~_*$?!@\/\;,=:<>".]/
        Token(TkGVAR, name << c)

      when "-"
        Token(TkGVAR, name << "-" << getc)

      when "&", "`", "'", "+"
        Token(TkBACK_REF, name << c)

      when /[1-9]/
        name << c
        digit = getc
        while digit =~ /[0-9]/
          name << digit
          digit = getc
        end
        ungetc # the first non-digit belongs to the next token
        Token(TkNTH_REF)

      when /\w/
        # Rewind past both the character and the "$" so that the identifier
        # lexer sees the whole name.
        2.times { ungetc }
        return identify_identifier

      else
        ungetc
        Token("$")
      end

    tk.set_text(name)
  end

  ##
  # Lexes an identifier-like word starting at the current position: a global
  # ("$x"), an instance/class variable ("@x", "@@x"), a reserved word, a
  # constant, a "foo?"/"foo!" style name or a plain identifier.
  #
  # Returns the matching token with its text set, and moves +@lex_state+ (and
  # +@indent+ for block-opening/closing keywords) along the way.
  #
  # A trailing "!" or "?" is attached only to plain names and only when it is
  # not followed by "=", so "foo!=bar" is "foo", "!=", "bar" rather than
  # "foo!" followed by "=bar".

  def identify_identifier
    token = ""
    token.concat getc if peek(0) =~ /[$@]/
    token.concat getc if peek(0) == "@"

    while (ch = getc) =~ /\w/
      print ":", ch, ":" if RDoc::RubyLex.debug?
      token.concat ch
    end

    # +ch+ is the first character that is not part of the word.
    if (ch == "!" || ch == "?") && token =~ /\A\w/ && peek(0) != "="
      token.concat ch
    else
      ungetc
    end

    # $stderr.puts "identifier - #{token}, state = #@lex_state"

    case token
    when /^\$/
      return Token(TkGVAR, token).set_text(token)
    when /^\@/
      @lex_state = EXPR_END
      return Token(TkIVAR, token).set_text(token)
    end

    if @lex_state != EXPR_DOT
      print token, "\n" if RDoc::RubyLex.debug?

      token_c, *trans = TkReading2Token[token]
      if token_c
        # reserved word?

        if @lex_state != EXPR_BEG && @lex_state != EXPR_FNAME && trans[1]
          # modifiers
          token_c = TkSymbol2Token[trans[1]]
          @lex_state = trans[0]
        elsif @lex_state != EXPR_FNAME
          if ENINDENT_CLAUSE.include?(token)
            @indent += 1
          elsif DEINDENT_CLAUSE.include?(token)
            @indent -= 1
          end
          @lex_state = trans[0]
        else
          # A keyword used as a method name (e.g. after "def").
          @lex_state = EXPR_END
        end
        return Token(token_c, token).set_text(token)
      end
    end

    if @lex_state == EXPR_FNAME
      @lex_state = EXPR_END
      # Setter names: "def foo=".
      token.concat getc if peek(0) == '='
    elsif @lex_state == EXPR_BEG || @lex_state == EXPR_DOT
      @lex_state = EXPR_ARG
    else
      @lex_state = EXPR_END
    end

    if token[0, 1] =~ /[A-Z]/
      Token(TkCONSTANT, token).set_text(token)
    elsif token[token.size - 1, 1] =~ /[!?]/
      Token(TkFID, token).set_text(token)
    else
      Token(TkIDENTIFIER, token).set_text(token)
    end
  end

  ##
  # Lexes a here document; the "<<" introducer has already been consumed.
  #
  # Reads the optional "-" flag and the (optionally quoted) terminator, keeps
  # the remainder of the introducing line aside, collects body lines until
  # the terminator is met, then asks the reader to re-read the saved
  # remainder.  With the "-" flag every line is stripped before it is
  # compared or stored.  Returns a token of the type mapped from the quote
  # character, whose text is the dumped body.

  def identify_here_document
    first = getc
    dash = (first == "-")
    first = getc if dash

    if first =~ /['"`]/
      # Quoted terminator: everything up to the matching quote.
      lt = first
      terminator = ""
      while (c = getc) && c != lt
        terminator.concat c
      end
    else
      # Bare terminator: a run of word characters, treated as double-quoted.
      lt = '"'
      terminator = first.dup
      while (c = getc) && c =~ /\w/
        terminator.concat c
      end
      ungetc
    end

    saved_ltype = @ltype
    @ltype = lt

    # Set aside the rest of the introducing line (honouring backslash pairs).
    rest_of_line = ""
    while (c = getc)
      rest_of_line << c
      case c
      when "\\" then rest_of_line << getc
      when "\n" then break
      end
    end

    body = ""
    while (line = gets)
      line.chomp!
      line.strip! if dash
      break if line == terminator
      body << line.chomp << "\n"
    end

    @reader.divert_read_from(rest_of_line)

    @ltype = saved_ltype
    @lex_state = EXPR_END
    Token(Ltype2Token[lt], body).set_text(body.dump)
  end

  ##
  # Lexes a "%" literal; +initial_char+ is the text consumed so far ("%").
  #
  # The next character either names the literal type (looked up in
  # PERCENT_LTYPE, after which the delimiter is read) or, when it is a
  # non-word character, is itself the delimiter of a double-quoted style
  # literal.  Anything else raises SyntaxError.  The closing delimiter comes
  # from PERCENT_PAREN for bracket-like openers and is the opener itself
  # otherwise; the rest of the work is done by #identify_string.

  def identify_quotation(initial_char)
    ch = getc
    lt = PERCENT_LTYPE[ch]

    if lt
      initial_char += ch
      ch = getc
    elsif ch =~ /\W/
      lt = "\""
    else
      fail SyntaxError, "unknown type of %string ('#{ch}')"
    end

    @quoted = PERCENT_PAREN[ch] || ch
    identify_string(lt, @quoted, ch, initial_char)
  end

  ##
  # Lexes a numeric literal and returns a TkINTEGER or TkFLOAT token whose
  # text is the literal as written.
  #
  # +start+ is what the caller already consumed: a sign ("+" or "-"), the
  # empty string, or the first digit.  For a sign or "" the first digit is
  # read here.
  #
  # * A leading "0" starts a hex literal when followed by "x"/"X" (digits
  #   0-9, a-f, A-F and "_"), otherwise an octal-style run of 0-7 and "_".
  # * Otherwise digits and "_" are collected, with at most one "." (only when
  #   a digit follows it) and one exponent ("e"/"E" with optional sign).
  #
  # The token only becomes a TkFLOAT once a fraction or exponent has really
  # been consumed, so "1.times" yields the integer "1" and leaves ".times".

  def identify_number(start)
    str = start.dup

    if start == "+" || start == "-" || start == ""
      start = getc
      str << start
    end

    @lex_state = EXPR_END

    if start == "0"
      if peek(0) =~ /[xX]/
        str << getc
        match = /[0-9a-fA-F_]/
      else
        match = /[0-7_]/
      end
      while ch = getc
        if ch !~ match
          ungetc
          break
        end
        str << ch
      end
      return Token(TkINTEGER).set_text(str)
    end

    type = TkINTEGER
    allow_point = true
    allow_e = true
    while ch = getc
      case ch
      when /[0-9_]/
        str << ch

      # `flag && "x"` is false when the flag is off, so the branch cannot match.
      when allow_point && "."
        if peek(0) !~ /[0-9]/
          # Not a fraction (method call, range, ...): give the dot back.
          ungetc
          break
        end
        type = TkFLOAT
        str << ch
        allow_point = false

      when allow_e && "e", allow_e && "E"
        str << ch
        type = TkFLOAT
        str << getc if peek(0) =~ /[+-]/
        allow_e = false
        allow_point = false

      else
        ungetc
        break
      end
    end
    Token(type).set_text(str)
  end

  ##
  # Lexes the body of a quoted literal whose opening delimiter has already
  # been consumed, and returns the token for it.
  #
  # ltype::        literal type key (the quote character, "/" for a regexp,
  #                "]" for a %w list); selects the token class.
  # quoted::       the closing delimiter.
  # opener::       the opening delimiter when it can nest (e.g. "{"), else
  #                nil.
  # initial_char:: text already consumed in front of the opener (e.g. "%q").
  #
  # Nested opener/closer pairs are balanced, "#{...}" is skipped as a unit
  # (except in single-quoted and %w literals) and marks the literal as
  # dynamic, and a backslash makes the following escape sequence part of the
  # text, so an escaped closing delimiter does not end the literal.  For
  # regexps every trailing option letter is consumed.
  #
  # +@ltype+, +@quoted+ are always cleared and +@lex_state+ is always set to
  # EXPR_END on the way out, even when an exception escapes.

  def identify_string(ltype, quoted = ltype, opener=nil, initial_char = nil)
    @ltype = ltype
    @quoted = quoted
    subtype = nil

    str = ""
    str << initial_char if initial_char
    str << (opener||quoted)

    nest = 0
    begin
      while ch = getc
        str << ch
        if @quoted == ch
          break if nest == 0
          nest -= 1
        elsif opener == ch
          nest += 1
        elsif @ltype != "'" && @ltype != "]" && ch == "#"
          ch = getc
          if ch == "{"
            subtype = true
            str << ch << skip_inner_expression
          else
            ungetc(ch)
          end
        elsif ch == "\\"
          str << read_escape
        end
      end

      if @ltype == "/"
        # Regexp options and encoding flags, e.g. /foo/mi.
        str << getc while peek(0) =~ /[imxounes]/
      end

      token_class = subtype ? DLtype2Token[ltype] : Ltype2Token[ltype]
      Token(token_class, str).set_text(str)
    ensure
      @ltype = nil
      @quoted = nil
      @lex_state = EXPR_END
    end
  end

  ##
  # Consumes the remainder of a "#{...}" interpolation (the "#{" is already
  # read) and returns the consumed text, closing brace included.  Nested
  # braces are balanced; if input runs out first, whatever was read so far
  # is returned.

  def skip_inner_expression
    depth = 0
    skipped = ""

    while (c = getc)
      skipped << c
      case c
      when '{'
        depth += 1
      when '}'
        break if depth == 0
        depth -= 1
      end
    end

    skipped
  end

  ##
  # Lexes a "#" comment (the "#" itself has already been consumed) and
  # returns a TkCOMMENT token carrying the comment text.
  #
  # A backslash directly followed by a newline is folded into a single space
  # and the comment continues on the next line; any other backslash is kept
  # verbatim.  The newline that ends the comment is pushed back for the
  # caller and +@ltype+ is cleared at that point.
  #
  # If the input ends right after a backslash the comment simply ends there
  # (previously +nil+ was appended to the text, raising TypeError).

  def identify_comment
    @ltype = "#"
    comment = "#"
    while ch = getc
      if ch == "\\"
        ch = getc
        if ch == "\n"
          ch = " "
        else
          comment << "\\"
          break if ch.nil? # input ended right after the backslash
        end
      elsif ch == "\n"
        @ltype = nil
        ungetc
        break
      end
      comment << ch
    end
    Token(TkCOMMENT).set_text(comment)
  end

  ##
  # Reads the escape sequence that follows a backslash (the backslash itself
  # is already consumed) and returns its text.
  #
  # Handles up to three octal digits, "x" with up to two hex digits, "M-"
  # and "C-"/"c" prefixes (each optionally followed by a nested escape), and
  # any other single character.  A character that turns out not to belong to
  # the sequence is pushed back.  Returns "" when no input is left.

  def read_escape
    res = ""
    case ch = getc
    when /[0-7]/
      ungetc ch
      3.times do
        case ch = getc
        when /[0-7]/
        when nil
          break
        else
          ungetc
          break
        end
        res << ch
      end

    when "x"
      res << ch
      2.times do
        case ch = getc
        when /[0-9a-fA-F]/
        when nil
          break
        else
          ungetc
          break
        end
        res << ch
      end

    when "M"
      res << ch
      if (ch = getc) != '-'
        ungetc
      else
        res << ch
        if (ch = getc) == "\\"
          res << ch
          res << read_escape
        elsif ch
          res << ch
        end
      end

    when "C", "c" #, "^"
      res << ch
      if ch == "C"
        # "C" needs the "-" of "\C-x"; "c" ("\cx") has no separator.
        if (ch = getc) != "-"
          ungetc
          return res
        end
        res << ch
      end
      if (ch = getc) == "\\"
        res << ch
        res << read_escape
      elsif ch
        res << ch
      end

    else
      res << ch if ch
    end
    res
  end
end
            
lex_init() click to toggle source
 
               # File rdoc/parser/ruby.rb, line 594
##
# Creates the rule table (+@OP+) and registers the first half of the lexing
# rules: end-of-script markers, whitespace, comments, =begin/=end blocks,
# newlines, comparison and assignment operators, here documents, quoted
# strings, "?", logical operators, op-assign, unary +@/-@, signed numbers
# and dots.  The remaining rules are added by #lex_int2, which is called
# last.
#
# Every rule block returns the token for the text it matched.

def lex_init()
  @OP = IRB::SLex.new
  @OP.def_rules("\0", "\004", "\032") do |chars, io|
    Token(TkEND_OF_SCRIPT).set_text(chars)
  end

  # A run of horizontal whitespace becomes a single TkSPACE.
  @OP.def_rules(" ", "\t", "\f", "\r", "\13") do |chars, io|
    @space_seen = true
    while (ch = getc) =~ /[ \t\f\r\13]/
      chars << ch
    end
    ungetc
    Token(TkSPACE).set_text(chars)
  end

  @OP.def_rule("#") do
    |op, io|
    identify_comment
  end

  # =begin ... =end block; the guard requires @prev_char_no == 0 and a
  # whitespace character right after "=begin".
  # NOTE(review): if the input ends before a line starting with "=end",
  # getc returns nil and `line << ch` raises - confirm the desired handling.
  @OP.def_rule("=begin", proc{@prev_char_no == 0 && peek(0) =~ /\s/}) do
    |op, io|
    str = op
    @ltype = "="


    begin
      line = ""
      begin
        ch = getc
        line << ch
      end until ch == "\n"
      str << line
    end until line =~ /^=end/

    ungetc

    @ltype = nil

    if str =~ /\A=begin\s+rdoc/i
      # "=begin rdoc" blocks are documentation: strip the markers and
      # return the body as a comment.
      str.sub!(/\A=begin.*\n/, '')
      str.sub!(/^=end.*/m, '')
      Token(TkCOMMENT).set_text(str)
    else
      Token(TkRD_COMMENT)#.set_text(str)
    end
  end

  # A newline continues the statement when the lexer is still expecting an
  # operand, a method name or a name after a dot.
  @OP.def_rule("\n") do
    print "\\n\n" if RDoc::RubyLex.debug?
    case @lex_state
    when EXPR_BEG, EXPR_FNAME, EXPR_DOT
      @continue = true
    else
      @continue = false
      @lex_state = EXPR_BEG
    end
    Token(TkNL).set_text("\n")
  end

  @OP.def_rules("*", "**",
                "!", "!=", "!~",
                "=", "==", "===",
                "=~", "<=>",
                "<", "<=",
                ">", ">=", ">>") do
    |op, io|
    @lex_state = EXPR_BEG
    Token(op).set_text(op)
  end

  # "<<" is a here document when the state allows an operand and the next
  # character can start a terminator; the class includes "'" because
  # #identify_here_document handles single-quoted terminators as well.
  @OP.def_rules("<<") do
    |op, io|
    tk = nil
    if @lex_state != EXPR_END && @lex_state != EXPR_CLASS &&
      (@lex_state != EXPR_ARG || @space_seen)
      c = peek(0)
      if /[-\w_\"'\\`]/ =~ c
        tk = identify_here_document
      end
    end
    if !tk
      @lex_state = EXPR_BEG
      tk = Token(op).set_text(op)
    end
    tk
  end

  @OP.def_rules("'", '"') do
    |op, io|
    identify_string(op)
  end

  @OP.def_rules("`") do
    |op, io|
    if @lex_state == EXPR_FNAME
      Token(op).set_text(op)
    else
      identify_string(op)
    end
  end

  # "?" is the ternary operator after a complete expression, otherwise a
  # character literal such as ?a or ?\n (returned as TkINTEGER).
  @OP.def_rules('?') do
    |op, io|
    if @lex_state == EXPR_END
      @lex_state = EXPR_BEG
      Token(TkQUESTION).set_text(op)
    else
      ch = getc
      if @lex_state == EXPR_ARG && ch !~ /\s/
        ungetc
        @lex_state = EXPR_BEG
        Token(TkQUESTION).set_text(op)
      else
        str = op
        str << ch
        if ch == "\\"
          str << read_escape
        end
        @lex_state = EXPR_END
        Token(TkINTEGER).set_text(str)
      end
    end
  end

  @OP.def_rules("&", "&&", "|", "||") do
    |op, io|
    @lex_state = EXPR_BEG
    Token(op).set_text(op)
  end

  # Op-assign: the token value is the operator without the trailing "=".
  @OP.def_rules("+=", "-=", "*=", "**=",
                "&=", "|=", "^=", "<<=", ">>=", "||=", "&&=") do
    |op, io|
    @lex_state = EXPR_BEG
    op =~ /^(.*)=$/
    Token(TkOPASGN, $1).set_text(op)
  end

  @OP.def_rule("+@", proc{@lex_state == EXPR_FNAME}) do |op, io|
    Token(TkUPLUS).set_text(op)
  end

  @OP.def_rule("-@", proc{@lex_state == EXPR_FNAME}) do |op, io|
    Token(TkUMINUS).set_text(op)
  end

  # "+"/"-" directly in front of a digit is the sign of a number when the
  # state allows an operand; otherwise it is a binary operator.
  @OP.def_rules("+", "-") do
    |op, io|
    catch(:RET) do
      if @lex_state == EXPR_ARG
        if @space_seen and peek(0) =~ /[0-9]/
          throw :RET, identify_number(op)
        else
          @lex_state = EXPR_BEG
        end
      elsif @lex_state != EXPR_END and peek(0) =~ /[0-9]/
        throw :RET, identify_number(op)
      else
        @lex_state = EXPR_BEG
      end
      Token(op).set_text(op)
    end
  end

  @OP.def_rule(".") do
    @lex_state = EXPR_BEG
    if peek(0) =~ /[0-9]/
      ungetc
      identify_number("")
    else
      # for obj.if
      @lex_state = EXPR_DOT
      Token(TkDOT).set_text(".")
    end
  end

  @OP.def_rules("..", "...") do
    |op, io|
    @lex_state = EXPR_BEG
    Token(op).set_text(op)
  end

  lex_int2
end
            
lex_int2() click to toggle source
 
               # File rdoc/parser/ruby.rb, line 780
##
# Registers the second half of the lexing rules on +@OP+: closing brackets,
# ":" and "::", "/" (division vs. regexp), "^", "," and ";", "~", opening
# brackets, backslash line continuation, "%" (modulo vs. %-literal), "$" and
# "@" variables, the __END__ marker and the catch-all rule for numbers and
# identifiers.
#
# Every rule block returns the token for the text it matched.

def lex_int2
  @OP.def_rules("]", "}", ")") do
    |op, io|
    @lex_state = EXPR_END
    @indent -= 1
    Token(op).set_text(op)
  end

  # ":" is a plain colon after a complete expression or before whitespace,
  # otherwise it starts a symbol.
  @OP.def_rule(":") do
    if @lex_state == EXPR_END || peek(0) =~ /\s/
      @lex_state = EXPR_BEG
      tk = Token(TkCOLON)
    else
      @lex_state = EXPR_FNAME
      tk = Token(TkSYMBEG)
    end
    tk.set_text(":")
  end

  @OP.def_rule("::") do
    if @lex_state == EXPR_BEG or @lex_state == EXPR_ARG && @space_seen
      @lex_state = EXPR_BEG
      tk = Token(TkCOLON3)
    else
      @lex_state = EXPR_DOT
      tk = Token(TkCOLON2)
    end
    tk.set_text("::")
  end

  # "/" starts a regexp where an operand is expected, is "/=" before "=",
  # and is the division operator otherwise.
  @OP.def_rule("/") do
    |op, io|
    if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
      identify_string(op)
    elsif peek(0) == '='
      getc
      @lex_state = EXPR_BEG
      Token(TkOPASGN, :/).set_text("/=") #")
    elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/
      identify_string(op)
    else
      @lex_state = EXPR_BEG
      Token("/").set_text(op)
    end
  end

  @OP.def_rules("^") do
    @lex_state = EXPR_BEG
    Token("^").set_text("^")
  end

  @OP.def_rules(",", ";") do
    |op, io|
    @lex_state = EXPR_BEG
    Token(op).set_text(op)
  end

  @OP.def_rule("~") do
    @lex_state = EXPR_BEG
    Token("~").set_text("~")
  end

  # "~@" only counts as one token where a method name is expected.  The
  # guard must compare the state; assigning here would always succeed and
  # overwrite @lex_state.
  @OP.def_rule("~@", proc{@lex_state == EXPR_FNAME}) do
    @lex_state = EXPR_BEG
    Token("~").set_text("~@")
  end

  @OP.def_rule("(") do
    @indent += 1
    if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
      @lex_state = EXPR_BEG
      tk = Token(TkfLPAREN)
    else
      @lex_state = EXPR_BEG
      tk = Token(TkLPAREN)
    end
    tk.set_text("(")
  end

  @OP.def_rule("[]", proc{@lex_state == EXPR_FNAME}) do
    Token("[]").set_text("[]")
  end

  @OP.def_rule("[]=", proc{@lex_state == EXPR_FNAME}) do
    Token("[]=").set_text("[]=")
  end

  @OP.def_rule("[") do
    @indent += 1
    if @lex_state == EXPR_FNAME
      t = Token(TkfLBRACK)
    else
      if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
        t = Token(TkLBRACK)
      elsif @lex_state == EXPR_ARG && @space_seen
        t = Token(TkLBRACK)
      else
        t = Token(TkfLBRACK)
      end
      @lex_state = EXPR_BEG
    end
    t.set_text("[")
  end

  @OP.def_rule("{") do
    @indent += 1
    if @lex_state != EXPR_END && @lex_state != EXPR_ARG
      t = Token(TkLBRACE)
    else
      t = Token(TkfLBRACE)
    end
    @lex_state = EXPR_BEG
    t.set_text("{")
  end

  # A backslash directly before a newline joins the two lines and is
  # reported as whitespace; any other backslash is a token of its own.
  @OP.def_rule("\\") do
    if getc == "\n"
      @space_seen = true
      @continue = true
      Token(TkSPACE).set_text("\\\n")
    else
      ungetc
      Token("\\").set_text("\\")
    end
  end

  # "%" starts a %-literal where an operand is expected, is "%=" before "=",
  # and is the modulo operator otherwise.
  @OP.def_rule('%') do
    |op, io|
    if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
      identify_quotation('%')
    elsif peek(0) == '='
      getc
      Token(TkOPASGN, "%").set_text("%=")
    elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/
      identify_quotation('%')
    else
      @lex_state = EXPR_BEG
      Token("%").set_text("%")
    end
  end

  @OP.def_rule('$') do
    identify_gvar
  end

  @OP.def_rule('@') do
    if peek(0) =~ /[@\w_]/
      ungetc
      identify_identifier
    else
      Token("@").set_text("@")
    end
  end

  # __END__ alone on a line ends lexing by throwing :eof.
  @OP.def_rule("__END__", proc{@prev_char_no == 0 && peek(0) =~ /[\r\n]/}) do
    throw :eof
  end

  # Catch-all: anything starting with a digit is a number, anything starting
  # with a word character is an identifier; otherwise nil.
  @OP.def_rule("") do
    |op, io|
    printf "MATCH: start %s: %s\n", op, io.inspect if RDoc::RubyLex.debug?
    if peek(0) =~ /[0-9]/
      t = identify_number("")
    elsif peek(0) =~ /[\w_]/
      t = identify_identifier
    end
    printf "MATCH: end %s: %s\n", op, io.inspect if RDoc::RubyLex.debug?
    t
  end
end
            
line_no() click to toggle source

io functions

 
               # File rdoc/parser/ruby.rb, line 480
# Current line number, as reported by the underlying reader.
def line_no
  @reader.line_num
end
            
peek(i = 0) click to toggle source
 
               # File rdoc/parser/ruby.rb, line 519
# Delegates to the reader's +peek+ with offset +i+ (default 0).
# The lexing rules call peek(0) to inspect the next character before
# deciding whether to read it with #getc.
def peek(i = 0)
  @reader.peek(i)
end
            
peek_equal?(str) click to toggle source
 
               # File rdoc/parser/ruby.rb, line 515
# Delegates to the reader's +peek_equal+ with +str+ and returns its result.
# NOTE(review): presumably true when the upcoming input equals +str+ -
# confirm against BufferedReader.
def peek_equal?(str)
  @reader.peek_equal(str)
end
            
read_escape() click to toggle source
 
               # File rdoc/parser/ruby.rb, line 1279
##
# Reads the escape sequence that follows a backslash (the backslash itself
# is already consumed) and returns its text.
#
# Handles up to three octal digits, "x" with up to two hex digits, "M-" and
# "C-"/"c" prefixes (each optionally followed by a nested escape), and any
# other single character.  A character that turns out not to belong to the
# sequence is pushed back.  Returns "" when no input is left.

def read_escape
  res = ""
  case ch = getc
  when /[0-7]/
    ungetc ch
    3.times do
      case ch = getc
      when /[0-7]/
      when nil
        break
      else
        ungetc
        break
      end
      res << ch
    end

  when "x"
    res << ch
    2.times do
      case ch = getc
      when /[0-9a-fA-F]/
      when nil
        break
      else
        ungetc
        break
      end
      res << ch
    end

  when "M"
    res << ch
    if (ch = getc) != '-'
      ungetc
    else
      res << ch
      if (ch = getc) == "\\"
        res << ch
        res << read_escape
      elsif ch
        res << ch
      end
    end

  when "C", "c" #, "^"
    res << ch
    if ch == "C"
      # "C" needs the "-" of "\C-x"; "c" ("\cx") has no separator.
      if (ch = getc) != "-"
        ungetc
        return res
      end
      res << ch
    end
    if (ch = getc) == "\\"
      res << ch
      res << read_escape
    elsif ch
      res << ch
    end

  else
    res << ch if ch
  end
  res
end
            
skip_inner_expression() click to toggle source
 
               # File rdoc/parser/ruby.rb, line 1241
##
# Consumes the remainder of a "#{...}" interpolation (the "#{" is already
# read) and returns the consumed text, closing brace included.  Nested braces
# are balanced; if input runs out first, whatever was read so far is
# returned.

def skip_inner_expression
  depth = 0
  skipped = ""

  while (c = getc)
    skipped << c
    case c
    when '{'
      depth += 1
    when '}'
      break if depth == 0
      depth -= 1
    end
  end

  skipped
end
            
ungetc(c = nil) click to toggle source
 
               # File rdoc/parser/ruby.rb, line 511
# Delegates to the reader's +ungetc+, passing +c+ through (nil by default).
# The lexing methods call this after reading one character too many, so
# that the character is seen again by the next read.
def ungetc(c = nil)
  @reader.ungetc(c)
end
            

Commenting is here to help enhance the documentation. For example, code samples, or clarification of the documentation.

If you have questions about Ruby or the documentation, please post to one of the Ruby mailing lists. You will get better, faster help that way.

If you wish to post a correction of the docs, please do so, but also file a bug report so that it can be corrected for the next release. Thank you.

If you want to help improve the Ruby documentation, please visit Documenting-ruby.org.

blog comments powered by Disqus