class Prism::Translation::Parser::Lexer

Accepts a list of prism tokens and converts them into the expected format for the parser gem.

Constants

LAMBDA_TOKEN_TYPES

It is used to determine whether `do` is of the token type `kDO` or `kDO_LAMBDA`.

NOTE: In edge cases like `-> (foo = -> (bar) {}) do end`, please note that `kDO` is still returned instead of `kDO_LAMBDA`, which is expected: github.com/ruby/prism/pull/3046

LPAREN_CONVERSION_TOKEN_TYPES

The `PARENTHESIS_LEFT` token in Prism is classified as either `tLPAREN` or `tLPAREN2` in the Parser gem. The following token types are listed as those classified as `tLPAREN`.

TYPES

The direct translating of types between the two lexers.

Attributes

lexed[R]

An array of tuples that contain prism tokens and their associated lex state when they were lexed.

offset_cache[R]

A hash that maps offsets in bytes to offsets in characters.

source_buffer[R]

The Parser::Source::Buffer that the tokens were lexed from.

Public Class Methods

new(source_buffer, lexed, offset_cache) click to toggle source

Initialize the lexer with the given source buffer, prism tokens, and offset cache.

# File prism/translation/parser/lexer.rb, line 217
# Build a lexer over an already-lexed token stream.
#
# source_buffer - the Parser::Source::Buffer the tokens came from
# lexed         - array of [prism token, lex state] tuples
# offset_cache  - hash mapping byte offsets to character offsets
def initialize(source_buffer, lexed, offset_cache)
  @lexed = lexed
  @offset_cache = offset_cache
  @source_buffer = source_buffer
end

Public Instance Methods

to_a() click to toggle source

Convert the prism tokens into the expected format for the parser gem.

# File prism/translation/parser/lexer.rb, line 227
# Convert the prism tokens into the expected format for the parser gem.
#
# Returns an array of [type, [value, location]] tuples, where type is the
# parser gem token type, value is the (possibly parsed) token value, and
# location is a Parser::Source::Range into source_buffer.
def to_a
  tokens = []

  index = 0
  length = lexed.length

  # Identifiers of heredocs that have been opened but not yet closed, so the
  # matching HEREDOC_END token can be rewritten to the bare identifier.
  heredoc_identifier_stack = []

  while index < length
    token, state = lexed[index]
    index += 1
    # These prism token types have no counterpart in the parser gem.
    next if %i[IGNORED_NEWLINE __END__ EOF].include?(token.type)

    # Direct type translation; the case below patches up the exceptions.
    type = TYPES.fetch(token.type)
    value = token.value
    location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.end_offset])

    case type
    when :kDO
      # A `do` that closes a stabby lambda is kDO_LAMBDA in the parser gem.
      # Search backwards for the nearest lambda-related token; see
      # LAMBDA_TOKEN_TYPES for the known edge cases.
      types = tokens.map(&:first)
      nearest_lambda_token_type = types.reverse.find { |type| LAMBDA_TOKEN_TYPES.include?(type) }

      if nearest_lambda_token_type == :tLAMBDA
        type = :kDO_LAMBDA
      end
    when :tCHARACTER
      # Character literals like `?a` drop the leading question mark.
      value.delete_prefix!("?")
    when :tCOMMENT
      if token.type == :EMBDOC_BEGIN
        # Collapse an =begin/=end documentation block (and everything in
        # between) into a single comment token.
        start_index = index

        while !((next_token = lexed[index][0]) && next_token.type == :EMBDOC_END) && (index < length - 1)
          value += next_token.value
          index += 1
        end

        if start_index != index
          # Append the =end line and extend the range through it.
          value += next_token.value
          location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[lexed[index][0].location.end_offset])
          index += 1
        end
      else
        # Ordinary comments exclude the trailing newline from both the
        # value and the range.
        value.chomp!
        location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.end_offset - 1])
      end
    when :tNL
      value = nil
    when :tFLOAT
      value = parse_float(value)
    when :tIMAGINARY
      value = parse_complex(value)
    when :tINTEGER
      # The parser gem emits a leading `+` as a separate tUNARY_NUM token.
      if value.start_with?("+")
        tokens << [:tUNARY_NUM, ["+", Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.start_offset + 1])]]
        location = Range.new(source_buffer, offset_cache[token.location.start_offset + 1], offset_cache[token.location.end_offset])
      end

      value = parse_integer(value)
    when :tLABEL
      # Labels drop the trailing colon (`foo:` => "foo").
      value.chomp!(":")
    when :tLABEL_END
      value.chomp!(":")
    when :tLCURLY
      # A `{` at the beginning of an expression is tLBRACE, not tLCURLY.
      type = :tLBRACE if state == EXPR_BEG | EXPR_LABEL
    when :tLPAREN2
      # See LPAREN_CONVERSION_TOKEN_TYPES for when `(` is tLPAREN.
      type = :tLPAREN if tokens.empty? || LPAREN_CONVERSION_TOKEN_TYPES.include?(tokens.dig(-1, 0))
    when :tNTH_REF
      # `$1`-style references carry just the number, as an Integer.
      value = parse_integer(value.delete_prefix("$"))
    when :tOP_ASGN
      # Operator assignment carries only the operator (`+=` => "+").
      value.chomp!("=")
    when :tRATIONAL
      value = parse_rational(value)
    when :tSPACE
      value = nil
    when :tSTRING_BEG
      if token.type == :HEREDOC_START
        # Remember the identifier so the matching HEREDOC_END can be
        # rewritten to it later.
        heredoc_identifier_stack.push(value.match(/<<[-~]?["'`]?(?<heredoc_identifier>.*?)["'`]?\z/)[:heredoc_identifier])
      end
      if ["\"", "'"].include?(value) && (next_token = lexed[index][0]) && next_token.type == :STRING_END
        # Empty string literal: merge begin/end into a single tSTRING.
        next_location = token.location.join(next_token.location)
        type = :tSTRING
        value = ""
        location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset])
        index += 1
      elsif ["\"", "'"].include?(value) && (next_token = lexed[index][0]) && next_token.type == :STRING_CONTENT && next_token.value.lines.count <= 1 && (next_next_token = lexed[index + 1][0]) && next_next_token.type == :STRING_END
        # Simple single-line string: merge begin/content/end into one
        # tSTRING token.
        next_location = token.location.join(next_next_token.location)
        type = :tSTRING
        value = next_token.value.gsub("\\\\", "\\")
        location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset])
        index += 2
      elsif value.start_with?("<<")
        # Normalize heredoc openers: backtick heredocs become
        # tXSTRING_BEG, everything else is reported as `<<"` or `<<'`.
        quote = value[2] == "-" || value[2] == "~" ? value[3] : value[2]
        if quote == "`"
          type = :tXSTRING_BEG
          value = "<<`"
        else
          value = "<<#{quote == "'" || quote == "\"" ? quote : "\""}"
        end
      end
    when :tSTRING_CONTENT
      # Multi-line string content is split into one token per line,
      # accounting for trailing backslash line continuations.
      unless (lines = token.value.lines).one?
        start_offset = offset_cache[token.location.start_offset]
        lines.map do |line|
          newline = line.end_with?("\r\n") ? "\r\n" : "\n"
          chomped_line = line.chomp
          if match = chomped_line.match(/(?<backslashes>\\+)\z/)
            # Escaped backslashes collapse two-for-one; an odd count means
            # the final backslash escapes the newline (line continuation).
            adjustment = match[:backslashes].size / 2
            adjusted_line = chomped_line.delete_suffix("\\" * adjustment)
            if match[:backslashes].size.odd?
              adjusted_line.delete_suffix!("\\")
              adjustment += 2
            else
              adjusted_line << newline
            end
          else
            adjusted_line = line
            adjustment = 0
          end

          end_offset = start_offset + adjusted_line.length + adjustment
          tokens << [:tSTRING_CONTENT, [adjusted_line, Range.new(source_buffer, offset_cache[start_offset], offset_cache[end_offset])]]
          start_offset = end_offset
        end
        # The per-line tokens were pushed above; skip the shared push below.
        next
      end
    when :tSTRING_DVAR
      value = nil
    when :tSTRING_END
      if token.type == :HEREDOC_END && value.end_with?("\n")
        # Heredoc terminators report the bare identifier and exclude the
        # trailing newline from the range.
        newline_length = value.end_with?("\r\n") ? 2 : 1
        value = heredoc_identifier_stack.pop
        location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.end_offset - newline_length])
      elsif token.type == :REGEXP_END
        # Only the closing delimiter belongs to tSTRING_END; the flags are
        # emitted separately as tREGEXP_OPT after the push below.
        value = value[0]
        location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.start_offset + 1])
      end
    when :tSYMBEG
      # Simple symbols (`:foo`) merge the begin token and its content into
      # a single tSYMBOL; dynamic symbols keep tSYMBEG.
      if (next_token = lexed[index][0]) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR && next_token.type != :STRING_END
        next_location = token.location.join(next_token.location)
        type = :tSYMBOL
        value = next_token.value
        # The parser gem reports `~@`/`!@` operator symbols as `~`/`!`.
        value = { "~@" => "~", "!@" => "!" }.fetch(value, value)
        location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset])
        index += 1
      end
    when :tFID
      # A method name directly after `def` is a plain identifier.
      if !tokens.empty? && tokens.dig(-1, 0) == :kDEF
        type = :tIDENTIFIER
      end
    when :tXSTRING_BEG
      # An empty backtick literal is a tBACK_REF2 token in the parser gem.
      if (next_token = lexed[index][0]) && next_token.type != :STRING_CONTENT && next_token.type != :STRING_END
        type = :tBACK_REF2
      end
    end

    tokens << [type, [value, location]]

    # Regexp flags follow the closing delimiter as a tREGEXP_OPT token.
    if token.type == :REGEXP_END
      tokens << [:tREGEXP_OPT, [token.value[1..], Range.new(source_buffer, offset_cache[token.location.start_offset + 1], offset_cache[token.location.end_offset])]]
    end
  end

  tokens
end

Private Instance Methods

parse_complex(value) click to toggle source

Parse a complex from the string representation.

# File prism/translation/parser/lexer.rb, line 409
# Parse a complex number from its string representation.
#
# The trailing "i" is stripped first; the remaining text may be a rational
# ("3ri"), a base-prefixed integer ("0x2i"), or a plain numeric string.
# Malformed input degrades to 0i rather than raising.
def parse_complex(value)
  value.chomp!("i")

  imaginary_part =
    if value.end_with?("r")
      parse_rational(value)
    elsif value.match?(/\A0[BbOoDdXx]/)
      parse_integer(value)
    else
      value
    end

  Complex(0, imaginary_part)
rescue ArgumentError
  0i
end
parse_float(value) click to toggle source

Parse a float from the string representation.

# File prism/translation/parser/lexer.rb, line 402
# Parse a float from the string representation.
#
# Kernel#Float raises ArgumentError for malformed input, in which case we
# degrade to 0.0 rather than propagating the error.
def parse_float(value)
  begin
    Float(value)
  rescue ArgumentError
    0.0
  end
end
parse_integer(value) click to toggle source

Parse an integer from the string representation.

# File prism/translation/parser/lexer.rb, line 395
# Parse an integer from the string representation.
#
# Kernel#Integer honours base prefixes (0x, 0b, 0o, 0d) and underscores; on
# malformed input it raises ArgumentError, which we map to 0.
def parse_integer(value)
  begin
    Integer(value)
  rescue ArgumentError
    0
  end
end
parse_rational(value) click to toggle source

Parse a rational from the string representation.

# File prism/translation/parser/lexer.rb, line 424
# Parse a rational from the string representation.
#
# The trailing "r" is stripped first. Base-prefixed values (0x, 0b, 0o, 0d)
# go through parse_integer since Kernel#Rational does not accept prefixes;
# malformed input degrades to 0r.
def parse_rational(value)
  value.chomp!("r")

  numerator = value.match?(/\A0[BbOoDdXx]/) ? parse_integer(value) : value
  Rational(numerator)
rescue ArgumentError
  0r
end