In Files

  • csv/parser.rb

CSV::Parser::InputsScanner

CSV::Parser::InputsScanner receives IO inputs, an encoding, a row separator and the chunk_size. It also manages saved scan positions ("keeps") through its methods keep_start, keep_end, keep_back and keep_drop.

CSV::Parser::InputsScanner#scan tries to match the given pattern at the current position. If there's a match, the scanner advances the “scan pointer” and returns the matched string. Otherwise, the scanner returns nil.

CSV::Parser::InputsScanner#rest returns the “rest” of the string (i.e. everything after the scan pointer). If there is no more data (eos? = true), it returns “”.

Public Class Methods

new(inputs, encoding, row_separator, chunk_size: 8192) click to toggle source
 
               # File csv/parser.rb, line 87
# Sets up the scanner state and loads the first chunk via read_chunk.
#
# inputs        - collection of input objects; it is duplicated so that later
#                 changes to @inputs (keep_back unshifts onto it) do not touch
#                 the caller's collection.
#                 NOTE(review): elements are presumably IO-like objects --
#                 confirm against read_chunk.
# encoding      - stored as @encoding for later use.
# row_separator - stored as @row_separator for later use.
# chunk_size    - stored as @chunk_size (defaults to 8192).
def initialize(inputs, encoding, row_separator, chunk_size: 8192)
  # Stack of saved positions managed by keep_start/keep_end/keep_back/keep_drop.
  @keeps = []
  @chunk_size = chunk_size
  @row_separator = row_separator
  @encoding = encoding
  @inputs = inputs.dup
  # With no inputs at all there is nothing further to read, so the scanner
  # is flagged as the last one from the start.
  @last_scanner = @inputs.empty?
  read_chunk
end
            

Public Instance Methods

each_line(row_separator) click to toggle source
 
               # File csv/parser.rb, line 97
# Yields every line of the remaining input, split on row_separator, pulling
# in further chunks through read_chunk until it returns a falsy value.
#
# Each yielded line includes its trailing row_separator, except possibly the
# very last one when the input does not end with a separator. A line that
# straddles a chunk boundary is stitched together before being yielded; this
# includes the case where a two-character separator is split so that its
# first character ends one chunk and its second character starts the next.
# Right before a complete line is yielded, @scanner.pos is set to the byte
# just past that line.
#
# row_separator - String used to split the input into lines.
#
# Returns an Enumerator over the same lines when no block is given.
def each_line(row_separator)
  # Without this guard the first +yield+ would raise LocalJumpError after the
  # scanner position had already been moved.
  return enum_for(__method__, row_separator) unless block_given?

  # Unterminated tail of the previous chunk, waiting to be completed.
  buffer = nil
  input = @scanner.rest
  # Byte position in the current scanner just past the last yielded line.
  position = @scanner.pos
  # Negative size of the bytes in +buffer+ carried over from an earlier
  # chunk; they must not be counted when positioning the current scanner.
  offset = 0
  n_row_separator_chars = row_separator.size
  while true
    input.each_line(row_separator) do |line|
      @scanner.pos += line.bytesize
      if buffer
        if n_row_separator_chars == 2 and
          buffer.end_with?(row_separator[0]) and
          line.start_with?(row_separator[1])
          # The separator was split across the chunk boundary: move its
          # second character onto the buffered line to complete it.
          buffer << line[0]
          line = line[1..-1]
          position += buffer.bytesize + offset
          @scanner.pos = position
          offset = 0
          yield(buffer)
          buffer = nil
          next if line.empty?
        else
          # Ordinary continuation of the buffered partial line.
          buffer << line
          line = buffer
          buffer = nil
        end
      end
      if line.end_with?(row_separator)
        position += line.bytesize + offset
        @scanner.pos = position
        offset = 0
        yield(line)
      else
        # No terminator yet; hold on to it until more data arrives.
        buffer = line
      end
    end
    # NOTE(review): read_chunk is defined outside this view; presumably it
    # swaps in a scanner for the next chunk and returns a falsy value once
    # no input remains -- confirm.
    break unless read_chunk
    input = @scanner.rest
    position = @scanner.pos
    offset = -buffer.bytesize if buffer
  end
  # Trailing data that never received a terminating separator.
  yield(buffer) if buffer
end
            
eos?() click to toggle source
 
               # File csv/parser.rb, line 164
# Reports whether the current scanner has reached the end of its string.
#
# Delegates directly to @scanner.eos?; only the current scanner is
# consulted here, not any inputs that may still be pending in @inputs.
def eos?
  @scanner.eos?
end
            
keep_back() click to toggle source
 
               # File csv/parser.rb, line 182
# Rewinds scanning to the position saved by the most recent keep entry and
# removes that entry from @keeps.
#
# When the entry carries no buffer, the current scanner is simply moved back
# to the saved byte position. When it does carry a buffer, the bytes of the
# current scanner's string from the saved position onward are pushed back
# onto the front of @inputs (if there are any), and scanning restarts from a
# new StringScanner over the buffer.
#
# If the resulting scanner is already at its end, read_chunk is called and
# its result is returned; otherwise returns nil.
def keep_back
  saved_pos, saved_buffer = @keeps.pop
  if saved_buffer
    current = @scanner.string
    unread = current.byteslice(saved_pos, current.bytesize - saved_pos)
    if unread && !unread.empty?
      # Re-queue the not-yet-consumed bytes so they are read again later,
      # which also means the current scanner can no longer be the last one.
      @inputs.unshift(StringIO.new(unread))
      @last_scanner = false
    end
    @scanner = StringScanner.new(saved_buffer)
  else
    @scanner.pos = saved_pos
  end
  read_chunk if @scanner.eos?
end
            
keep_drop() click to toggle source
 
               # File csv/parser.rb, line 198
# Discards the most recently saved keep entry without moving the scanner or
# extracting any text.
#
# Returns the removed entry, or nil when @keeps is empty.
def keep_drop
  @keeps.pop
end
            
keep_end() click to toggle source
 
               # File csv/parser.rb, line 172
# Closes the most recent keep entry and returns the text covered since the
# matching keep_start.
#
# The text is the byte range of the current scanner's string from the saved
# position up to the current scan position. If the entry carries a buffer,
# that range is appended to the buffer (in place) and the buffer is returned
# instead.
def keep_end
  saved_pos, saved_buffer = @keeps.pop
  segment = @scanner.string.byteslice(saved_pos, @scanner.pos - saved_pos)
  return segment unless saved_buffer

  # String#<< returns its receiver, i.e. the combined buffer.
  saved_buffer << segment
end
            
keep_start() click to toggle source
 
               # File csv/parser.rb, line 168
# Records the current scan position as a new keep entry on top of @keeps.
#
# The entry is a two-element array: the scanner's byte position and a buffer
# slot that starts out as nil.
# NOTE(review): the buffer slot is presumably filled in elsewhere when a
# chunk boundary is crossed while the keep is open -- confirm in read_chunk.
#
# Returns @keeps.
def keep_start
  @keeps << [@scanner.pos, nil]
end
            
rest() click to toggle source
 
               # File csv/parser.rb, line 202
# Returns the part of the current scanner's string that lies after the scan
# position (delegates to @scanner.rest).
def rest
  @scanner.rest
end
            
scan(pattern) click to toggle source
 
               # File csv/parser.rb, line 141
# Tries to match pattern at the current scan position.
#
# On a match the scanner advances and the matched string is returned;
# otherwise nil is returned. When the match consumes the rest of the current
# scanner and that scanner is not flagged as the last one, read_chunk is
# called so that scanning can continue with further input.
#
# pattern - the pattern handed to the underlying scanner's scan.
def scan(pattern)
  matched = @scanner.scan(pattern)
  # Refill only after a successful match that exhausted a non-final scanner.
  read_chunk if matched && !@last_scanner && @scanner.eos?
  matched
end
            
scan_all(pattern) click to toggle source
 
               # File csv/parser.rb, line 153
# Matches pattern at the current scan position and keeps extending the match
# across chunk boundaries.
#
# Returns nil when the first attempt does not match. When the current scanner
# is flagged as the last one, the first result is returned as is. Otherwise,
# for as long as the scanner is exhausted and read_chunk reports success, the
# pattern is tried again at the start of the new chunk and any match is
# appended to the result.
#
# pattern - the pattern handed to the underlying scanner's scan.
def scan_all(pattern)
  collected = @scanner.scan(pattern)
  return collected if @last_scanner || collected.nil?

  while @scanner.eos? && read_chunk
    piece = @scanner.scan(pattern)
    # The match does not continue into the freshly loaded chunk.
    break if piece.nil?
    collected << piece
  end
  collected
end
            
There is an updated format of the API docs for this version here.