CSV::InputsScanner receives IO inputs, encoding and the chunk_size. It also controls the life cycle of the object with its methods keep_start, keep_end, keep_back, keep_drop.
CSV::InputsScanner.scan() tries to match with pattern at the current position. If there's a match, the scanner advances the “scan pointer” and returns the matched string. Otherwise, the scanner returns nil.
CSV::InputsScanner.rest() returns the “rest” of the string (i.e. everything after the scan pointer). If there is no more data (eos? = true), it returns “”.
# File csv/parser.rb, line 88
def initialize(inputs, encoding, row_separator, chunk_size: 8192)
@inputs = inputs.dup
@encoding = encoding
@row_separator = row_separator
@chunk_size = chunk_size
@last_scanner = @inputs.empty?
@keeps = []
read_chunk
end
# File csv/parser.rb, line 98
def each_line(row_separator)
buffer = nil
input = @scanner.rest
position = @scanner.pos
offset = 0
n_row_separator_chars = row_separator.size
while true
input.each_line(row_separator) do |line|
@scanner.pos += line.bytesize
if buffer
if n_row_separator_chars == 2 and
buffer.end_with?(row_separator[0]) and
line.start_with?(row_separator[1])
buffer << line[0]
line = line[1..-1]
position += buffer.bytesize + offset
@scanner.pos = position
offset = 0
yield(buffer)
buffer = nil
next if line.empty?
else
buffer << line
line = buffer
buffer = nil
end
end
if line.end_with?(row_separator)
position += line.bytesize + offset
@scanner.pos = position
offset = 0
yield(line)
else
buffer = line
end
end
break unless read_chunk
input = @scanner.rest
position = @scanner.pos
offset = -buffer.bytesize if buffer
end
yield(buffer) if buffer
end
# File csv/parser.rb, line 183
def keep_back
start, buffer = @keeps.pop
if buffer
string = @scanner.string
keep = string.byteslice(start, string.bytesize - start)
if keep and not keep.empty?
@inputs.unshift(StringIO.new(keep))
@last_scanner = false
end
@scanner = StringScanner.new(buffer)
else
@scanner.pos = start
end
read_chunk if @scanner.eos?
end
# File csv/parser.rb, line 173
def keep_end
start, buffer = @keeps.pop
keep = @scanner.string.byteslice(start, @scanner.pos - start)
if buffer
buffer << keep
keep = buffer
end
keep
end
# File csv/parser.rb, line 169
def keep_start
@keeps.push([@scanner.pos, nil])
end