CSV::InputsScanner receives IO inputs, an encoding, and a chunk_size. It also controls the life cycle of the object with its methods keep_start, keep_end, keep_back, and keep_drop.
CSV::InputsScanner.scan() tries to match the given pattern at the current position. If there is a match, the scanner advances the “scan pointer” and returns the matched string. Otherwise, the scanner returns nil.
CSV::InputsScanner.rest() returns the “rest” of the string (i.e. everything after the scan pointer). If there is no more data (eos? returns true), it returns an empty string ("").
# File csv/parser.rb, line 87
#
# Stores the scanner configuration and immediately reads the first chunk.
#
# inputs        - collection of input objects; a shallow copy (+dup+) is kept
#                 so the caller's collection is not the one held here.
# encoding      - kept as-is in @encoding.
# row_separator - kept as-is in @row_separator.
# chunk_size    - kept as-is in @chunk_size (defaults to 8192).
def initialize(inputs, encoding, row_separator, chunk_size: 8192)
  @keeps = []
  @chunk_size = chunk_size
  @row_separator = row_separator
  @encoding = encoding
  @inputs = inputs.dup
  # With no inputs at all, the scanner is flagged as the last one up front.
  @last_scanner = @inputs.empty?
  # read_chunk is defined outside this view; presumably it sets up @scanner.
  read_chunk
end
# File csv/parser.rb, line 97
#
# Yields each line of the remaining input, split on +row_separator+, and
# keeps going across chunk boundaries for as long as +read_chunk+ returns a
# truthy value. Every yielded line ends with +row_separator+ except possibly
# the very last one (trailing data without a separator).
#
# row_separator - String that terminates a row.
#
# Returns an Enumerator when called without a block; otherwise the value of
# the final expression (not meaningful to callers).
def each_line(row_separator)
  # Without a block the +yield+ calls below would raise LocalJumpError.
  return enum_for(__method__, row_separator) unless block_given?

  buffer = nil # incomplete line carried over to the next iteration/chunk
  input = @scanner.rest
  position = @scanner.pos
  offset = 0
  n_row_separator_chars = row_separator.size
  while true
    input.each_line(row_separator) do |line|
      @scanner.pos += line.bytesize
      if buffer
        if n_row_separator_chars == 2 &&
           buffer.end_with?(row_separator[0]) &&
           line.start_with?(row_separator[1])
          # A two-character separator was split between the carried-over
          # piece and this one: move the second character onto the buffer
          # and emit the buffer as a complete line.
          buffer << line[0]
          line = line[1..-1]
          position += buffer.bytesize + offset
          @scanner.pos = position
          offset = 0
          yield(buffer)
          buffer = nil
          next if line.empty?
        else
          # Glue the carried-over piece in front of the current one.
          buffer << line
          line = buffer
          buffer = nil
        end
      end
      if line.end_with?(row_separator)
        position += line.bytesize + offset
        @scanner.pos = position
        offset = 0
        yield(line)
      else
        # No terminator yet: hold the piece until more data arrives.
        buffer = line
      end
    end
    break unless read_chunk
    # read_chunk is defined outside this view; presumably it replaced
    # @scanner with the next chunk, so re-read the scanner state.
    input = @scanner.rest
    position = @scanner.pos
    # The negative offset cancels the carried-over bytes when the next line's
    # size is added to the position (they appear to come from the previous
    # chunk, not the current scanner).
    offset = -buffer.bytesize if buffer
  end
  yield(buffer) if buffer
end
# File csv/parser.rb, line 182
#
# Pops the most recent entry from @keeps and rewinds to it.
#
# * Entry without a buffer: the current scanner's position is simply reset
#   to the recorded start.
# * Entry with a buffer: the bytes of the current scanner's string from the
#   recorded start to its end are pushed back to the front of @inputs (as a
#   StringIO) when non-empty, @last_scanner is cleared, and @scanner is
#   replaced by a fresh scanner over the buffer.
#
# Afterwards, if the scanner is at end of string, +read_chunk+ is called.
#
# Returns the result of +read_chunk+ when it was called, otherwise nil.
def keep_back
  origin, carried = @keeps.pop
  if carried
    current = @scanner.string
    tail = current.byteslice(origin, current.bytesize - origin)
    unless tail.nil? || tail.empty?
      @inputs.unshift(StringIO.new(tail))
      @last_scanner = false
    end
    @scanner = StringScanner.new(carried)
  else
    @scanner.pos = origin
  end
  return unless @scanner.eos?

  read_chunk
end
# File csv/parser.rb, line 172
#
# Pops the most recent entry from @keeps and returns the text kept since it
# was recorded: the bytes of the current scanner's string between the
# recorded start and the current scan position. When the entry carries a
# buffer, those bytes are appended to the buffer (mutating it) and the
# buffer itself is returned.
def keep_end
  origin, carried = @keeps.pop
  segment = @scanner.string.byteslice(origin, @scanner.pos - origin)
  return segment unless carried

  # String#<< returns its receiver, i.e. the extended buffer.
  carried << segment
end
# File csv/parser.rb, line 168
#
# Records the current scan position as a new entry on the @keeps stack.
# The entry is a two-element pair: [position, nil] (no buffer yet).
#
# Returns @keeps.
def keep_start
  marker = [@scanner.pos, nil]
  @keeps << marker
end