CSV::InputsScanner receives IO inputs, the encoding, the row separator and the chunk_size. It also controls the life cycle of the object with its methods keep_start, keep_end, keep_back and keep_drop.
CSV::InputsScanner.scan() tries to match with pattern at the current position. If there's a match, the scanner advances the “scan pointer” and returns the matched string. Otherwise, the scanner returns nil.
CSV::InputsScanner.rest() returns the “rest” of the string (i.e. everything after the scan pointer). If there is no more data (eos? = true), it returns “”.
# File ruby-3.1.2/lib/csv/parser.rb, line 88
#
# Sets up the scanner state and loads the first chunk of input.
#
# inputs::        list of input sources; it is duplicated, so shifting
#                 sources off the internal list does not touch the caller's.
# encoding::      encoding applied to the empty string that backs the
#                 scanner when a source yields no data.
# row_separator:: separator handed to +gets+ when reading a chunk.
# chunk_size::    size limit handed to +gets+ when reading a chunk.
def initialize(inputs, encoding, row_separator, chunk_size: 8192)
  @keeps = []
  @chunk_size = chunk_size
  @row_separator = row_separator
  @encoding = encoding
  @inputs = inputs.dup
  # With no inputs at all there is nothing left to read after this point.
  @last_scanner = @inputs.empty?
  read_chunk
end
# File ruby-3.1.2/lib/csv/parser.rb, line 98
#
# Yields the remaining input one line at a time, where a line ends with
# +row_separator+. Lines that straddle a chunk boundary are stitched back
# together before being yielded, and a trailing piece without a separator is
# yielded last.
#
# Returns an Enumerator when called without a block.
#
# row_separator:: the string that terminates a line.
def each_line(row_separator)
  return enum_for(__method__, row_separator) unless block_given?

  # +buffer+ holds an incomplete line carried over to the next piece/chunk.
  buffer = nil
  input = @scanner.rest
  position = @scanner.pos
  # +offset+ discounts the bytes of +buffer+ that came from earlier chunks,
  # so +position+ stays relative to the current scanner's string.
  offset = 0
  n_row_separator_chars = row_separator.size
  while true
    input.each_line(row_separator) do |line|
      @scanner.pos += line.bytesize
      if buffer
        if n_row_separator_chars == 2 and
           buffer.end_with?(row_separator[0]) and
           line.start_with?(row_separator[1])
          # A two-character separator was split between the carried-over
          # buffer and this piece: move its second character to the buffer
          # to complete that line.
          buffer << line[0]
          line = line[1..-1]
          position += buffer.bytesize + offset
          @scanner.pos = position
          offset = 0
          yield(buffer)
          buffer = nil
          next if line.empty?
        else
          buffer << line
          line = buffer
          buffer = nil
        end
      end
      if line.end_with?(row_separator)
        position += line.bytesize + offset
        @scanner.pos = position
        offset = 0
        yield(line)
      else
        buffer = line
      end
    end
    break unless read_chunk
    input = @scanner.rest
    position = @scanner.pos
    offset = -buffer.bytesize if buffer
  end
  yield(buffer) if buffer
end
# File ruby-3.1.2/lib/csv/parser.rb, line 165
#
# Returns whether the scanner over the current chunk has been fully consumed.
# Only the current scanner is consulted.
def eos?
  @scanner.eos?
end
# File ruby-3.1.2/lib/csv/parser.rb, line 183
#
# Pops the most recent keep and rewinds scanning to where it started.
#
# When the keep carries a buffer, the current scanner's string from the
# keep's start offset onward is pushed back to the front of the inputs (if
# it is non-empty) and scanning restarts on the buffered data. Otherwise the
# scan pointer is simply moved back to the recorded position.
#
# Afterwards, a new chunk is read if the scanner is at its end.
def keep_back
  start, buffer = @keeps.pop
  if buffer
    current = @scanner.string
    pending = current.byteslice(start, current.bytesize - start)
    unless pending.nil? or pending.empty?
      # Re-queue the current chunk's data so it is read again after the buffer.
      @inputs.unshift(StringIO.new(pending))
      @last_scanner = false
    end
    @scanner = StringScanner.new(buffer)
  else
    @scanner.pos = start
  end
  read_chunk if @scanner.eos?
end
# File ruby-3.1.2/lib/csv/parser.rb, line 199
#
# Discards the most recent keep without moving the scan pointer.
# Returns the removed entry (nil when there was none).
def keep_drop
  @keeps.pop
end
# File ruby-3.1.2/lib/csv/parser.rb, line 173
#
# Pops the most recent keep and returns the data scanned since it started:
# the bytes of the current scanner's string between the keep's start offset
# and the scan pointer, appended to the keep's buffer when it has one.
def keep_end
  start, buffer = @keeps.pop
  tail = @scanner.string.byteslice(start, @scanner.pos - start)
  return tail unless buffer

  # String#<< returns the receiver, i.e. the buffer including the tail.
  buffer << tail
end
# File ruby-3.1.2/lib/csv/parser.rb, line 169
#
# Starts a new keep at the current scan position. The entry is a pair of
# the start offset and a buffer slot, which is initially nil.
def keep_start
  @keeps << [@scanner.pos, nil]
end
# File ruby-3.1.2/lib/csv/parser.rb, line 203
#
# Returns the part of the current scanner's string that lies after the scan
# pointer.
def rest
  @scanner.rest
end
# File ruby-3.1.2/lib/csv/parser.rb, line 142
#
# Tries to match +pattern+ at the current scan position of the current
# chunk. Returns the matched string, or nil when there is no match.
#
# Unless the current scanner is the last one, a successful match that
# consumes the rest of the chunk triggers a read of the next chunk.
def scan(pattern)
  matched = @scanner.scan(pattern)
  return matched if @last_scanner

  read_chunk if matched and @scanner.eos?
  matched
end
# File ruby-3.1.2/lib/csv/parser.rb, line 154
#
# Like a single scan of +pattern+, but when the match runs to the end of the
# current chunk it keeps reading chunks and appending further matches for as
# long as each new chunk matches at its start and is consumed to its end.
#
# Returns the accumulated match, or nil when the first attempt fails.
def scan_all(pattern)
  matched = @scanner.scan(pattern)
  return matched if @last_scanner or matched.nil?

  while @scanner.eos? and read_chunk
    piece = @scanner.scan(pattern)
    break unless piece

    matched << piece
  end
  matched
end
# File ruby-3.1.2/lib/csv/parser.rb, line 208
#
# Replaces the current scanner with one over the next chunk of input.
#
# Before switching, the data scanned so far under the most recent keep is
# saved into that keep's buffer and its start offset is reset to 0.
#
# A StringIO source is read in full and removed from the inputs. Any other
# source is read with +gets+ using the row separator and chunk size, and is
# removed once it reports +eof?+ or returns no data.
#
# Returns true when a new chunk was loaded and false when there is nothing
# more to read. Raises InvalidEncoding when the data read is not valid in
# its encoding.
def read_chunk
  return false if @last_scanner

  active_keep = @keeps.last
  if active_keep
    from = active_keep[0]
    scanned = @scanner.string.byteslice(from, @scanner.pos - from)
    if scanned
      if active_keep[1]
        active_keep[1] << scanned
      else
        active_keep[1] = scanned.dup
      end
    end
    # The next scanner starts a fresh string, so the keep begins at 0 there.
    active_keep[0] = 0
  end

  source = @inputs.first
  case source
  when StringIO
    text = source.read
    raise InvalidEncoding unless text.valid_encoding?
    @scanner = StringScanner.new(text)
    @inputs.shift
    @last_scanner = @inputs.empty?
    return true
  end

  chunk = source.gets(@row_separator, @chunk_size)
  unless chunk
    # This source is exhausted: fall back to an empty scanner and move on to
    # the next source, if any.
    @scanner = StringScanner.new("".encode(@encoding))
    @inputs.shift
    @last_scanner = @inputs.empty?
    return @last_scanner ? false : read_chunk
  end

  raise InvalidEncoding unless chunk.valid_encoding?
  @scanner = StringScanner.new(chunk)
  if source.respond_to?(:eof?) and source.eof?
    @inputs.shift
    @last_scanner = @inputs.empty?
  end
  true
end