Object
This API is experimental, and subject to change.
  parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
  while parser.has_next?
    res = parser.next
    puts res[1]['att'] if res.start_tag? and res[0] == 'b'
  end
See the PullEvent class for information on the content of the results. The data is identical to the arguments passed for the various events to the StreamListener API.
Notice that:
  parser = PullParser.new( "<a>BAD DOCUMENT" )
  while parser.has_next?
    res = parser.next
    raise res[1] if res.error?
  end
Nat Price gave me some good ideas for the API.
Entity constants
Just for backward compatibility. For example, kramdown uses this. It’s not used in REXML.
# File rexml-3.2.5/lib/rexml/parsers/baseparser.rb, line 120
# Registers +listener+ by appending it to the parser's listener list.
# Every listener in that list has +receive+ called with each event
# returned from #pull.
def add_listener(listener)
  @listeners << listener
end
Returns true if there are no more events
# File rexml-3.2.5/lib/rexml/parsers/baseparser.rb, line 146
# True only when both the underlying source and the stack of already
# parsed events report that they are empty.
def empty?
  @source.empty? && @stack.empty?
end
# File rexml-3.2.5/lib/rexml/parsers/baseparser.rb, line 438
# Resolves the entity named +reference+.
#
# The caller-supplied +entities+ table (may be nil) is consulted first;
# if it yields nothing, the third element of the matching
# DEFAULT_ENTITIES entry is used instead.
#
# Returns the resolved value passed through #unnormalize, or nil when
# the reference is unknown.
def entity(reference, entities)
  resolved = entities && entities[reference]
  unless resolved
    default = DEFAULT_ENTITIES[reference]
    resolved = default && default[2]
  end
  return unless resolved

  unnormalize(resolved, entities)
end
Returns true if there are more events. Synonymous with !empty?
# File rexml-3.2.5/lib/rexml/parsers/baseparser.rb, line 151
# True while either the underlying source or the stack of already
# parsed events still holds something; the logical negation of #empty?.
def has_next?
  !@source.empty? || !@stack.empty?
end
Escapes all possible entities
# File rexml-3.2.5/lib/rexml/parsers/baseparser.rb, line 449
# Returns an escaped copy of +input+; +input+ itself is never modified.
#
# input::         the text to escape.
# entities::      optional table of entity name => replacement text; each
#                 occurrence of a value in the text becomes "&name;".
# entity_filter:: optional collection of entity names that must NOT be
#                 substituted from +entities+.
#
# Steps, in order:
# 1. every match of EREFERENCE is turned into "&amp;" (EREFERENCE is
#    defined elsewhere; presumably it matches an ampersand that does not
#    begin an entity reference -- confirm against its definition),
# 2. values from +entities+ are replaced by their "&name;" reference,
# 3. EREFERENCE is applied once more,
# 4. for every DEFAULT_ENTITIES entry, matches of element 3 are replaced
#    with element 1.
def normalize(input, entities = nil, entity_filter = nil)
  # dup rather than clone: clone would carry over a frozen flag and make
  # the in-place gsub! calls below raise.
  copy = input.dup
  # Doing it like this rather than in a loop improves the speed
  copy.gsub!(EREFERENCE, '&amp;')
  if entities
    entities.each do |key, value|
      # The filter lists entity names, so it is checked against the key.
      next if entity_filter && entity_filter.include?(key)

      copy.gsub!(value, "&#{key};")
    end
  end
  copy.gsub!(EREFERENCE, '&amp;')
  DEFAULT_ENTITIES.each do |_name, value|
    copy.gsub!(value[3], value[1])
  end
  copy
end
Peek at the event at position depth in the stack. The first element on
the stack is at depth 0. If depth is -1, the parser reads to the
end of the input stream and returns the last event, which is always
:end_document. Be aware that this causes the stream to be parsed up to the
event at position depth, so you can effectively pre-parse the entire
document (pull the entire thing into memory) using this method.
# File rexml-3.2.5/lib/rexml/parsers/baseparser.rb, line 167
# Returns the event at index +depth+ of the event stack without removing
# it, pulling further events first when the stack is too short.
#
# depth:: index of the wanted event (0 is the first element); -1 pulls
#         until #empty? is true and returns the last stacked event.
#
# Raises RuntimeError when +depth+ is below -1.
def peek(depth = 0)
  raise %Q[Illegal argument "#{depth}"] if depth < -1

  pulled = []
  if depth == -1
    pulled << pull until empty?
  else
    wanted = depth + 1
    # @stack.size is re-read on every pass because pull may change it.
    pulled << pull while @stack.size + pulled.size < wanted
  end
  @stack += pulled unless pulled.empty?
  @stack[depth]
end
# File rexml-3.2.5/lib/rexml/parsers/baseparser.rb, line 136
# Reports the source's current position when the source exposes a
# +position+ method; otherwise falls back to 0.
def position
  return @source.position if @source.respond_to?(:position)

  # FIXME
  0
end
Returns the next event. This is a PullEvent object.
# File rexml-3.2.5/lib/rexml/parsers/baseparser.rb, line 182
# Fetches the next event from #pull_event, passes it to +receive+ on
# every registered listener in registration order, and returns it.
def pull
  event = pull_event
  @listeners.each { |listener| listener.receive(event) }
  event
end
# File rexml-3.2.5/lib/rexml/parsers/baseparser.rb, line 126
# Points the parser at a new input and resets its per-document state.
#
# +source+ is handed to SourceFactory.create_from; the closed-tag and
# document-status markers are cleared and the tag, event, entity and
# namespace stacks are replaced with fresh empty arrays.
def stream=(source)
  # Create the source first so that a failure leaves the old state intact.
  @source = SourceFactory.create_from(source)
  @closed = @document_status = nil
  @tags = []
  @stack = []
  @entities = []
  @nsstack = []
end
Unescapes all possible entities
# File rexml-3.2.5/lib/rexml/parsers/baseparser.rb, line 465
# Returns an unescaped copy of +string+; +string+ itself is never modified.
#
# string::   the text to unescape.
# entities:: optional table of entity name => value, passed on to #entity.
# filter::   optional collection of entity names that must be left
#            untouched.
#
# Line endings ("\r\n" and lone "\r") are always converted to "\n".
# If the text contains no match for REFERENCE_RE nothing else is done.
# Otherwise numeric character references are decoded, then each named
# reference is replaced via #entity (falling back to DEFAULT_ENTITIES),
# and finally "&amp;" becomes "&".
def unnormalize(string, entities = nil, filter = nil)
  # dup rather than clone: clone would carry over a frozen flag and make
  # the in-place gsub! calls below raise.
  rv = string.dup
  rv.gsub!(/\r\n?/, "\n")
  matches = rv.scan(REFERENCE_RE)
  return rv if matches.empty?

  # Decimal (&#65;) and hexadecimal (&#x41;) character references.
  # Leading zeros are consumed outside the capture so that Integer()
  # does not read e.g. "010" as octal.
  rv.gsub!(/&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/) do
    code = $1
    code = "0#{code}" if code.start_with?('x')
    [Integer(code)].pack('U*')
  end

  # scan yields one capture list per match; the first capture is used as
  # the entity name (presumably nil for numeric references -- see
  # REFERENCE_RE), and nils are dropped.
  names = matches.collect { |captures| captures[0] }.compact
  unless names.empty?
    names.each do |entity_reference|
      next if filter && filter.include?(entity_reference)

      entity_value = entity(entity_reference, entities)
      if entity_value
        # Literal pattern + block: the name is not treated as a regex and
        # backslashes in the value are not treated as back-references.
        rv.gsub!("&#{entity_reference};") { entity_value }
      else
        er = DEFAULT_ENTITIES[entity_reference]
        rv.gsub!(er[0], er[2]) if er
      end
    end
    # Done last so that "&amp;lt;" ends up as "&lt;" rather than "<".
    rv.gsub!(/&amp;/, '&')
  end
  rv
end