We work by substituting non-printing characters into the text. For now I'm assuming that I can substitute a character in the range 0..8 for a 7 bit character without damaging the encoded string, but this might be optimistic.
This maps HTML tags to the corresponding attribute char
This maps delimiters that occur around words (such as *bold* or +tt+) where the start and end delimiters are the same. This lets us optimize the regexp.
A \ in front of a character that would normally be processed turns off processing. We do this by turning \< into <#{PROTECT}
And this maps special sequences to a name. A special sequence is something like a WikiWord
And this is used when the delimiters aren't the same. In this case the hash maps a pattern to the attribute character
# File rdoc/markup/simple_markup/inline.rb, line 208
#
# Registers the default inline markup: the "*", "_" and "+" word
# delimiters, the em/i/b/tt/code HTML tags, and HTML comments as the
# :COMMENT special sequence. Registration order is kept as-is.
def initialize
  [["*", :BOLD], ["_", :EM], ["+", :TT]].each do |delimiter, name|
    add_word_pair(delimiter, delimiter, name)
  end

  [["em", :EM], ["i", :EM], ["b", :BOLD], ["tt", :TT], ["code", :TT]].each do |tag, name|
    add_html(tag, name)
  end

  add_special(/<!--(.*?)-->/, :COMMENT)
end
# File rdoc/markup/simple_markup/inline.rb, line 238
#
# Records +tag+ (lower-cased) in HTML_TAGS, mapped to the bitmap that
# Attribute.bitmap_for returns for +name+.
def add_html(tag, name)
  key = tag.downcase
  HTML_TAGS.store(key, Attribute.bitmap_for(name))
end
# File rdoc/markup/simple_markup/inline.rb, line 242
#
# Records +pattern+ in SPECIAL, mapped to the bitmap that
# Attribute.bitmap_for returns for +name+.
def add_special(pattern, name)
  SPECIAL.store(pattern, Attribute.bitmap_for(name))
end
# File rdoc/markup/simple_markup/inline.rb, line 222
#
# Registers a pair of word delimiters for the attribute +name+.
#
# When +start+ and +stop+ are equal the delimiter goes into
# MATCHING_WORD_PAIRS; otherwise a pattern of the form
# (start)(\S+)(stop) is built and stored in WORD_PAIR_MAP. In both cases
# the first character of +start+ is added to PROTECTABLE (kept unique).
#
# Raises RuntimeError when +start+ begins with "<".
def add_word_pair(start, stop, name)
  raise "Word flags may not start '<'" if start[0] == ?<

  bitmap = Attribute.bitmap_for(name)

  if start != stop
    opening = Regexp.escape(start)
    closing = Regexp.escape(stop)
    WORD_PAIR_MAP[Regexp.new("(#{opening})(\\S+)(#{closing})")] = bitmap
  else
    MATCHING_WORD_PAIRS[start] = bitmap
  end

  PROTECTABLE.push(start[0, 1]).uniq!
end
Return an attribute object with the given turn_on and turn_off bits set
# File rdoc/markup/simple_markup/inline.rb, line 122
#
# Builds an AttrChanger from the two bitmaps: +turn_on+ holds the bits
# to switch on, +turn_off+ the bits to switch off.
def attribute(turn_on, turn_off)
  AttrChanger.new(turn_on, turn_off)
end
# File rdoc/markup/simple_markup/inline.rb, line 127
#
# Describes the transition from the bitmap +current+ to the bitmap +new+
# as an attribute change: bits present only in +new+ are turned on, bits
# present only in +current+ are turned off.
def change_attribute(current, new)
  turned_on  = new & ~current
  turned_off = current & ~new
  attribute(turned_on, turned_off)
end
# File rdoc/markup/simple_markup/inline.rb, line 132
#
# Same as change_attribute, but takes collections of attribute names.
# Each name is converted with Attribute.bitmap_for and the bitmaps of a
# collection are OR-ed together before the change is computed.
def changed_attribute_by_name(current_set, new_set)
  current = current_set.inject(0) { |bits, name| bits | Attribute.bitmap_for(name) }
  new     = new_set.inject(0)     { |bits, name| bits | Attribute.bitmap_for(name) }
  change_attribute(current, new)
end
Map attributes like <b>text</b> to the sequence \001\002<char>\001\003<char>, where <char> is a per-attribute specific character
# File rdoc/markup/simple_markup/inline.rb, line 148
#
# Replaces word-delimiter markup in +str+ (in place) with NULL padding of
# the same length and records the attribute of each marked-up word via
# attrs.set_attrs(start, length, bitmap).
#
# str::   the text being processed; modified in place.
# attrs:: receives one set_attrs call per marked-up word.
#
# The return value is not meaningful.
def convert_attrs(str, attrs)
  # First the delimiters whose start and stop are the same character.
  # With no delimiters registered there is nothing to match, and an empty
  # character class would be a regexp syntax error.
  unless MATCHING_WORD_PAIRS.empty?
    # Escape the delimiters so that characters such as "-", "^" or "]"
    # stay literal inside the character class (mask_protected_sequences
    # escapes its character class the same way).
    tags = Regexp.escape(MATCHING_WORD_PAIRS.keys.join(""))
    # The pattern does not change between passes, so build it once.
    re = Regexp.new("(^|\\W)([#{tags}])([A-Za-z_]+?)\\2(\\W|\$)")
    #  re = "(^|\\W)([#{tags}])(\\S+?)\\2(\\W|\$)"

    # Repeat until a pass changes nothing: a match consumes the
    # non-word character that follows it, so a later pass is needed to
    # pick up markup starting right after it.
    1 while str.gsub!(re) {
      match = Regexp.last_match
      lead, mark, word, trail = match[1], match[2], match[3], match[4]
      attr = MATCHING_WORD_PAIRS[mark]
      attrs.set_attrs(match.pre_match.length + lead.length + mark.length,
                      word.length, attr)
      lead + NULL * mark.length + word + NULL * mark.length + trail
    }
  end

  # Then the pairs whose start and stop delimiters differ.
  WORD_PAIR_MAP.each do |regexp, attr|
    str.gsub!(regexp) {
      match = Regexp.last_match
      attrs.set_attrs(match.pre_match.length + match[1].length,
                      match[2].length, attr)
      NULL * match[1].length + match[2] + NULL * match[3].length
    }
  end
end
# File rdoc/markup/simple_markup/inline.rb, line 170 def convert_html(str, attrs) tags = HTML_TAGS.keys.join("|") re = "<(#{tags})>(.*?)</\\1>" 1 while str.gsub!(Regexp.new(re, Regexp::IGNORECASE)) { attr = HTML_TAGS[$1.downcase] html_length = $1.length + 2 seq = NULL * html_length attrs.set_attrs($`.length + html_length, $2.length, attr) seq + $2 + seq + NULL } end
# File rdoc/markup/simple_markup/inline.rb, line 182
#
# Scans +str+ for every pattern registered in SPECIAL. For each match the
# matched span is recorded via attrs.set_attrs, using the pattern's bitmap
# combined with Attribute::SPECIAL. +str+ itself is not changed.
def convert_specials(str, attrs)
  return if SPECIAL.empty?

  SPECIAL.each do |pattern, bitmap|
    str.scan(pattern) do
      hit = Regexp.last_match
      attrs.set_attrs(hit.pre_match.length, hit[0].length,
                      bitmap | Attribute::SPECIAL)
    end
  end
end
# File rdoc/markup/simple_markup/inline.rb, line 139
#
# Returns the part of @str from +start_pos+ up to, but not including,
# +end_pos+, with all NUL characters removed.
def copy_string(start_pos, end_pos)
  @str[start_pos...end_pos].delete("\000")
end
# File rdoc/markup/simple_markup/inline.rb, line 263
#
# Debugging aid. Prints a blank line, then @str with every NULL shown as
# "!", then one line for each of the 16 low attribute bits that is set
# somewhere in @attrs. Within such a line every position carrying the bit
# is marked: "S" for bit 0, otherwise the bit number plus one.
def display_attributes
  puts
  puts @str.tr(NULL, "!")

  16.times do |bno|
    bit = 1 << bno
    label = bno.zero? ? "S" : (bno + 1).to_s

    line = (0...@str.length).map { |i|
      (@attrs[i] & bit) == 0 ? " " : label
    }.join

    # rows without any marker are not printed
    puts(line) unless line =~ /^ *$/
  end
end
# File rdoc/markup/simple_markup/inline.rb, line 246
#
# Converts the marked-up text +str+ into the list produced by
# split_into_flow.
#
# The steps run in this order: protect escaped characters, convert word
# delimiters, convert HTML tags, flag special sequences, undo the
# protection, then split the result.
#
# str:: the text to process. It is not modified.
#
# With $DEBUG set, the working string is dumped before and after.
def flow(str)
  # Work on a private copy. The conversion steps rewrite the string in
  # place, which would otherwise clobber the caller's text and fail
  # outright on a frozen string.
  @str = str.dup

  puts("Before flow, str='#{@str.dump}'") if $DEBUG

  mask_protected_sequences

  @attrs = AttrSpan.new(@str.length)

  puts("After protecting, str='#{@str.dump}'") if $DEBUG

  # Every step works on the same working copy.
  convert_attrs(@str, @attrs)
  convert_html(@str, @attrs)
  convert_specials(@str, @attrs)

  unmask_protected_sequences

  puts("After flow, str='#{@str.dump}'") if $DEBUG

  split_into_flow
end
# File rdoc/markup/simple_markup/inline.rb, line 199
#
# Rewrites @str in place: a backslash followed by one of the PROTECTABLE
# characters is replaced by that character followed by PROTECT_ATTR.
def mask_protected_sequences
  protectable = Regexp.escape(PROTECTABLE.join(''))
  @str.gsub!(Regexp.new("\\\\([#{protectable}])"), "\\1#{PROTECT_ATTR}")
end
# File rdoc/markup/simple_markup/inline.rb, line 285
#
# Splits @str into a flat list, chunked wherever the attribute bitmap in
# @attrs changes. The list holds:
#
# * plain strings (NUL placeholder characters removed by copy_string),
# * the objects returned by change_attribute at each attribute change,
# * Special objects for runs whose bitmap includes Attribute::SPECIAL.
#
# If attributes are still on at the end of the text, a final change back
# to 0 is appended. With $DEBUG set, display_attributes is called first.
def split_into_flow
  display_attributes if $DEBUG

  res = []
  current_attr = 0
  str_len = @str.length

  # Skip leading invisible text. A one-character slice is compared
  # rather than calling zero? on @str[i], because String#[] returns a
  # String on Ruby 1.9 and later and an Integer on 1.8.
  i = 0
  i += 1 while i < str_len && @str[i, 1] == "\000"
  start_pos = i

  # then scan the string, chunking it on attribute changes
  while i < str_len
    new_attr = @attrs[i]
    if new_attr != current_attr
      if i > start_pos
        res << copy_string(start_pos, i)
        start_pos = i
      end

      res << change_attribute(current_attr, new_attr)
      current_attr = new_attr

      if (current_attr & Attribute::SPECIAL) != 0
        # a special run is emitted as a single Special object
        i += 1 while i < str_len && (@attrs[i] & Attribute::SPECIAL) != 0
        res << Special.new(current_attr, copy_string(start_pos, i))
        start_pos = i
        next
      end
    end

    # move on, skipping any invisible characters
    i += 1
    i += 1 while i < str_len && @str[i, 1] == "\000"
  end

  # tidy up trailing text
  res << copy_string(start_pos, str_len) if start_pos < str_len

  # and reset to all attributes off
  res << change_attribute(current_attr, 0) if current_attr != 0

  res
end