![show/hide quicksearch [+]](./images/find.png)
automatically generated by template/unicode_norm_gen.tmpl
Constant for the maximum hash capacity, to avoid DoS attacks
Regular Expressions and Hash Constants
Constants For Hangul
For details such as the meaning of the identifiers below, please see https://www.unicode.org/versions/Unicode7.0.0/ch03.pdf, pp. 144-145.
Unicode-based encodings (except UTF-8)
Canonical Ordering
 
               # File unicode_normalize/normalize.rb, line 67
# Puts the characters of +string+ into canonical order.
#
# Every character is paired with its CLASS_TABLE entry. Adjacent pairs are
# then exchanged whenever the later character has a non-zero class that is
# smaller than the class of the character in front of it. A character whose
# class is 0 is therefore never swapped ahead of its predecessor.
#
# Returns a new String made of the reordered characters.
def self.canonical_ordering_one(string)
  pairs = string.each_char.map { |char| [char, CLASS_TABLE[char]] }
  # Bubble-sort-like passes: each pass covers one position less than the one before.
  (pairs.size - 2).downto(0) do |upper|
    0.upto(upper) do |pos|
      following_class = pairs[pos + 1].last
      next unless 0 < following_class && following_class < pairs[pos].last

      pairs[pos], pairs[pos + 1] = pairs[pos + 1], pairs[pos]
    end
  end
  pairs.map(&:first).join('')
end
             
             
               # File unicode_normalize/normalize.rb, line 51
# Composes a leading run of conjoining Hangul jamo into one precomposed syllable.
#
# When +string+ starts with a leading consonant (offset from LBASE within
# LCOUNT) followed by a vowel (offset from VBASE within VCOUNT), the two are
# replaced by the corresponding syllable. If the third character is a trailing
# consonant, it is folded into the syllable as well. All remaining characters
# are appended unchanged.
#
# A string shorter than two characters, or one that does not start with such
# a lead/vowel pair, is returned as is (the same object).
#
# The trailing offset must be strictly positive: an offset of 0 (the code
# point TBASE itself) denotes "no trailing consonant", so such a character is
# not a trailing consonant and has to be kept in the output instead of being
# absorbed (and thereby lost).
def self.hangul_comp_one(string)
  length = string.length
  return string if length < 2

  lead = string[0].ord - LBASE
  return string unless 0 <= lead && lead < LCOUNT

  vowel = string[1].ord - VBASE
  return string unless 0 <= vowel && vowel < VCOUNT

  lead_vowel = SBASE + (lead * VCOUNT + vowel) * TCOUNT
  if length > 2
    trail = string[2].ord - TBASE
    # 0 < trail, not 0 <= trail: TBASE itself is not a trailing consonant.
    if 0 < trail && trail < TCOUNT
      return (lead_vowel + trail).chr(Encoding::UTF_8) + string[3..-1]
    end
  end
  lead_vowel.chr(Encoding::UTF_8) + string[2..-1]
end
             
            Hangul Algorithm
 
               # File unicode_normalize/normalize.rb, line 42
# Decomposes a precomposed Hangul syllable at the start of +target+ into its
# conjoining jamo.
#
# The first character is interpreted as an offset from SBASE. If that offset
# lies within SCOUNT, the character is replaced by its leading consonant and
# vowel, plus a trailing consonant unless the computed trailing code point is
# TBASE itself (which marks "no trailing consonant"). The rest of +target+ is
# appended unchanged.
#
# If +target+ is empty or does not start with such a syllable, +target+ is
# returned as is. The empty check mirrors hangul_comp_one, which also returns
# an empty string unchanged; without it String#ord would raise ArgumentError.
def self.hangul_decomp_one(target)
  return target if target.empty?

  syllable_index = target.ord - SBASE
  return target unless 0 <= syllable_index && syllable_index < SCOUNT

  lead = LBASE + syllable_index / NCOUNT
  vowel = VBASE + (syllable_index % NCOUNT) / TCOUNT
  trail = TBASE + syllable_index % TCOUNT
  jamo = trail == TBASE ? [lead, vowel] : [lead, vowel, trail]
  jamo.pack('U*') + target[1..-1]
end
             
             
               # File unicode_normalize/normalize.rb, line 86
# Composes one pattern match (not a whole String) as far as possible.
#
# The input is first decomposed with nfd_one. Its first character becomes the
# current base; each following character is then either merged into the base
# (when its class is greater than the last class kept so far and
# COMPOSITION_TABLE has an entry for base + character) or kept as a pending
# character, in which case its class becomes the new "last class".
#
# The base followed by the pending characters is finally passed through
# hangul_comp_one, and that result is returned.
def self.nfc_one(string)
  decomposed = nfd_one(string)
  base = decomposed[0]
  # Start one below the base's own class so the first following character
  # with a class at least as high can be considered for composition.
  previous_class = CLASS_TABLE[base] - 1
  pending = ''
  decomposed[1..-1].each_char do |mark|
    mark_class = CLASS_TABLE[mark]
    # Only look up a composite when the character is not blocked.
    composite = COMPOSITION_TABLE[base + mark] if previous_class < mark_class
    if composite
      base = composite
    else
      pending << mark
      previous_class = mark_class
    end
  end
  hangul_comp_one(base + pending)
end
             
            Normalization Forms for Patterns (not whole Strings)
 
               # File unicode_normalize/normalize.rb, line 81
# Decomposes one pattern match (not a whole String).
#
# Each character is replaced by its DECOMPOSITION_TABLE entry when there is
# one and kept otherwise. The joined result is then passed through
# hangul_decomp_one and finally through canonical_ordering_one, whose result
# is returned.
def self.nfd_one(string)
  expanded = string.each_char.map { |char| DECOMPOSITION_TABLE[char] || char }.join('')
  jamo_expanded = hangul_decomp_one(expanded)
  canonical_ordering_one(jamo_expanded)
end
             
             
               # File unicode_normalize/normalize.rb, line 103
# Returns +string+ converted to the Unicode normalization form +form+.
#
# string:: the String to normalize; its encoding selects the strategy.
# form::   one of :nfc (the default), :nfd, :nfkc or :nfkd.
#
# * UTF-8 strings are rewritten with +gsub+, using the regular expression and
#   replacement hash that belong to the requested form. The two compatibility
#   forms first apply the REGEXP_K / KOMPATIBLE_TABLE substitution.
# * US-ASCII strings are returned as they are (the same object, and without
#   looking at +form+).
# * Strings in one of UNICODE_ENCODINGS are transcoded to UTF-8, normalized,
#   and transcoded back to their original encoding.
#
# Raises ArgumentError for an unknown +form+ on a UTF-8 string, and
# Encoding::CompatibilityError for any encoding not listed above.
def self.normalize(string, form = :nfc)
  source_encoding = string.encoding
  case source_encoding
  when Encoding::UTF_8
    case form
    when :nfc
      string.gsub(REGEXP_C, NF_HASH_C)
    when :nfd
      string.gsub(REGEXP_D, NF_HASH_D)
    when :nfkc
      compat = string.gsub(REGEXP_K, KOMPATIBLE_TABLE)
      compat.gsub(REGEXP_C, NF_HASH_C)
    when :nfkd
      compat = string.gsub(REGEXP_K, KOMPATIBLE_TABLE)
      compat.gsub(REGEXP_D, NF_HASH_D)
    else
      raise ArgumentError, "Invalid normalization form #{form}."
    end
  when Encoding::US_ASCII
    string
  when *UNICODE_ENCODINGS
    # Round-trip through UTF-8, where the tables and patterns apply.
    utf8_result = normalize(string.encode(Encoding::UTF_8), form)
    utf8_result.encode(source_encoding)
  else
    raise Encoding::CompatibilityError, "Unicode Normalization not appropriate for #{source_encoding}"
  end
end
             
             
               # File unicode_normalize/normalize.rb, line 128
# Tells whether +string+ is already in the Unicode normalization form +form+.
#
# string:: the String to check; its encoding selects the strategy.
# form::   one of :nfc (the default), :nfd, :nfkc or :nfkd.
#
# * For UTF-8, :nfc and :nfd scan the string with the form's regular
#   expression and return false as soon as a match differs from its entry in
#   the form's hash; otherwise they return true. :nfkc and :nfkd require the
#   corresponding non-compatibility check to pass and, in addition, that
#   REGEXP_K does not match anywhere in the string.
# * US-ASCII strings are always reported as normalized (+form+ is not checked).
# * Strings in one of UNICODE_ENCODINGS are transcoded to UTF-8 and checked there.
#
# Raises ArgumentError for an unknown +form+ on a UTF-8 string, and
# Encoding::CompatibilityError for any encoding not listed above.
def self.normalized?(string, form = :nfc)
  source_encoding = string.encoding
  case source_encoding
  when Encoding::UTF_8
    case form
    when :nfc
      string.scan(REGEXP_C) { |segment| return false unless NF_HASH_C[segment] == segment }
      true
    when :nfd
      string.scan(REGEXP_D) { |segment| return false unless NF_HASH_D[segment] == segment }
      true
    when :nfkc
      normalized?(string, :nfc) && string !~ REGEXP_K
    when :nfkd
      normalized?(string, :nfd) && string !~ REGEXP_K
    else
      raise ArgumentError, "Invalid normalization form #{form}."
    end
  when Encoding::US_ASCII
    true
  when *UNICODE_ENCODINGS
    utf8_copy = string.encode(Encoding::UTF_8)
    normalized?(utf8_copy, form)
  else
    raise Encoding::CompatibilityError, "Unicode Normalization not appropriate for #{source_encoding}"
  end
end