# File rexml/encoding.rb, line 7
# Looks up the entry stored under +enc+ in @encoding_methods and invokes
# its #[] with +obj+, returning whatever that call returns.
# NOTE(review): the entries are presumably Procs that install the
# encode/decode methods on +obj+ -- confirm where @encoding_methods is filled.
def self.apply(obj, enc)
  installer = @encoding_methods[enc]
  installer[obj]
end
# File rexml/encoding.rb, line 56
# Works out which encoding +str+ is in.
#
# Returns UTF_16 when +str+ starts with the bytes FE FF and UNILE when it
# starts with FF FE; in both cases the two byte-order-mark bytes are removed
# from +str+ in place.  Otherwise returns the upcased value of the
# encoding="..." pseudo-attribute of an XML declaration if one is found,
# and UTF_8 when nothing else applies.
def check_encoding str
  # Compare byte values rather than str[0]/str[1]: String#[] with an Integer
  # index only returns a number on Ruby 1.8, while unpack yields Integers on
  # every Ruby version (and nil for bytes that are not there).
  first_byte, second_byte = str.unpack('C2')

  # We have to recognize UTF-16, LSB UTF-16, and UTF-8
  if first_byte == 0xfe && second_byte == 0xff
    str[0,2] = ""
    return UTF_16
  elsif first_byte == 0xff && second_byte == 0xfe
    str[0,2] = ""
    return UNILE
  end

  str =~ /^\s*<\?xml\s+version\s*=\s*(['"]).*?\1\s+encoding\s*=\s*(["'])(.*?)\2/m
  return $3.upcase if $3
  return UTF_8
end
Convert to UTF-8
# File rexml/encodings/US-ASCII.rb, line 19
# Re-packs +str+ as UTF-8: every byte of +str+ is read as an unsigned
# 8-bit value and written back out as the code point with that number.
def decode_ascii(str)
  byte_values = str.unpack('C*')
  byte_values.pack('U*')
end
Convert to UTF-8
# File rexml/encodings/CP-1252.rb, line 63
# CP-1252 bytes whose meaning differs from ISO-8859-1, mapped to the
# Unicode code point they stand for.
CP1252_TO_UNICODE = {
  0x80 => 0x20AC, # euro sign
  0x82 => 0x201A, # single low-9 quotation mark
  0x83 => 0x0192, # latin small letter f with hook
  0x84 => 0x201E, # double low-9 quotation mark
  0x85 => 0x2026, # horizontal ellipsis
  0x86 => 0x2020, # dagger
  0x87 => 0x2021, # double dagger
  0x88 => 0x02C6, # modifier letter circumflex accent
  0x89 => 0x2030, # per mille sign
  0x8A => 0x0160, # latin capital letter s with caron
  0x8B => 0x2039, # single left-pointing angle quotation mark
  0x8C => 0x0152, # latin capital ligature oe
  0x8E => 0x017D, # latin capital letter z with caron
  0x91 => 0x2018, # left single quotation mark
  0x92 => 0x2019, # right single quotation mark
  0x93 => 0x201C, # left double quotation mark
  0x94 => 0x201D, # right double quotation mark
  0x95 => 0x2022, # bullet
  0x96 => 0x2013, # en dash
  0x97 => 0x2014, # em dash
  0x98 => 0x02DC, # small tilde
  0x99 => 0x2122, # trade mark sign
  0x9A => 0x0161, # latin small letter s with caron
  0x9B => 0x203A, # single right-pointing angle quotation mark
  0x9C => 0x0153, # latin small ligature oe (was wrongly 0x0152)
  0x9E => 0x017E, # latin small letter z with caron
  0x9F => 0x0178  # latin capital letter y with diaeresis
}.freeze

# Converts the CP-1252 encoded +str+ to UTF-8.
#
# Bytes listed in CP1252_TO_UNICODE are translated to their code point;
# every other byte is used as the code point with the same number.
# Returns the packed UTF-8 string.
def decode_cp1252(str)
  code_points = str.unpack('C*').map { |byte| CP1252_TO_UNICODE.fetch(byte, byte) }
  code_points.pack('U*')
end
# File rexml/encodings/EUC-JP.rb, line 6
# Hands +str+ to Uconv.euctou8 and returns its result.
# NOTE(review): presumably EUC-JP in, UTF-8 out, going by the helper's
# name -- Uconv is an external library not shown here.
def decode_eucjp(str)
  converted = Uconv.euctou8(str)
  converted
end
# File rexml/encodings/ICONV.rb, line 6
# Converts +str+ with Iconv.conv, passing UTF_8 as the first argument and
# the current @encoding as the second, and returns the result.
def decode_iconv(str)
  source_encoding = @encoding
  Iconv.conv(UTF_8, source_encoding, str)
end
# File rexml/encodings/SHIFT-JIS.rb, line 6
# Hands +content+ to Uconv.sjistou8 and returns its result.
# NOTE(review): presumably Shift-JIS in, UTF-8 out, going by the helper's
# name -- Uconv is an external library not shown here.
def decode_sjis(content)
  converted = Uconv.sjistou8(content)
  converted
end
# File rexml/encodings/UNILE.rb, line 18
# Converts +str+, read as a sequence of little-endian 16-bit units, to UTF-8.
#
# Each pair of bytes (low byte first) is combined into one number and that
# number is packed as a code point.  Pairs are not combined any further.
def decode_unile(str)
  bytes = str.unpack('C*')
  code_points = []
  index = 0
  while index < bytes.size
    low = bytes.at(index)
    high = bytes.at(index + 1)
    code_points << (low + high * 0x100)
    index += 2
  end
  code_points.pack('U*')
end
# File rexml/encodings/UTF-16.rb, line 18
# Converts +str+, read as a sequence of big-endian 16-bit units, to UTF-8.
#
# A byte order mark (FE FF) is dropped only when it is the first two bytes
# of +str+.  Each remaining pair of bytes (high byte first) is combined into
# one number and packed as a code point; pairs are not combined any further.
def decode_utf16(str)
  array_enc = str.unpack('C*')
  # Test the leading bytes numerically.  The previous /^\376\377/ pattern is
  # rejected by Ruby 1.9+ as an invalid multibyte escape, and its ^ anchor
  # also matched after any newline byte inside the data.
  array_enc = array_enc[2..-1] if array_enc[0] == 0xfe && array_enc[1] == 0xff
  array_utf8 = []
  0.step(array_enc.size - 1, 2) { |i|
    array_utf8 << (array_enc.at(i + 1) + array_enc.at(i) * 0x100)
  }
  array_utf8.pack('U*')
end
# File rexml/encodings/UTF-8.rb, line 7
# Identity conversion: +str+ is handed back untouched (the very same
# object), since no work is needed when the input is already UTF-8.
def decode_utf8(str)
  str
end
Convert from UTF-8
# File rexml/encodings/US-ASCII.rb, line 4
# Converts the UTF-8 string +content+ to US-ASCII bytes.
#
# Code points up to 0x7F are written as a single byte; anything larger is
# written as a decimal numeric character reference (&#nnnn;).
def encode_ascii(content)
  ascii_bytes = []
  content.unpack('U*').each do |code_point|
    if code_point > 0x7F
      # Numeric entity (&#nnnn;); shared by Stefan Scholl
      ascii_bytes.concat "&\##{code_point};".unpack('C*')
    else
      ascii_bytes << code_point
    end
  end
  ascii_bytes.pack('C*')
end
Convert from UTF-8
# File rexml/encodings/CP-1252.rb, line 14
# Unicode code points above 0xFF that have a byte of their own in CP-1252.
UNICODE_TO_CP1252 = {
  0x20AC => 0x80, # euro sign
  0x201A => 0x82, # single low-9 quotation mark
  0x0192 => 0x83, # latin small letter f with hook
  0x201E => 0x84, # double low-9 quotation mark
  0x2026 => 0x85, # horizontal ellipsis
  0x2020 => 0x86, # dagger
  0x2021 => 0x87, # double dagger
  0x02C6 => 0x88, # modifier letter circumflex accent
  0x2030 => 0x89, # per mille sign
  0x0160 => 0x8A, # latin capital letter s with caron
  0x2039 => 0x8B, # single left-pointing angle quotation mark
  0x0152 => 0x8C, # latin capital ligature oe
  0x017D => 0x8E, # latin capital letter z with caron
  0x2018 => 0x91, # left single quotation mark
  0x2019 => 0x92, # right single quotation mark
  0x201C => 0x93, # left double quotation mark
  0x201D => 0x94, # right double quotation mark
  0x2022 => 0x95, # bullet
  0x2013 => 0x96, # en dash
  0x2014 => 0x97, # em dash
  0x02DC => 0x98, # small tilde
  0x2122 => 0x99, # trade mark sign
  0x0161 => 0x9A, # latin small letter s with caron
  0x203A => 0x9B, # single right-pointing angle quotation mark
  0x0153 => 0x9C, # latin small ligature oe (was a duplicate 0x0152 entry)
  0x017E => 0x9E, # latin small letter z with caron
  0x0178 => 0x9F  # latin capital letter y with diaeresis
}.freeze

# Converts the UTF-8 string +content+ to CP-1252 bytes.
#
# Code points up to 0xFF are written as the byte with the same value, code
# points listed in UNICODE_TO_CP1252 as their CP-1252 byte, and everything
# else as a decimal numeric character reference (&#nnnn;).
def encode_cp1252(content)
  array_enc = []
  content.unpack('U*').each do |num|
    byte = num <= 0xFF ? num : UNICODE_TO_CP1252[num]
    if byte
      array_enc << byte
    else
      # Numeric entity (&#nnnn;); shared by Stefan Scholl
      array_enc.concat "&\##{num};".unpack('C*')
    end
  end
  array_enc.pack('C*')
end
# File rexml/encodings/EUC-JP.rb, line 10
# Hands +content+ to Uconv.u8toeuc and returns its result.
# NOTE(review): presumably UTF-8 in, EUC-JP out, going by the helper's
# name -- Uconv is an external library not shown here.
def encode_eucjp(content)
  converted = Uconv.u8toeuc(content)
  converted
end
# File rexml/encodings/ICONV.rb, line 10
# Converts +content+ with Iconv.conv, passing the current @encoding as the
# first argument and UTF_8 as the second, and returns the result.
def encode_iconv(content)
  target_encoding = @encoding
  Iconv.conv(target_encoding, UTF_8, content)
end
# File rexml/encodings/SHIFT-JIS.rb, line 10
# Hands +str+ to Uconv.u8tosjis and returns its result.
# NOTE(review): presumably UTF-8 in, Shift-JIS out, going by the helper's
# name -- Uconv is an external library not shown here.
def encode_sjis(str)
  converted = Uconv.u8tosjis(str)
  converted
end
# File rexml/encodings/UNILE.rb, line 3
# Converts the UTF-8 string +content+ to little-endian 16-bit units.
#
# Every code point becomes exactly two bytes, low byte first.  Code points
# above 0xFFFF do not fit in one unit and are replaced by a question mark.
def encode_unile content
  units = content.unpack("U*").map do |num|
    # 0x3F is '?'.  A ?x character literal is a String on Ruby 1.9+ and
    # cannot be packed as a number, so the byte value is spelled out.
    num > 0xFFFF ? 0x3F : num
  end
  # 'v' writes each value as an unsigned 16-bit little-endian integer.
  units.pack('v*')
end
# File rexml/encodings/UTF-16.rb, line 3
# Converts the UTF-8 string +content+ to big-endian 16-bit units.
#
# Every code point becomes exactly two bytes, high byte first.  Code points
# above 0xFFFF do not fit in one unit and are replaced by a question mark.
# No byte order mark is written.
def encode_utf16 content
  units = content.unpack("U*").map do |num|
    # 0x3F is '?'.  A ?x character literal is a String on Ruby 1.9+ and
    # cannot be packed as a number, so the byte value is spelled out.
    num > 0xFFFF ? 0x3F : num
  end
  # 'n' writes each value as an unsigned 16-bit big-endian integer.
  units.pack('n*')
end
# File rexml/encodings/UTF-8.rb, line 3
# Identity conversion: +content+ is handed back untouched (the very same
# object), since no work is needed when the target encoding is UTF-8.
def encode_utf8(content)
  content
end
# File rexml/encoding.rb, line 22
# Switches the receiver to the encoding named +enc+ (case-insensitive).
#
# Returns false when +enc+ is already the current encoding, true otherwise.
# A nil +enc+, or UTF_8, selects the UTF-8 coder.  For any other name the
# ICONV coder is tried first and, if that fails, the coder file
# rexml/encodings/<NAME>.rb.
#
# Raises ArgumentError when the name contains anything but word characters
# and hyphens, or when no coder can be loaded for it.
# $VERBOSE is silenced for the duration of the call and always restored.
def encoding=( enc )
  old_verbosity = $VERBOSE
  begin
    $VERBOSE = false
    enc = enc.upcase unless enc.nil?
    return false if defined?(@encoding) && enc == @encoding
    if enc && enc != UTF_8
      # Validate BEFORE storing, and anchor on the whole string (\A..\z):
      # the name ends up in a require path, and with ^..$ a value such as
      # "X\n../evil" would pass because only one line has to match.
      raise ArgumentError, "Bad encoding name #{enc}" unless enc =~ /\A[\w-]+\z/
      @encoding = enc
      # untaint no longer exists on recent Rubies.
      @encoding.untaint if @encoding.respond_to?(:untaint)
      begin
        require 'rexml/encodings/ICONV.rb'
        Encoding.apply(self, "ICONV")
      # NOTE(review): rescuing Exception is very broad; kept as-is because
      # any failure of the ICONV path deliberately falls back to the
      # per-encoding coder file below.
      rescue LoadError, Exception
        begin
          enc_file = File.join( "rexml", "encodings", "#@encoding.rb" )
          require enc_file
          Encoding.apply(self, @encoding)
        rescue LoadError => err
          puts err.message
          raise ArgumentError, "No decoder found for encoding #@encoding. Please install iconv."
        end
      end
    else
      @encoding = UTF_8
      require 'rexml/encodings/UTF-8.rb'
      Encoding.apply(self, @encoding)
    end
  ensure
    $VERBOSE = old_verbosity
  end
  true
end
Convert to UTF-8
# File rexml/encodings/ISO-8859-15.rb, line 51
# Converts the ISO-8859-15 encoded +str+ to UTF-8.
#
# The eight bytes that ISO-8859-15 assigns differently from ISO-8859-1 are
# translated to their code points; every other byte is used as the code
# point with the same number.
def from_iso_8859_15(str)
  code_points = str.unpack('C*').map do |byte|
    case byte
    # characters that differ compared to iso-8859-1
    when 0xA4 then 0x20AC
    when 0xA6 then 0x0160
    when 0xA8 then 0x0161
    when 0xB4 then 0x017D
    when 0xB8 then 0x017E
    when 0xBC then 0x0152
    when 0xBD then 0x0153
    when 0xBE then 0x0178
    else byte
    end
  end
  code_points.pack('U*')
end
Convert from UTF-8
# File rexml/encodings/ISO-8859-15.rb, line 12
# Converts the UTF-8 string +content+ to ISO-8859-15 bytes.
#
# * The eight characters ISO-8859-15 added are written as their new byte.
# * The eight ISO-8859-1 characters whose byte was reused for those
#   additions, and every code point above 0xFF, cannot be represented and
#   are written as a decimal numeric character reference (&#nnnn;).
# * All other code points up to 0xFF are written as the byte with the
#   same value.
def to_iso_8859_15(content)
  array_enc = []
  content.unpack('U*').each do |num|
    case num
    # characters added compared to iso-8859-1
    when 0x20AC then array_enc << 0xA4
    when 0x0160 then array_enc << 0xA6
    when 0x0161 then array_enc << 0xA8
    when 0x017D then array_enc << 0xB4
    when 0x017E then array_enc << 0xB8
    when 0x0152 then array_enc << 0xBC
    when 0x0153 then array_enc << 0xBD
    when 0x0178 then array_enc << 0xBE
    # characters removed compared to iso-8859-1: their byte now means
    # something else.  These used to be pushed as String literals, which
    # pack('C*') cannot handle; emit the entity's bytes instead.
    when 0xA4, 0xA6, 0xA8, 0xB4, 0xB8, 0xBC, 0xBD, 0xBE
      array_enc.concat "&\##{num};".unpack('C*')
    # all remaining basic characters can be used directly
    when 0..0xFF
      array_enc << num
    else
      # Numeric entity (&#nnnn;); shared by Stefan Scholl
      array_enc.concat "&\##{num};".unpack('C*')
    end
  end
  array_enc.pack('C*')
end
Commenting is here to help enhance the documentation, for example with code samples or clarifications of the documentation.
If you have questions about Ruby or the documentation, please post to one of the Ruby mailing lists. You will get better, faster help that way.
If you wish to post a correction of the docs, please do so, but also file a bug report so that it can be corrected for the next release. Thank you.
If you want to help improve the Ruby documentation, please see Improve the docs, or visit Documenting-ruby.org.