indexing

description: "Still to be entered";
keywords: "Still to be entered";
status: "See notice at end of class";
date: "$Date$";
revision: "$Revision$"

class UNICODESET_CONVERTER
    -- Code set converter that routes every conversion through UTF8
    -- using the external C routines `uni_toUTF8' and `uni_fromUTF8'.

inherit
    CODESET_CONVERTER
        redefine
            encode, encode_and_terminate,
            decode, decode_and_terminate
        end

creation { CODESET_STATICS }
    make2

---------------------------------

feature -- Encoding and Decoding

    encode (from_data : ARRAY [INTEGER]; len : INTEGER;
            ec : DATA_ENCODER) : INTEGER is
            -- Unpack `from_data' into one code point per element
            -- (according to `from_cs.codepoint_size'), convert the
            -- code points with `convert' and write the converted
            -- elements to `ec' (according to `to_cs.codepoint_size').
            -- If `from_data.item (len)' is 0 it is treated as a
            -- terminator and excluded from the input. The last
            -- element delivered by `convert' (the terminating 0) is
            -- not written. Result is the count returned by `convert'
            -- minus one; it is negative if `convert' failed.
        local
            f, t    : ARRAY [INTEGER]
            i, j    : INTEGER
            n       : INTEGER
            flen    : INTEGER
            tlen    : INTEGER
            written : INTEGER
        do
            flen := from_cs.codepoint_size
            if flen = 3 then
                flen := 4 -- alignment ...
            end
                -- Guard the index so that `len' = 0 (or an out of
                -- range `len') does not violate the precondition
                -- of `item'.
            if from_data.valid_index (len)
               and then from_data.item (len) = 0 then
                n := len - 1
            else
                n := len
            end
            flen := n * flen
            create f.make (1, flen)

            inspect from_cs.codepoint_size
            when 1 then
                from
                    i := 1
                    j := from_data.lower
                until
                    i > flen or else j > from_data.upper
                loop
                    f.put (from_data.item (j), i)
                    i := i + 1
                    j := j + 1
                end
                check
                    got_all_data1 :
                        j <= from_data.upper implies
                        from_data.item (j) = 0
                end

            when 2 then
                from
                    i := 1
                    j := from_data.lower
                until
                    i > flen or else j > from_data.upper
                loop
                    f.put (get_ushort (from_data, j), i)
                    i := i + 1
                    j := j + 2
                end
                check
                    got_all_data2 :
                        j <= from_data.upper - 1 implies
                        get_ushort (from_data, j) = 0
                end

            when 3, 4 then
                    -- XXX this is bound to go wrong if codepoint_size = 4
                    -- and the platform is 32 bit, since then Eiffel
                    -- INTEGERs are _signed_ long ...
                from
                    i := 1
                    j := from_data.lower
                until
                    i > flen or else j > from_data.upper
                loop
                    f.put (get_ulong (from_data, j), i)
                    i := i + 1
                    j := j + 4
                end -- loop over i and j
                check
                    got_all_data3_4 :
                        j <= from_data.upper - 3 implies
                        get_ulong (from_data, j) = 0
                end

            else
                logger.log (logger.Log_err, "General",
                            "UNICODESET_CONVERTER",
                            "encode").put_string (
                    "trying to encode with nonsupported codepoint size%N")
                check
                    supported_codepoint_size1 : false
                end
            end

            tlen := to_cs.codepoint_size
            if tlen = 3 then
                tlen := 4 -- alignment ...
            end
                -- Use the aligned size; the unaligned
                -- `to_cs.codepoint_size' would discard the
                -- adjustment made just above.
            tlen := len * tlen
            create t.make (1, tlen)
            written := convert (f, t, flen)
            written := written - 1 -- forget the terminating 0.
            check
                not_too_much_data2 : written <= t.count
            end

            if written > 0 then
                inspect to_cs.codepoint_size
                when 1 then
                    from
                        i := 1
                    until
                        i > written
                    loop
                        ec.get_buffer.put1 (t.item (i))
                            -- Can't use (unsigned) octets, since
                            -- after conversion some elements (of `t')
                            -- might be negative.
                        i := i + 1
                    end

                when 2 then
                    from
                        i := 1
                    until
                        i > written
                    loop
                        ec.put_ushort (t.item (i))
                        i := i + 1
                    end

                when 3, 4 then
                    from
                        i := 1
                    until
                        i > written
                    loop
                        ec.put_ulong (t.item (i))
                        i := i + 1
                    end

                else
                    logger.log (logger.Log_err, "General",
                                "UNICODESET_CONVERTER",
                                "encode").put_string (
                        "trying to encode with nonsupported codepoint size%N")
                    check
                        supported_codepoint_size2 : false
                    end
                end
            end -- if written > 0 then ...

            Result := written
        end
---------------------------------

    encode_and_terminate (from_data : ARRAY [INTEGER]; len : INTEGER;
                          ec : DATA_ENCODER) : INTEGER is
            -- Like `encode', but no trailing 0 is stripped from the
            -- input and every element delivered by `convert'
            -- (including the terminating 0) is written to `ec'.
            -- Result is the count returned by `convert'; it is
            -- negative if `convert' failed.
        local
            f, t    : ARRAY [INTEGER]
            i, j    : INTEGER
            flen    : INTEGER
            tlen    : INTEGER
            written : INTEGER
            log     : IO_MEDIUM
        do
            flen := from_cs.codepoint_size
            if flen = 3 then
                flen := 4 -- alignment ...
            end
            flen := len * flen
            create f.make (1, flen)

            inspect from_cs.codepoint_size
            when 1 then
                from
                    i := 1
                    j := from_data.lower
                until
                    i > flen or else j > from_data.upper
                loop
                    f.put (from_data.item (j), i)
                    i := i + 1
                    j := j + 1
                end

            when 2 then
                    -- Stop at the end of `from_data' as well, exactly
                    -- as the one-byte branch and `encode' do.
                from
                    i := 1
                    j := from_data.lower
                until
                    i > flen or else j > from_data.upper
                loop
                    f.put (get_ushort (from_data, j), i)
                    i := i + 1
                    j := j + 2
                end

            when 3, 4 then
                    -- XXX this is bound to go wrong if codepoint_size = 4
                    -- and the platform is 32 bit, since then Eiffel
                    -- INTEGERs are signed long ...
                from
                    i := 1
                    j := from_data.lower
                until
                    i > flen or else j > from_data.upper
                loop
                    f.put (get_ulong (from_data, j), i)
                    i := i + 1
                    j := j + 4
                end -- loop over i and j

            else
                log := logger.log (logger.Log_err, "General",
                                   "UNICODESET_CONVERTER",
                                   "encode_and_terminate")
                log.put_string ("trying to encode with nonsupported codepoint size ")
                log.putint (from_cs.codepoint_size)
                log.new_line
                check
                    supported_codepoint_size1 : false
                end
            end

            tlen := to_cs.codepoint_size
            if tlen = 3 then
                tlen := 4 -- alignment ...
            end
                -- Use the aligned size, not the raw codepoint size.
            tlen := len * tlen
            create t.make (1, tlen)
            written := convert (f, t, flen)
                -- this time we don't forget the terminating 0.

            if written > 0 then
                inspect to_cs.codepoint_size
                when 1 then
                    from
                        i := 1
                    until
                        i > written
                    loop
                        ec.get_buffer.put1 (t.item (i))
                            -- Can't use (unsigned) octets, since
                            -- after conversion some elements (of `t')
                            -- might be negative.
                        i := i + 1
                    end

                when 2 then
                    from
                        i := 1
                    until
                        i > written
                    loop
                        ec.put_ushort (t.item (i))
                        i := i + 1
                    end

                when 3, 4 then
                    from
                        i := 1
                    until
                        i > written
                    loop
                        ec.put_ulong (t.item (i))
                        i := i + 1
                    end

                else
                    log := logger.log (logger.Log_err, "General",
                                       "UNICODESET_CONVERTER",
                                       "encode_and_terminate")
                    log.put_string ("trying to encode with nonsupported codepoint size ")
                    log.putint (to_cs.codepoint_size)
                    log.new_line
                    check
                        supported_codepoint_size2 : false
                    end
                end
            end -- if written > 0 then ...

            Result := written
        end
---------------------------------

    decode (dc : DATA_DECODER; to_data : ARRAY [INTEGER];
            len : INTEGER) : INTEGER is
            -- `to_data' is where the decoded data is to be put. The caller
            -- should have dimensioned `to_data' correctly; if `to_data' is
            -- too small some of the decoded data will be missing. `len' is
            -- the length of the data in `dc's buffer that is to be decoded.
            -- The last element delivered by `convert' (the terminating 0)
            -- is not stored. Result is the count returned by `convert'
            -- minus one; it is negative if `convert' failed.
        local
            f, t    : ARRAY [INTEGER]
            flen    : INTEGER
            tlen    : INTEGER
            i, j    : INTEGER
            ir      : INTEGER_REF
            written : INTEGER
            log     : IO_MEDIUM
        do
            flen := from_cs.codepoint_size
            if flen = 3 then
                flen := 4 -- alignment ...
            end
            flen := len * flen
            create f.make (1, flen)

            inspect from_cs.codepoint_size
            when 1 then
                from
                    i := 1
                until
                    i > flen
                loop
                    f.put (dc.get_octet.value, i)
                    i := i + 1
                end

            when 2 then
                from
                    create ir
                    i := 1
                until
                    i > flen
                loop
                    dc.get_ushort (ir)
                    f.put (ir.item, i)
                    i := i + 1
                end

            when 3, 4 then
                from
                    create ir
                    i := 1
                until
                    i > flen
                loop
                    dc.get_ulong (ir)
                    f.put (ir.item, i)
                    i := i + 1
                end

            else
                log := logger.log (logger.Log_err, "General",
                                   "UNICODESET_CONVERTER",
                                   "decode")
                log.put_string ("trying to decode with nonsupported codepoint size ")
                log.putint (from_cs.codepoint_size)
                log.new_line
                check
                    supported_codepoint_size1 : false
                end
            end

            tlen := 6 * len + 1 -- Let's be sufficiently generous ...
            create t.make (1, tlen)
            written := convert (f, t, flen)
            written := written - 1 -- forget the terminating 0.

            if written > 0 then
                inspect to_cs.codepoint_size
                when 1 then
                    from
                        i := 1
                        j := to_data.lower
                    until
                        i > written
                    loop
                        to_data.put (t.item (i), j)
                        i := i + 1
                        j := j + 1
                    end

                when 2 then
                    from
                        i := 1
                        j := to_data.lower
                    until
                        i > written
                    loop
                        put_ushort (to_data, j, t.item (i))
                            -- `i' must advance too; without this the
                            -- loop would never terminate.
                        i := i + 1
                        j := j + 2
                    end

                when 3, 4 then
                    from
                        i := 1
                        j := to_data.lower
                    until
                        i > written
                    loop
                        put_ulong (to_data, j, t.item (i))
                        i := i + 1
                        j := j + 4
                    end -- loop over i and j

                else
                    log := logger.log (logger.Log_err, "General",
                                       "UNICODESET_CONVERTER",
                                       "decode")
                    log.put_string ("trying to decode with nonsupported codepoint size ")
                    log.putint (to_cs.codepoint_size)
                    log.new_line
                    check
                        supported_codepoint_size2 : false
                    end
                end -- inspect
            end -- if written > 0 then ...

            Result := written
        end
---------------------------------

    decode_and_terminate (dc : DATA_DECODER; to_data : ARRAY [INTEGER];
                          len : INTEGER) : INTEGER is
            -- Like `decode', but every element delivered by `convert'
            -- (including the terminating 0) is stored in `to_data'.
            -- Result is the count returned by `convert'; it is
            -- negative if `convert' failed.
        local
            f, t    : ARRAY [INTEGER]
            flen    : INTEGER
            tlen    : INTEGER
            i, j    : INTEGER
            ir      : INTEGER_REF
            written : INTEGER
            log     : IO_MEDIUM
        do
            flen := from_cs.codepoint_size
            if flen = 3 then
                flen := 4 -- alignment ...
            end
            flen := len * flen
            create f.make (1, flen)

            inspect from_cs.codepoint_size
            when 1 then
                from
                    i := 1
                until
                    i > flen
                loop
                    f.put (dc.get_octet.value, i)
                    i := i + 1
                end

            when 2 then
                from
                    create ir
                    i := 1
                until
                    i > flen
                loop
                    dc.get_ushort (ir)
                    f.put (ir.item, i)
                    i := i + 1
                end

            when 3, 4 then
                from
                    create ir
                    i := 1
                until
                    i > flen
                loop
                    dc.get_ulong (ir)
                    f.put (ir.item, i)
                    i := i + 1
                end

            else
                log := logger.log (logger.Log_err, "General",
                                   "UNICODESET_CONVERTER",
                                   "decode_and_terminate")
                log.put_string ("trying to decode with nonsupported codepoint size ")
                log.putint (from_cs.codepoint_size)
                log.new_line
                check
                    supported_codepoint_size1 : false
                end
            end

            tlen := to_cs.codepoint_size
            if tlen = 3 then
                tlen := 4 -- alignment ...
            end
            tlen := len * tlen
            create t.make (1, tlen)
            written := convert (f, t, flen)
                -- this time don't forget the terminating 0.

            if written > 0 then
                inspect to_cs.codepoint_size
                when 1 then
                    from
                        i := 1
                        j := to_data.lower
                    until
                        i > written
                    loop
                        to_data.put (t.item (i), j)
                        i := i + 1
                        j := j + 1
                    end

                when 2 then
                    from
                        i := 1
                        j := to_data.lower
                    until
                        i > written
                    loop
                        put_ushort (to_data, j, t.item (i))
                        i := i + 1
                        j := j + 2
                    end

                when 3, 4 then
                    from
                        i := 1
                        j := to_data.lower
                    until
                        i > written
                    loop
                        put_ulong (to_data, j, t.item (i))
                        i := i + 1
                        j := j + 4
                    end -- loop over i and j

                else
                    log := logger.log (logger.Log_err, "General",
                                       "UNICODESET_CONVERTER",
                                       "decode_and_terminate")
                    log.put_string ("try to decode with nonsupported codepoint size ")
                    log.putint (to_cs.codepoint_size)
                    log.new_line
                    check
                        supported_codepoint_size2 : false
                    end
                end -- inspect
            end -- if written > 0 then ...

            Result := written
        end
---------------------------------

feature { NONE } -- Implementation

    convert (from_data, to_data : ARRAY [INTEGER];
             len : INTEGER) : INTEGER is
            -- First convert `from_data' to UTF8.
            -- Then convert UTF8 into `to_data'.
            -- `len' elements are to be converted.
            -- If `to_cs' is UTF8 the first stage writes directly into
            -- `to_data' and the second stage is skipped.
            -- Result is the number of elements placed in `to_data',
            -- or -1 if either stage reported an error.
        require
            enough_data : from_data.count >= len
        local
            chars   : INTEGER
            tlen    : INTEGER
            written : INTEGER
            fext    : ANY
            text    : ANY
            tmp     : ARRAY [INTEGER]
        do
            if to_cs.id = C_utf8 then
                    -- Stage 1 output goes straight to `to_data', so
                    -- its capacity is the target length.
                text := to_data.to_c
                tlen := to_data.count
            else
                tlen := 6 * len + 1 -- UTF8 has max 6 code points per char
                create tmp.make (1, tlen)
                text := tmp.to_c
            end -- if to_cs.id = C_utf8 then ...

            fext  := from_data.to_c
            chars := uni_toUTF8 ($text, $fext, tlen, len,
                                 from_cs.id, C_line_lf)

            if chars < 0 then
                logger.log (logger.Log_err, "General",
                            "UNICODESET_CONVERTER",
                            "convert").put_string (
                    "codeset conversion error in stage 1%N")
                check
                    could_convert1 : false
                end
                Result := -1
            elseif to_cs.id = C_utf8 then
                    -- No second stage: report what stage 1 produced.
                Result := chars
            else
                text    := to_data.to_c
                tlen    := to_data.count
                fext    := tmp.to_c
                written := uni_fromUTF8 ($text, $fext, tlen, chars,
                                         to_cs.id, C_line_lf)
                if written < 0 then
                    logger.log (logger.Log_err, "General",
                                "UNICODESET_CONVERTER",
                                "convert").put_string (
                        "codeset conversion error in stage 2%N")
                    check
                        could_convert2 : false
                    end
                    Result := -1
                else -- success
                    Result := written
                end -- if written < 0 then ...
            end
        end
---------------------------------

    uni_fromUTF8 (t, f : POINTER;
                  tlen, flen, id, ending : INTEGER) : INTEGER is
            -- `t' and `f' point to (C) arrays of INTEGER. `t' has `tlen'
            -- elements. `ending' specifies the type of line end used
            -- (e. g. LF or CRLF). `flen' elements of `f', which are coded
            -- in UTF8 are to be converted to the codeset specified by
            -- `id' and stored in `t'. The return value is the number of
            -- elements placed in `t'; a value < 0 means an error occurred.
        external "C"
        alias "MICO_uni_fromUTF8"
        end
---------------------------------

    uni_toUTF8 (t, f : POINTER;
                tlen, flen, id, ending : INTEGER) : INTEGER is
            -- `t' and `f' point to (C) arrays of INTEGER. `t' has `tlen'
            -- elements. `id' specifies the code set in which `f' is coded
            -- and `ending' the type of line end used (e. g. LF or CRLF).
            -- `flen' elements of `f' are to be converted to UTF8 and placed
            -- in `t'. The return value is the number of elements placed in `t';
            -- a value < 0 means an error occurred.
        external "C"
        alias "MICO_uni_toUTF8"
        end

end -- class UNICODESET_CONVERTER

------------------------------------------------------------------------
--                                                                    --
--  MICO/E --- a free CORBA implementation                            --
--  Copyright (C) 1999 by Robert Switzer                              --
--                                                                    --
--  This library is free software; you can redistribute it and/or     --
--  modify it under the terms of the GNU Library General Public       --
--  License as published by the Free Software Foundation; either      --
--  version 2 of the License, or (at your option) any later version.  --
--                                                                    --
--  This library is distributed in the hope that it will be useful,   --
--  but WITHOUT ANY WARRANTY; without even the implied warranty of    --
--  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU  --
--  Library General Public License for more details.                  --
--                                                                    --
--  You should have received a copy of the GNU Library General Public --
--  License along with this library; if not, write to the Free        --
--  Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.--
--                                                                    --
--  Send comments and/or bug reports to:                              --
--                 micoe@math.uni-goettingen.de                       --
--                                                                    --
------------------------------------------------------------------------