note
	description: "[
			Converter from/to UTF-8, UTF-16 and UTF-32 encodings.
			
			Handling of invalid encodings
			=============================
			
			Whenever a UTF-8 or UTF-16 sequence is decoded, the decoding routines also check
			that the sequence is valid. If it is not, they replace the invalid unit (e.g. a byte
			for UTF-8 and a 2-byte unit for UTF-16) by the replacement character U+FFFD as described by
			variant #3 of the recommended practice for replacement character in Unicode (see
			http://www.unicode.org/review/pr-121.html for more details).
			
			However it means that you cannot roundtrip an incorrectly encoded sequence back and forth
			between the encoded version and the decoded STRING_32 version. To allow roundtrip, an
			escaped representation of a badly encoded sequence has been introduced. It adds
			a fourth variant (which is a slight modification of variant #3) to the recommended
			practice where the replacement character is followed by the printed hexadecimal value
			of the invalid byte or the invalid 2-byte sequence.
			
			To provide an example (assuming that the Unicode character U+FFFD is represented as
			? textually):
			1 - on UNIX, any invalid UTF-8 byte sequence such as 0x8F 0x8F is encoded as the
			following Unicode sequence: U+FFFD U+0038 U+0046 U+FFFD U+0038 U+0046, and textually
			it looks like "?8F?8F".
			2 - on Windows, any invalid UTF-16 2-byte sequence such as 0xD800 0x0054 is encoded as the
			following Unicode sequence: U+FFFD U+0075 U+0044 U+0038 U+0030 U+0030 U+FFFD U+0035 U+0034,
			and textually it looks like "?uD800?54". The rule is that if the 2-byte sequence does not fit
			into 1 byte, it uses the letter `u' followed by the hexadecimal value of the 2-byte sequence,
			otherwise it simply uses the 1-byte hexadecimal representation.
		]"
	date: "$Date$"
	revision: "$Revision$"

expanded class
	UTF_CONVERTER

feature -- Access

	escape_character: CHARACTER_32 = '%/0xFFFD/'
			-- Unicode replacement character (U+FFFD), used as the escape marker
			-- for invalid UTF-8 or UTF-16 encoding.
			-- UTF-8 encoding: 0xEF 0xBF 0xBD
			-- Binary UTF-8 encoding: 11101111 10111111 10111101
			-- UTF-16 encoding: 0xFFFD

feature -- Status report

	is_valid_utf_8_string_8 (s: READABLE_STRING_8): BOOLEAN
			-- Does `s' consist only of well-formed 1- to 4-byte UTF-8 sequences?
			-- Only the lead-byte pattern and the `10xxxxxx' pattern of the continuation
			-- bytes are checked; a sequence truncated at the end of `s' is invalid.
		local
			l_lead: NATURAL_32
			l_pos, l_last: INTEGER
		do
			Result := True
			l_last := s.count
			from
				l_pos := 1
			until
				l_pos > l_last or not Result
			loop
				l_lead := s.code (l_pos)
				if l_lead > 127 then
						-- Multi-byte form: `l_pos' is moved onto the last continuation byte.
					if (l_lead & 0xE0) = 0xC0 and l_pos < l_last then
							-- 110xxxxx 10xxxxxx
						Result := (s.code (l_pos + 1) & 0xC0) = 0x80
						l_pos := l_pos + 1
					elseif (l_lead & 0xF0) = 0xE0 and l_pos + 1 < l_last then
							-- 1110xxxx 10xxxxxx 10xxxxxx
						Result := (s.code (l_pos + 1) & 0xC0) = 0x80 and
							(s.code (l_pos + 2) & 0xC0) = 0x80
						l_pos := l_pos + 2
					elseif (l_lead & 0xF8) = 0xF0 and l_pos + 2 < l_last then
							-- 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
						Result := (s.code (l_pos + 1) & 0xC0) = 0x80 and
							(s.code (l_pos + 2) & 0xC0) = 0x80 and
							(s.code (l_pos + 3) & 0xC0) = 0x80
						l_pos := l_pos + 3
					else
							-- Unknown lead byte, stray continuation byte or truncated sequence.
						Result := False
					end
				end
					-- Step past the byte that ended the current sequence.
				l_pos := l_pos + 1
			end
		ensure
			instance_free: class
		end

	is_valid_utf_16le_string_8 (s: READABLE_STRING_8): BOOLEAN
			-- Is `s' a valid UTF-16LE Unicode sequence?
			-- Every two consecutive characters of `s' are read as one little-endian
			-- 16-bit code unit. The sequence is invalid if it has an odd number of bytes,
			-- contains a trail surrogate without a preceding lead surrogate, or contains
			-- a lead surrogate (0xD800..0xDBFF) that is not followed by a trail
			-- surrogate (0xDC00..0xDFFF).
		local
			c1, c2: NATURAL_32
			i, nb: INTEGER
		do
			nb := s.count
				-- If `nb' is not even, then clearly not a valid UTF-16 string.
			if (nb \\ 2) = 0 then
				from
					Result := True
				until
					i >= nb or not Result
				loop
						-- `i' designates the high byte of the current code unit.
					i := i + 2
					c1 := s.code (i - 1) | (s.code (i) |<< 8)
					if c1 < 0xD800 or c1 >= 0xE000 then
							-- Codepoint from Basic Multilingual Plane: one 16-bit code unit, this is valid Unicode.
					elseif c1 <= 0xDBFF then
							-- Lead surrogate: the next code unit has to be a trail surrogate.
						i := i + 2
						if i <= nb then
							c2 := s.code (i - 1) | (s.code (i) |<< 8)
							Result := 0xDC00 <= c2 and c2 <= 0xDFFF
						else
								-- Surrogate pair is incomplete, clearly not a valid UTF-16 sequence.
							Result := False
						end
					else
							-- Invalid starting surrogate pair which should be between 0xD800 and 0xDBFF.
						Result := False
					end
				end
			end
		ensure
			instance_free: class
		end

	is_valid_utf_16_subpointer (p: MANAGED_POINTER; start_pos, end_pos: INTEGER; a_stop_at_null: BOOLEAN): BOOLEAN
			-- Is `p' a valid UTF-16 Unicode sequence between code unit `start_pos' and `end_pos'?
			-- If `a_stop_at_null' we stop checking after finding a null character.
			-- `start_pos' and `end_pos' are 0-based indexes of 16-bit code units, i.e. the
			-- bytes examined are located at offsets `2 * start_pos' .. `2 * end_pos + 1'.
			-- False is returned when the positions are not within `p'.
		local
			i, n: INTEGER
			c1, c2: NATURAL_32
		do
			if p.count >= 2 and start_pos >= 0 and start_pos <= end_pos + 1 and end_pos < (p.count // 2) then
				from
						-- `i' and `n' are byte offsets of the current and of the last code unit.
					i := start_pos * 2
					n := end_pos * 2
					Result := True
				until
					i > n or not Result
				loop
					c1 := p.read_natural_16 (i)
					if c1 = 0 and a_stop_at_null then
							-- We hit our null terminating character, we can stop
						i := n + 1
					elseif c1 < 0xD800 or c1 >= 0xE000 then
							-- Codepoint from Basic Multilingual Plane: one 16-bit code unit, this is valid Unicode.
							-- Move to the next code unit, i.e. two bytes further.
						i := i + 2
					elseif c1 <= 0xDBFF then
							-- Lead surrogate: the next code unit has to be a trail surrogate.
						i := i + 2
						if i <= n then
							c2 := p.read_natural_16 (i)
							Result := 0xDC00 <= c2 and c2 <= 0xDFFF
								-- Skip the trail surrogate so that it is not examined
								-- again as the start of a new character.
							i := i + 2
						else
								-- Surrogate pair is incomplete, clearly not a valid UTF-16 sequence.
							Result := False
						end
					else
							-- Invalid starting surrogate pair which should be between 0xD800 and 0xDBFF.
						Result := False
					end
				end
			end
		ensure
			instance_free: class
		end

	is_valid_utf_16 (s: SPECIAL [NATURAL_16]): BOOLEAN
			-- Is `s' a valid UTF-16 Unicode sequence?
			-- Every item of `s' is one 16-bit code unit. The sequence is invalid if it
			-- contains a trail surrogate without a preceding lead surrogate, or a lead
			-- surrogate (0xD800..0xDBFF) that is not followed by a trail
			-- surrogate (0xDC00..0xDFFF).
		local
			i, n: INTEGER
			c: NATURAL_16
		do
			from
				i := 0
				n := s.count
				Result := True
			until
				i >= n or not Result
			loop
				c := s.item (i)
				if c < 0xD800 or c >= 0xE000 then
						-- Codepoint from Basic Multilingual Plane: one 16-bit code unit, this is valid Unicode.
				elseif c <= 0xDBFF then
						-- Lead surrogate: the next code unit has to be a trail surrogate.
					i := i + 1
					if i < n then
						c := s.item (i)
						Result := 0xDC00 <= c and c <= 0xDFFF
					else
							-- Surrogate pair is incomplete, clearly not a valid UTF-16 sequence.
						Result := False
					end
				else
						-- Invalid starting surrogate pair which should be between 0xD800 and 0xDBFF.
					Result := False
				end
					-- Step past the code unit that ended the current character.
				i := i + 1
			end
		ensure
			instance_free: class
		end

feature -- Measurement

	utf_8_bytes_count (s: READABLE_STRING_GENERAL; start_pos, end_pos: INTEGER): INTEGER
			-- Size in bytes of the UTF-8 encoding of `s.substring (start_pos, end_pos)',
			-- the terminating null character not included.
			-- Usable for escaped and non-escaped strings alike; for an escaped string
			-- the result may exceed what is really needed.
		require
			start_position_big_enough: start_pos >= 1
			end_position_big_enough: start_pos <= end_pos + 1
			end_pos_small_enough: end_pos <= s.count
		local
			l_index: INTEGER
			l_code: NATURAL_32
		do
			from
				l_index := start_pos
			until
				l_index > end_pos
			loop
				l_code := s.code (l_index)
					-- Ranges are tested from the widest encoding down to the narrowest.
				if l_code > 0xFFFF then
						-- 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
					Result := Result + 4
				elseif l_code > 0x7FF then
						-- 1110xxxx 10xxxxxx 10xxxxxx
					Result := Result + 3
				elseif l_code > 0x7F then
						-- 110xxxxx 10xxxxxx
					Result := Result + 2
				else
						-- 0xxxxxxx
					Result := Result + 1
				end
				l_index := l_index + 1
			end
		ensure
			instance_free: class
		end

	utf_16_characters_count_form_pointer (m: MANAGED_POINTER; start_pos, end_pos: INTEGER): INTEGER
			-- Count of characters encoded in UTF-16 in `m' from byte offset `start_pos'
			-- up to byte offset `end_pos - 1'.
			-- The terminating null character is not included.
			-- A code unit in the surrogate range 0xD800..0xDFFF is counted as one
			-- character together with the code unit following it.
		require
			start_position_big_enough: start_pos >= 0
			end_position: start_pos <= end_pos + 2
			end_pos_small_enought: end_pos < m.count
			even_start_position: start_pos \\ 2 = 0
			even_end_position: end_pos \\ 2 = 0
		local
			l_offset: INTEGER
			l_unit: NATURAL_32
		do
			from
				l_offset := start_pos
			until
				l_offset >= end_pos
			loop
				l_unit := m.read_natural_16 (l_offset)
				if 0xD800 <= l_unit and l_unit < 0xE000 then
						-- Supplementary Planes: surrogate pair with lead and trail surrogates.
					l_offset := l_offset + 4
				else
						-- Codepoint from Basic Multilingual Plane: one 16-bit code unit.
					l_offset := l_offset + 2
				end
				Result := Result + 1
			end
		ensure
			instance_free: class
		end

	utf_16_bytes_count (s: READABLE_STRING_GENERAL; start_pos, end_pos: INTEGER): INTEGER
			-- Minimal size in bytes of the UTF-16 encoding of `s.substring (start_pos, end_pos)',
			-- the terminating null character not included.
			-- Usable for escaped and non-escaped strings alike; for an escaped string
			-- the result may exceed what is really needed.
		require
			start_position_big_enough: start_pos >= 1
			end_position_big_enough: start_pos <= end_pos + 1
			end_pos_small_enough: end_pos <= s.count
		local
			l_index: INTEGER
		do
			from
				l_index := start_pos
			until
				l_index > end_pos
			loop
				if s.code (l_index) > 0xFFFF then
						-- Supplementary Planes: two 16-bit code units.
					Result := Result + 4
				else
						-- Code point from Basic Multilingual Plane: one 16-bit code unit.
					Result := Result + 2
				end
				l_index := l_index + 1
			end
		ensure
			instance_free: class
		end

	utf_8_to_string_32_count (s: SPECIAL [CHARACTER]; start_pos, end_pos: INTEGER): INTEGER
			-- Count of characters corresponding to UTF-8 sequence `s' between
			-- indexes `start_pos' and `end_pos'.
			-- The length of each sequence is derived from its first byte only;
			-- the following bytes are skipped without being examined.
			-- A sequence that extends beyond `end_pos' is not counted.
			-- A byte above 0xF7, which cannot start a UTF-8 sequence, is skipped
			-- without being counted.
		require
			start_position_big_enough: start_pos >= 0
			end_position_big_enough: start_pos <= end_pos + 1
			end_pos_small_enough: end_pos < s.count
		local
			i: INTEGER
			n: INTEGER
			c: INTEGER
			l_size: INTEGER
		do
			from
				i := start_pos
				n := end_pos
			until
				i > n
			loop
				c := s [i].code
				if c <= 0x7F then
						-- 0xxxxxxx
					l_size := 1
				elseif c <= 0xDF then
						-- 110xxxxx 10xxxxxx
					l_size := 2
				elseif c <= 0xEF then
						-- 1110xxxx 10xxxxxx 10xxxxxx
					l_size := 3
				elseif c <= 0xF7 then
						-- 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
					l_size := 4
				else
						-- Not the first byte of any UTF-8 sequence.
					l_size := 0
				end
				if l_size = 0 then
						-- Always make progress, otherwise the loop would never terminate.
					i := i + 1
				else
					i := i + l_size
						-- `i - 1' is the index of the last byte of the sequence: the
						-- sequence is complete when that byte is not beyond `n'.
					if i <= n + 1 then
						Result := Result + 1
					end
				end
			end
		ensure
			instance_free: class
		end

feature -- UTF-32 to UTF-8

	string_32_to_utf_8_string_8 (s: READABLE_STRING_32): STRING_8
			-- UTF-8 sequence corresponding to `s'.
			-- Same as `utf_32_string_to_utf_8_string_8' with an argument
			-- restricted to {READABLE_STRING_32}.
		do
			Result := utf_32_string_to_utf_8_string_8 (s)
		ensure
			instance_free: class
				-- Decoding `Result' gives back the characters of `s'.
			roundtrip: utf_8_string_8_to_string_32 (Result).same_string (s)
		end

	string_32_into_utf_8_string_8 (s: READABLE_STRING_32; a_result: STRING_8)
			-- Copy the UTF-8 sequence corresponding to `s' appended into `a_result'.
			-- Same as `utf_32_string_into_utf_8_string_8' with an argument
			-- restricted to {READABLE_STRING_32}.
		do
			utf_32_string_into_utf_8_string_8 (s, a_result)
		ensure
			instance_free: class
				-- Decoding the part appended to `a_result' gives back the characters of `s'.
			roundtrip: utf_8_string_8_to_string_32 (a_result.substring (old a_result.count + 1, a_result.count)).same_string (s)
		end

	utf_32_string_to_utf_8_string_8 (s: READABLE_STRING_GENERAL): STRING_8
			-- UTF-8 sequence corresponding to `s' interpreted as a UTF-32 sequence.
			-- A new string is returned at each call.
		do
				-- `s.count' is only the initial capacity: it is the exact size when `s'
				-- holds codes up to 0x7F only, otherwise more bytes get appended.
			create Result.make (s.count)
			utf_32_string_into_utf_8_string_8 (s, Result)
		ensure
			instance_free: class
				-- Decoding `Result' gives back the characters of `s'.
			roundtrip: utf_8_string_8_to_string_32 (Result).same_string_general (s)
		end

	utf_32_string_into_utf_8_string_8 (s: READABLE_STRING_GENERAL; a_result: STRING_8)
			-- Append to `a_result' the UTF-8 encoding of every code of `s',
			-- `s' being interpreted as a UTF-32 sequence.
		local
			l_index, l_count: like {STRING_32}.count
		do
			l_count := s.count
				-- Reserve at least one byte per character to be converted.
			a_result.grow (a_result.count + l_count)
			from
				l_index := 1
			until
				l_index > l_count
			loop
				utf_32_code_into_utf_8_string_8 (s.code (l_index), a_result)
				l_index := l_index + 1
			end
		ensure
			instance_free: class
			roundtrip: utf_8_string_8_to_string_32 (a_result.substring (old a_result.count + 1, a_result.count)).same_string_general (s)
		end

	utf_32_code_into_utf_8_string_8 (c: NATURAL_32; a_result: STRING_8)
			-- Append to `a_result' the one to four bytes of the UTF-8 encoding of code `c'.
		local
			l_last: CHARACTER_8
		do
			if c <= 0x7F then
					-- 0xxxxxxx
				a_result.extend (c.to_character_8)
			else
					-- Every multi-byte form ends with a continuation byte
					-- holding the six lowest bits of `c'.
				l_last := ((c & 0x3F) | 0x80).to_character_8
				if c > 0xFFFF then
						-- c <= 1FFFFF - there are no higher code points
						-- 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
					a_result.extend (((c |>> 18) | 0xF0).to_character_8)
					a_result.extend ((((c |>> 12) & 0x3F) | 0x80).to_character_8)
					a_result.extend ((((c |>> 6) & 0x3F) | 0x80).to_character_8)
				elseif c > 0x7FF then
						-- 1110xxxx 10xxxxxx 10xxxxxx
					a_result.extend (((c |>> 12) | 0xE0).to_character_8)
					a_result.extend ((((c |>> 6) & 0x3F) | 0x80).to_character_8)
				else
						-- 110xxxxx 10xxxxxx
					a_result.extend (((c |>> 6) | 0xC0).to_character_8)
				end
				a_result.extend (l_last)
			end
		ensure
			instance_free: class
		end

	escaped_utf_32_substring_into_utf_8_0_pointer (
			s: READABLE_STRING_GENERAL; start_pos, end_pos: INTEGER; p: MANAGED_POINTER;
			p_offset: INTEGER; a_new_upper: detachable CELL [INTEGER]
		)
			-- Write UTF-8 sequence corresponding to `s.substring (start_pos, end_pos)', interpreted
			-- as a UTF-32 sequence that could be escaped, with terminating zero to address
			-- `p + p_offset'. `p' is resized when it is too small to hold the result.
			-- If `a_new_upper' is provided, the upper index of `p' containing the zero-termination
			-- is written to `a_new_upper'.
			-- The sequence is zero-terminated.
			--
			-- Escape handling performed by this routine:
			-- * `escape_character' followed by `escape_character' stands for a single
			--   `escape_character' which is then encoded in UTF-8.
			-- * `escape_character' followed by "HH", where H stands for an hexadecimal digit and
			--   the value is above 0x7F, is written as the single byte of value 0xHH.
			-- * Anything else is left as is and encoded in UTF-8.
			-- Only characters within `start_pos' .. `end_pos' are considered when looking
			-- for what follows an `escape_character'.
		require
			start_position_big_enough: start_pos >= 1
			end_position_big_enough: start_pos <= end_pos + 1
			end_pos_small_enough: end_pos <= s.count
			p_offset_non_negative: p_offset >= 0
		local
			i, n, m, l_count: INTEGER
			c: NATURAL_32
			l_encoded_value: READABLE_STRING_GENERAL
			l_decoded, l_resized: BOOLEAN
		do
				-- Basic assumptions that there will be only one-byte code units.
				-- `n' is the number of characters to convert; it is used for sizing only.
			n := end_pos - start_pos + 1
			l_count := p.count
				-- Check that there is at least `n' bytes available plus the terminating null character.
			if l_count - p_offset < (n + 1) then
					-- Optimize resizing, once we have to resize, we actually perform the resizing
					-- only once.
				l_count := p_offset + utf_8_bytes_count (s, start_pos, end_pos) + 1
				p.resize (l_count)
				l_resized := True
			end

			from
					-- `m' is the offset in `p' of the next byte to write,
					-- `i' the index in `s' of the last character read.
				m := p_offset
				i := start_pos - 1
			until
				i >= end_pos
			loop
				i := i + 1
				c := s.code (i)

				if c = escape_character.natural_32_code then
						-- We might be facing a character that was escaped.
						-- In the Unix case, we only accept the 1-byte encoded format.
						-- `i' is an index in `s', hence the look-ahead is bounded by `end_pos'.
					if i < end_pos and then s.item (i + 1) = escape_character then
							-- The `escape_character' was escaped, it meant they really wanted an `escape_character'.
						i := i + 1
					elseif i + 1 < end_pos then
							-- We have at least 2 characters to read, make sure they represent an hexadecimal
							-- value.
						l_encoded_value := s.substring (i + 1, i + 2)
						if is_hexa_decimal (l_encoded_value) then
							c := to_natural_32 (l_encoded_value)
							if c <= 0x7F then
									-- Value was encoded when it should not have been
									-- do nothing, we leave the original content as is.
								c := escape_character.natural_32_code
							else
								l_decoded := True
								i := i + 2
							end
						else
								-- Not an hexadecimal value, it was not escaped.
						end
					else
							-- Not enough to read to make it valid, it was not escaped.
					end
				end

				if not l_decoded then
					if c <= 0x7F then
							-- 0xxxxxxx
						p.put_natural_8 (c.to_natural_8, m)
						m := m + 1
					else
							-- Make sure there is sufficient room for all the remaining characters and
							-- at least 5 bytes, i.e. 4 bytes for the maximum UTF-8 encoding,
							-- and one byte for the terminating null character. Note that we do not
							-- take into account `p_offset' because `m' already includes it.
							-- Note that `end_pos - i' represents the number of remaining characters
							-- to process in the current string.
						if not l_resized and then (m + 5 + (end_pos - i) > l_count) then
								-- Optimize resizing, once we have to resize, we actually perform the resizing
								-- only once.
							l_count := m + utf_8_bytes_count (s, i, end_pos) + 1
							p.resize (l_count)
							l_resized := True
						end
						if c <= 0x7FF then
								-- 110xxxxx 10xxxxxx
							p.put_natural_8 (((c |>> 6) | 0xC0).to_natural_8, m)
							p.put_natural_8 (((c & 0x3F) | 0x80).to_natural_8, m + 1)
							m := m + 2
						elseif c <= 0xFFFF then
								-- 1110xxxx 10xxxxxx 10xxxxxx
							p.put_natural_8 (((c |>> 12) | 0xE0).to_natural_8, m)
							p.put_natural_8 ((((c |>> 6) & 0x3F) | 0x80).to_natural_8, m + 1)
							p.put_natural_8 (((c & 0x3F) | 0x80).to_natural_8, m + 2)
							m := m + 3
						else
								-- c <= 1FFFFF - there are no higher code points
								-- 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
							p.put_natural_8 (((c |>> 18) | 0xF0).to_natural_8, m)
							p.put_natural_8 ((((c |>> 12) & 0x3F) | 0x80).to_natural_8, m + 1)
							p.put_natural_8 ((((c |>> 6) & 0x3F) | 0x80).to_natural_8, m + 2)
							p.put_natural_8 (((c & 0x3F) | 0x80).to_natural_8, m + 3)
							m := m + 4
						end
					end
				else
					l_decoded := False
						-- Simply put decoded value directly in stream.
					p.put_natural_8 (c.to_natural_8, m)
					m := m + 1
				end
			end
			if l_resized then
					-- `p' was resized so we adjust it to accommodate up to the terminating null character.
				p.resize (m + 1)
			end
			p.put_natural_8 (0, m)
			if a_new_upper /= Void then
				a_new_upper.put (m)
			end
		ensure
			instance_free: class
			roundtrip: a_new_upper /= Void implies utf_8_0_subpointer_to_escaped_string_32 (p, p_offset, a_new_upper.item - 1, False).same_string_general (s.substring (start_pos, end_pos))
				-- The last valid index of `p' is `p.count - 1'.
			roundtrip: (a_new_upper = Void and then not s.substring (start_pos, end_pos).has ('%U')) implies
				utf_8_0_subpointer_to_escaped_string_32 (p, p_offset, p.count - 1, True).same_string_general (s.substring (start_pos, end_pos))
		end

	escaped_utf_32_string_to_utf_8_string_8 (s: READABLE_STRING_GENERAL): STRING_8
			-- UTF-8 sequence corresponding to `s' interpreted as a UTF-32 sequence that could be escaped.
			-- If `s' contains the `escape_character' followed by either "HH" or "uHHHH" where H stands
			-- for an hexadecimal digit, then `s' has been escaped and will be converted to what is
			-- expected by the current platform.
			-- Otherwise it will be ignored and it will be left as is.
			-- See the note clause for the class for more details on the encoding.
			-- A new string is returned at each call; the conversion itself is
			-- performed by `escaped_utf_32_string_into_utf_8_string_8'.
		do
				-- `s.count' is only the initial capacity of `Result'.
			create Result.make (s.count)
			escaped_utf_32_string_into_utf_8_string_8 (s, Result)
		ensure
			instance_free: class
				-- Decoding `Result' with escaping gives back the characters of `s'.
			roundtrip: utf_8_string_8_to_escaped_string_32 (Result).same_string_general (s)
		end

	escaped_utf_32_string_into_utf_8_string_8 (s: READABLE_STRING_GENERAL; a_result: STRING_8)
			-- Append to `a_result' the UTF-8 sequence corresponding to `s', interpreted
			-- as a UTF-32 sequence that could be escaped.
			-- Escape handling performed by this routine:
			-- * `escape_character' followed by `escape_character' stands for a single
			--   `escape_character' which is then encoded in UTF-8.
			-- * `escape_character' followed by "HH", where H stands for an hexadecimal digit and
			--   the value is above 0x7F, is appended as the single byte of value 0xHH.
			-- * Anything else is left as is and encoded in UTF-8.
			-- See the note clause for the class for more details on the encoding.
		local
			l_pos, l_count: like {STRING_32}.count
			l_code: NATURAL_32
			l_hex: READABLE_STRING_GENERAL
			l_raw_byte: BOOLEAN
		do
			l_count := s.count
			a_result.grow (a_result.count + l_count)
			from
				l_pos := 1
			until
				l_pos > l_count
			loop
				l_code := s.code (l_pos)
					-- Unless an escaped byte is recognized below, `l_code' gets UTF-8 encoded.
				l_raw_byte := False
				if l_code = escape_character.natural_32_code then
						-- Possibly the start of an escaped byte; only the
						-- 1-byte "HH" format is recognized here.
					if l_pos < l_count and then s.item (l_pos + 1) = escape_character then
							-- Doubled `escape_character': a genuine `escape_character' is meant.
						l_pos := l_pos + 1
					elseif l_pos + 1 < l_count then
							-- Two more characters are available, they may be hexadecimal digits.
						l_hex := s.substring (l_pos + 1, l_pos + 2)
						if is_hexa_decimal (l_hex) then
							l_code := to_natural_32 (l_hex)
							if l_code > 0x7F then
								l_raw_byte := True
								l_pos := l_pos + 2
							else
									-- Such a value never needs escaping: keep the
									-- original content untouched.
								l_code := escape_character.natural_32_code
							end
						end
					end
				end

				if l_raw_byte then
						-- Put the decoded value directly in the stream.
					a_result.extend (l_code.to_character_8)
				else
					utf_32_code_into_utf_8_string_8 (l_code, a_result)
				end
				l_pos := l_pos + 1
			end
		ensure
			instance_free: class
			roundtrip: utf_8_string_8_to_escaped_string_32 (a_result.substring (old a_result.count + 1, a_result.count)).same_string_general (s)
		end

	string_32_into_utf_8_0_pointer (s: READABLE_STRING_32; p: MANAGED_POINTER; p_offset: INTEGER; a_new_upper: detachable CELL [INTEGER])
			-- Write UTF-8 sequence corresponding to `s' with terminating zero
			-- to address `p + p_offset'.
			-- If `a_new_upper' is provided, the upper index of `p' containing the zero-termination
			-- is written to `a_new_upper'.
			-- The sequence is zero-terminated.
			-- Same as `utf_32_string_into_utf_8_0_pointer' with an argument
			-- restricted to {READABLE_STRING_32}.
		require
			p_offset_non_negative: p_offset >= 0
		do
			utf_32_string_into_utf_8_0_pointer (s, p, p_offset, a_new_upper)
		ensure
			instance_free: class
			roundtrip: a_new_upper /= Void implies utf_8_0_subpointer_to_escaped_string_32 (p, p_offset, a_new_upper.item - 1, False).same_string (s)
				-- The last valid index of `p' is `p.count - 1'.
			roundtrip: (a_new_upper = Void and then not s.has ('%U')) implies
				utf_8_0_subpointer_to_escaped_string_32 (p, p_offset, p.count - 1, True).same_string_general (s)
		end

	utf_32_string_into_utf_8_0_pointer (s: READABLE_STRING_GENERAL; p: MANAGED_POINTER; p_offset: INTEGER; a_new_upper: detachable CELL [INTEGER])
			-- Write UTF-8 sequence corresponding to `s', interpreted as a UTF-32 sequence,
			-- with terminating zero to address `p + p_offset'.
			-- `p' is resized when it is too small to hold the result; in that case its
			-- size ends up being the offset of the terminating zero plus one.
			-- If `a_new_upper' is provided, the upper index of `p' containing the zero-termination
			-- is written to `a_new_upper'.
			-- The sequence is zero-terminated.
		require
			p_offset_non_negative: p_offset >= 0
		local
			m: INTEGER
			i, n, l_count: INTEGER
			c: NATURAL_32
			l_resized: BOOLEAN
		do
				-- Basic assumptions that there will be only one-byte code units.
			n := s.count
			l_count := p.count
				-- Check that there is at least `n' bytes available plus the terminating null character.
			if l_count - p_offset < (n + 1) then
					-- Optimize resizing, once we have to resize, we actually perform the resizing
					-- only once.
				l_count := p_offset + utf_8_bytes_count (s, 1, n) + 1
				p.resize (l_count)
				l_resized := True
			end

				-- Fill `p' with the converted data.
				-- `m' is the offset in `p' of the next byte to write.
			from
				i := 0
				m := p_offset
			until
				i >= n
			loop
				i := i + 1
				c := s.code (i)
				if c <= 0x7F then
						-- 0xxxxxxx.
					p.put_natural_8 (c.to_natural_8, m)
					m := m + 1
				else
						-- Make sure there is sufficient room for all the remaining characters and
						-- at least 5 bytes, i.e. 4 bytes for the maximum UTF-8 encoding,
						-- and one byte for the terminating null character. Note that we do not
						-- take into account `p_offset' because `m' already includes it.
						-- Note that `n - i' represents the number of remaining characters
						-- to process in the current string.
					if not l_resized and then (m + 5 + (n - i) > l_count) then
							-- Optimize resizing, once we have to resize, we actually perform the resizing
							-- only once.
						l_count := m + utf_8_bytes_count (s, i, n) + 1
						p.resize (l_count)
						l_resized := True
					end

					if c <= 0x7FF then
							-- 110xxxxx 10xxxxxx.
						p.put_natural_8 (((c |>> 6) | 0xC0).to_natural_8, m)
						p.put_natural_8 (((c & 0x3F) | 0x80).to_natural_8, m + 1)
						m := m + 2
					elseif c <= 0xFFFF then
							-- 1110xxxx 10xxxxxx 10xxxxxx
						p.put_natural_8 (((c |>> 12) | 0xE0).to_natural_8, m)
						p.put_natural_8 ((((c |>> 6) & 0x3F) | 0x80).to_natural_8, m + 1)
						p.put_natural_8 (((c & 0x3F) | 0x80).to_natural_8, m + 2)
						m := m + 3
					else
							-- c <= 1FFFFF - there are no higher code points
							-- 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
						p.put_natural_8 (((c |>> 18) | 0xF0).to_natural_8, m)
						p.put_natural_8 ((((c |>> 12) & 0x3F) | 0x80).to_natural_8, m + 1)
						p.put_natural_8 ((((c |>> 6) & 0x3F) | 0x80).to_natural_8, m + 2)
						p.put_natural_8 (((c & 0x3F) | 0x80).to_natural_8, m + 3)
						m := m + 4
					end
				end
			end
			if l_resized then
					-- `p' was resized so we adjust it to accommodate up to the terminating null character.
				p.resize (m + 1)
			end
			p.put_natural_8 (0, m)
			if a_new_upper /= Void then
				a_new_upper.put (m)
			end
		ensure
			instance_free: class
			roundtrip: a_new_upper /= Void implies utf_8_0_subpointer_to_escaped_string_32 (p, p_offset, a_new_upper.item - 1, False).same_string_general (s)
				-- The last valid index of `p' is `p.count - 1'.
			roundtrip: (a_new_upper = Void and then not s.has ('%U')) implies
				utf_8_0_subpointer_to_escaped_string_32 (p, p_offset, p.count - 1, True).same_string_general (s)
		end

	utf_32_string_to_utf_8 (s: READABLE_STRING_GENERAL): SPECIAL [NATURAL_8]
			-- UTF-8 sequence corresponding to `s', interpreted as a UTF-32 sequence.
			-- The sequence is not zero-terminated.
		do
				-- Start from the zero-terminated form ...
			Result := utf_32_string_to_utf_8_0 (s)
				-- ... and ask for an area one item shorter, which presumably drops the
				-- terminating zero (to be confirmed against the contract of
				-- {SPECIAL}.aliased_resized_area_with_default).
			Result := Result.aliased_resized_area_with_default (0, Result.count - 1)
		ensure
			instance_free: class
				-- Each item of `Result' is the code of the character found at the
				-- same position in the {STRING_8} version of the conversion.
			roundtrip: attached utf_32_string_to_utf_8_string_8 (s) as l_ref and then
				∀ n: Result ¦ n = l_ref.code (@ n.target_index + 1)
		end

	utf_32_string_to_utf_8_0 (s: READABLE_STRING_GENERAL): SPECIAL [NATURAL_8]
			-- Bytes of the UTF-8 encoding of `s', interpreted as a UTF-32 sequence,
			-- followed by a terminating zero.
		local
			l_next: INTEGER
			l_index, l_count: like {STRING_32}.count
			l_code: NATURAL_32
		do
			l_count := s.count
				-- Exact size: the encoded bytes plus the terminating zero.
			create Result.make_filled (0, utf_8_bytes_count (s, 1, l_count) + 1)
			from
					-- `l_next' is the index in `Result' of the next byte to write.
				l_index := 1
				l_next := 0
			until
				l_index > l_count
			loop
				l_code := s.code (l_index)
				if l_code > 0xFFFF then
						-- c <= 1FFFFF - there are no higher code points
						-- 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
					Result.put (((l_code |>> 18) | 0xF0).to_natural_8, l_next)
					Result.put ((((l_code |>> 12) & 0x3F) | 0x80).to_natural_8, l_next + 1)
					Result.put ((((l_code |>> 6) & 0x3F) | 0x80).to_natural_8, l_next + 2)
					Result.put (((l_code & 0x3F) | 0x80).to_natural_8, l_next + 3)
					l_next := l_next + 4
				elseif l_code > 0x7FF then
						-- 1110xxxx 10xxxxxx 10xxxxxx
					Result.put (((l_code |>> 12) | 0xE0).to_natural_8, l_next)
					Result.put ((((l_code |>> 6) & 0x3F) | 0x80).to_natural_8, l_next + 1)
					Result.put (((l_code & 0x3F) | 0x80).to_natural_8, l_next + 2)
					l_next := l_next + 3
				elseif l_code > 0x7F then
						-- 110xxxxx 10xxxxxx
					Result.put (((l_code |>> 6) | 0xC0).to_natural_8, l_next)
					Result.put (((l_code & 0x3F) | 0x80).to_natural_8, l_next + 1)
					l_next := l_next + 2
				else
						-- 0xxxxxxx
					Result.put (l_code.to_natural_8, l_next)
					l_next := l_next + 1
				end
				l_index := l_index + 1
			end
			Result.put (0, l_next)
		ensure
			instance_free: class
			attached_utf_8_string: attached utf_32_string_to_utf_8_string_8 (s) as l_ref
			count: Result.count = l_ref.count + 1
			roundtrip: ∀ x: l_ref ¦ x = Result [@ x.target_index - 1].to_character_8
			zero_terminated: Result [Result.upper] = 0
		end

feature -- UTF-8 to UTF-32

	utf_8_0_pointer_to_escaped_string_32 (p: MANAGED_POINTER): STRING_32
			-- {STRING_32} object corresponding to UTF-8 sequence `p' which is zero-terminated,
			-- where invalid UTF-8 sequences are escaped.
			-- A new string is returned at each call; the conversion itself is
			-- performed by `utf_8_0_pointer_into_escaped_string_32'.
		do
				-- Allocate Result with the same number of bytes as `p'.
			create Result.make (p.count)
			utf_8_0_pointer_into_escaped_string_32 (p, Result)
		ensure
			instance_free: class
				-- Encoding `Result' back gives the bytes found at the start of `p'.
			roundtrip: attached escaped_utf_32_string_to_utf_8_string_8 (Result) as s and then
				∀ c: s ¦ c = p.read_natural_8 (@ c.target_index - 1).to_character_8
		end

	utf_8_0_pointer_into_escaped_string_32 (p: MANAGED_POINTER; a_result: STRING_32)
			-- Copy {STRING_32} object corresponding to UTF-8 sequence `p' which is zero-terminated,
			-- where invalid UTF-8 sequences are escaped, appended into `a_result'.
		do
				-- Convert all of `p', i.e. indexes 0 to `p.count - 1', stopping
				-- at the first null character.
			utf_8_0_subpointer_into_escaped_string_32 (p, 0, p.count - 1, True, a_result)
		ensure
			instance_free: class
				-- Encoding back what was appended to `a_result' gives the bytes
				-- found at the start of `p'.
			roundtrip: attached escaped_utf_32_string_to_utf_8_string_8 (a_result.substring (old a_result.count + 1, a_result.count)) as s and then
				∀ c: s ¦ c = p.read_natural_8 (@ c.target_index - 1).to_character_8
		end

	utf_8_0_subpointer_to_escaped_string_32 (p: MANAGED_POINTER; start_pos, end_pos: INTEGER; a_stop_at_null: BOOLEAN): STRING_32
			-- {STRING_32} object corresponding to UTF-8 sequence `p' between indexes `start_pos' and
			-- `end_pos' or the first null character encountered if `a_stop_at_null', where invalid
			-- UTF-8 sequences are escaped.
			-- `start_pos' and `end_pos' are 0-based byte indexes in `p'.
			-- A new string is returned at each call; the conversion itself is
			-- performed by `utf_8_0_subpointer_into_escaped_string_32'.
		require
			start_position_big_enough: start_pos >= 0
			end_position_big_enough: start_pos <= end_pos + 1
			end_pos_small_enough: end_pos < p.count
		do
				-- Allocate Result with the same number of bytes as `p'.
			create Result.make (p.count)
			utf_8_0_subpointer_into_escaped_string_32 (p, start_pos, end_pos, a_stop_at_null, Result)
		ensure
			instance_free: class
				-- Encoding `Result' back gives the bytes of `p' found from `start_pos'.
			roundtrip: attached escaped_utf_32_string_to_utf_8_string_8 (Result) as s and then
				∀ c: s ¦ c = p.read_natural_8 (start_pos + @ c.target_index - 1).to_character_8
		end

	utf_8_0_subpointer_into_escaped_string_32 (p: MANAGED_POINTER; start_pos, end_pos: INTEGER; a_stop_at_null: BOOLEAN; a_result: STRING_32)
			-- Append to `a_result' the characters decoded from the UTF-8 bytes of `p' found
			-- between byte indexes `start_pos' and `end_pos', stopping at the first null byte
			-- if `a_stop_at_null'. A byte that does not begin a well-formed sequence (only the
			-- lead and continuation bit patterns are checked) is appended in escaped form.
		require
			start_position_big_enough: start_pos >= 0
			end_position_big_enough: start_pos <= end_pos + 1
			end_pos_small_enough: end_pos < p.count
		local
			l_pos: like {STRING_8}.count
			l_length: INTEGER
			b1, b2, b3, b4: NATURAL_8
		do
				-- Reserve one character per byte of the requested range.
			a_result.grow (a_result.count + end_pos - start_pos + 1)
			from
				l_pos := start_pos
			until
				l_pos > end_pos
			loop
				b1 := p.read_natural_8 (l_pos)
				if b1 = 0 and a_stop_at_null then
						-- Null terminator reached: force the loop to exit.
					l_pos := end_pos + 1
				else
						-- `l_length' is the byte length of the sequence decoded at `l_pos';
						-- it remains 0 when `b1' does not start a well-formed sequence.
					l_length := 0
					if b1 <= 0x7F then
							-- 0xxxxxxx
						a_result.extend (b1.to_character_32)
						l_length := 1
					elseif (b1 & 0xE0) = 0xC0 then
							-- 110xxxxx 10xxxxxx
						if l_pos < end_pos then
							b2 := p.read_natural_8 (l_pos + 1)
							if (b2 & 0xC0) = 0x80 then
								a_result.extend ((
									((b1.as_natural_32 & 0x1F) |<< 6) |
									(b2.as_natural_32 & 0x3F)
								).to_character_32)
								l_length := 2
							end
						end
					elseif (b1 & 0xF0) = 0xE0 then
							-- 1110xxxx 10xxxxxx 10xxxxxx
						if l_pos + 1 < end_pos then
							b2 := p.read_natural_8 (l_pos + 1)
							b3 := p.read_natural_8 (l_pos + 2)
							if (b2 & 0xC0) = 0x80 and (b3 & 0xC0) = 0x80 then
								a_result.extend ((
									((b1.as_natural_32 & 0xF) |<< 12) |
									((b2.as_natural_32 & 0x3F) |<< 6) |
									(b3.as_natural_32 & 0x3F)
								).to_character_32)
								l_length := 3
							end
						end
					elseif (b1 & 0xF8) = 0xF0 then
							-- 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
						if l_pos + 2 < end_pos then
							b2 := p.read_natural_8 (l_pos + 1)
							b3 := p.read_natural_8 (l_pos + 2)
							b4 := p.read_natural_8 (l_pos + 3)
							if (b2 & 0xC0) = 0x80 and (b3 & 0xC0) = 0x80 and (b4 & 0xC0) = 0x80 then
								a_result.extend ((
									((b1.as_natural_32 & 0x7) |<< 18) |
									((b2.as_natural_32 & 0x3F) |<< 12) |
									((b3.as_natural_32 & 0x3F) |<< 6) |
									(b4.as_natural_32 & 0x3F)
								).to_character_32)
								l_length := 4
							end
						end
					end
					if l_length = 0 then
							-- Invalid or truncated sequence: escape the first byte only and
							-- retry from the next byte, which may itself start a valid sequence.
						escape_code_into (a_result, b1)
						l_length := 1
					end
					l_pos := l_pos + l_length
				end
			end
		ensure
			instance_free: class
			roundtrip: attached escaped_utf_32_string_to_utf_8_string_8 (a_result.substring (old a_result.count + 1, a_result.count)) as s and then
				∀ c: s ¦ c = p.read_natural_8 (start_pos + @ c.target_index - 1).to_character_8
		end

	utf_8_string_8_to_string_32 (s: READABLE_STRING_8): STRING_32
			-- New STRING_32 corresponding to UTF-8 sequence `s'.
			-- The decoding is delegated to `utf_8_string_8_into_string_32'.
		do
				-- Preallocate one character per byte of `s'.
			create Result.make (s.count)
			utf_8_string_8_into_string_32 (s, Result)
		ensure
			instance_free: class
				-- The roundtrip is only promised when `s' is valid UTF-8.
			roundtrip: is_valid_utf_8_string_8 (s) implies utf_32_string_to_utf_8_string_8 (Result).same_string (s)
		end

	utf_8_string_8_into_string_32 (s: READABLE_STRING_8; a_result: STRING_32)
			-- Decode UTF-8 sequence `s' and append the resulting characters to `a_result'.
			-- No validation is performed: the sequence length is derived from the value of
			-- the lead byte alone, continuation bytes are not checked, a sequence that would
			-- run past the end of `s' is dropped, and bytes above 0xF7 are skipped.
		local
			l_pos, l_count: like {STRING_8}.count
			l_lead: NATURAL_32
		do
			l_count := s.count
				-- Reserve one character per input byte.
			a_result.grow (a_result.count + l_count)
			from
				l_pos := 1
			until
				l_pos > l_count
			loop
				l_lead := s.code (l_pos)
				if l_lead <= 0x7F then
						-- 0xxxxxxx
					a_result.extend (l_lead.to_character_32)
					l_pos := l_pos + 1
				elseif l_lead <= 0xDF then
						-- 110xxxxx 10xxxxxx
					if l_pos + 1 <= l_count then
						a_result.extend ((
							((l_lead & 0x1F) |<< 6) |
							(s.code (l_pos + 1) & 0x3F)
						).to_character_32)
					end
					l_pos := l_pos + 2
				elseif l_lead <= 0xEF then
						-- 1110xxxx 10xxxxxx 10xxxxxx
					if l_pos + 2 <= l_count then
						a_result.extend ((
							((l_lead & 0xF) |<< 12) |
							((s.code (l_pos + 1) & 0x3F) |<< 6) |
							(s.code (l_pos + 2) & 0x3F)
						).to_character_32)
					end
					l_pos := l_pos + 3
				elseif l_lead <= 0xF7 then
						-- 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
					if l_pos + 3 <= l_count then
						a_result.extend ((
							((l_lead & 0x7) |<< 18) |
							((s.code (l_pos + 1) & 0x3F) |<< 12) |
							((s.code (l_pos + 2) & 0x3F) |<< 6) |
							(s.code (l_pos + 3) & 0x3F)
						).to_character_32)
					end
					l_pos := l_pos + 4
				else
						-- Not a UTF-8 lead byte handled here: skip it.
					l_pos := l_pos + 1
				end
			end
		ensure
			instance_free: class
			roundtrip: is_valid_utf_8_string_8 (s) implies utf_32_string_to_utf_8_string_8 (a_result.substring (old a_result.count + 1, a_result.count)).same_string (s)
		end

	utf_8_string_8_to_escaped_string_32 (s: READABLE_STRING_8): STRING_32
			-- New STRING_32 corresponding to UTF-8 sequence `s', where invalid UTF-8 sequences are escaped.
			-- The decoding is delegated to `utf_8_string_8_into_escaped_string_32'.
		do
				-- Preallocate one character per byte of `s'.
			create Result.make (s.count)
			utf_8_string_8_into_escaped_string_32 (s, Result)
		ensure
			instance_free: class
				-- Unlike the non-escaped variant, the roundtrip is promised for any `s'.
			roundtrip: escaped_utf_32_string_to_utf_8_string_8 (Result).same_string (s)
		end

	utf_8_string_8_into_escaped_string_32 (s: READABLE_STRING_8; a_result: STRING_32)
			-- Decode UTF-8 sequence `s' and append the resulting characters to `a_result'.
			-- A byte that does not begin a well-formed sequence (only the lead and
			-- continuation bit patterns are checked) is appended in escaped form.
		local
			l_pos, l_count: like {STRING_8}.count
			l_length: INTEGER
			b1, b2, b3, b4: NATURAL_8
		do
			l_count := s.count
				-- Reserve one character per input byte.
			a_result.grow (a_result.count + l_count)
			from
				l_pos := 1
			until
				l_pos > l_count
			loop
				b1 := s.code (l_pos).as_natural_8
					-- `l_length' is the byte length of the sequence decoded at `l_pos';
					-- it remains 0 when `b1' does not start a well-formed sequence.
				l_length := 0
				if b1 <= 0x7F then
						-- 0xxxxxxx
					a_result.extend (b1.to_character_32)
					l_length := 1
				elseif (b1 & 0xE0) = 0xC0 then
						-- 110xxxxx 10xxxxxx
					if l_pos < l_count then
						b2 := s.code (l_pos + 1).as_natural_8
						if (b2 & 0xC0) = 0x80 then
							a_result.extend ((
								((b1.as_natural_32 & 0x1F) |<< 6) |
								(b2.as_natural_32 & 0x3F)
							).to_character_32)
							l_length := 2
						end
					end
				elseif (b1 & 0xF0) = 0xE0 then
						-- 1110xxxx 10xxxxxx 10xxxxxx
					if l_pos + 1 < l_count then
						b2 := s.code (l_pos + 1).as_natural_8
						b3 := s.code (l_pos + 2).as_natural_8
						if (b2 & 0xC0) = 0x80 and (b3 & 0xC0) = 0x80 then
							a_result.extend ((
								((b1.as_natural_32 & 0xF) |<< 12) |
								((b2.as_natural_32 & 0x3F) |<< 6) |
								(b3.as_natural_32 & 0x3F)
							).to_character_32)
							l_length := 3
						end
					end
				elseif (b1 & 0xF8) = 0xF0 then
						-- 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
					if l_pos + 2 < l_count then
						b2 := s.code (l_pos + 1).as_natural_8
						b3 := s.code (l_pos + 2).as_natural_8
						b4 := s.code (l_pos + 3).as_natural_8
						if (b2 & 0xC0) = 0x80 and (b3 & 0xC0) = 0x80 and (b4 & 0xC0) = 0x80 then
							a_result.extend ((
								((b1.as_natural_32 & 0x7) |<< 18) |
								((b2.as_natural_32 & 0x3F) |<< 12) |
								((b3.as_natural_32 & 0x3F) |<< 6) |
								(b4.as_natural_32 & 0x3F)
							).to_character_32)
							l_length := 4
						end
					end
				end
				if l_length = 0 then
						-- Invalid or truncated sequence: escape the first byte only and
						-- retry from the next byte, which may itself start a valid sequence.
					escape_code_into (a_result, b1)
					l_length := 1
				end
				l_pos := l_pos + l_length
			end
		ensure
			instance_free: class
			roundtrip: escaped_utf_32_string_to_utf_8_string_8 (a_result.substring (old a_result.count + 1, a_result.count)).same_string (s)
		end

feature -- UTF-32 to UTF-16

	string_32_to_utf_16 (s: READABLE_STRING_32): SPECIAL [NATURAL_16]
			-- UTF-16 sequence corresponding to `s'.
			-- The sequence is not zero-terminated.
			-- The conversion is delegated to `utf_32_string_to_utf_16'.
		do
			Result := utf_32_string_to_utf_16 (s)
		ensure
			instance_free: class
				-- Each code unit equals the little-endian pair of bytes found at the matching
				-- position of the UTF-16LE encoding of `s': low byte first, then the high
				-- byte shifted by 8 bits.
			roundtrip: attached utf_32_string_to_utf_16le_string_8 (s) as l_ref and then
				∀ n: Result ¦ n = (l_ref.code (@ n.target_index * 2 + 1) | (l_ref.code ((@ n.target_index + 1) * 2) |<< 8))
		end

	utf_32_string_to_utf_16 (s: READABLE_STRING_GENERAL): SPECIAL [NATURAL_16]
			-- UTF-16 sequence corresponding to `s' interpreted as a UTF-32 sequence.
			-- The sequence is not zero-terminated.
		do
				-- Build the zero-terminated form first ...
			Result := utf_32_string_to_utf_16_0 (s)
				-- ... then shrink it by one item to drop the terminating zero.
			Result := Result.aliased_resized_area_with_default (0, Result.count - 1)
		ensure
			instance_free: class
				-- Each code unit equals the little-endian pair of bytes found at the matching
				-- position of the UTF-16LE encoding of `s'.
			roundtrip: attached utf_32_string_to_utf_16le_string_8 (s) as l_ref and then
				∀ n: Result ¦ n = (l_ref.code (@ n.target_index * 2 + 1) | (l_ref.code ((@ n.target_index + 1) * 2) |<< 8))
		end

	string_32_to_utf_16_0 (s: READABLE_STRING_32): SPECIAL [NATURAL_16]
			-- UTF-16 sequence corresponding to `s' with terminating zero.
			-- The conversion is delegated to `utf_32_string_to_utf_16_0'.
		do
			Result := utf_32_string_to_utf_16_0 (s)
		ensure
			instance_free: class
				-- The comparison is made on a copy of `Result' shortened by one item,
				-- i.e. without the terminating zero.
			roundtrip: attached utf_32_string_to_utf_16le_string_8 (s) as l_ref and then
				∀ n: Result.resized_area_with_default (0, Result.count - 1) ¦
					n = (l_ref.code (@ n.target_index * 2 + 1) | ((l_ref.code ((@ n.target_index + 1) * 2)) |<< 8))
		end

	utf_32_string_to_utf_16_0 (s: READABLE_STRING_GENERAL): SPECIAL [NATURAL_16]
			-- UTF-16 sequence corresponding to `s', interpreted as a UTF-32 sequence,
			-- with terminating zero.
			-- Codes up to 0xFFFF are written as a single code unit, larger codes as a
			-- surrogate pair.
		local
			i: like {STRING_32}.count
				-- Index in `s' of the character being converted.
			n: like {STRING_32}.count
				-- Number of characters in `s'.
			m: like {STRING_32}.count
				-- Number of code units written so far.
			p: like {STRING_32}.count
				-- Capacity of `Result', not counting the slot kept for the terminating zero.
			c: NATURAL_32
		do
			from
				m := 0
				n := s.count
				p := n
					-- One code unit per character, plus one for the terminating zero.
				create Result.make_empty (p + 1)
			invariant
				m = Result.count
				p + 1 = Result.capacity
			until
				i >= n
			loop
				i := i + 1
					-- Make sure there is sufficient room for at least 2 code units.
				if p < m + 2 then
						-- New capacity: what is already written, 2 units for the current
						-- character and 1 unit for each of the `n - i' remaining characters.
					p := m + (n - i) + 2
					Result := Result.aliased_resized_area (p + 1)
				end
				c := s.code (i)
				if c <= 0xFFFF then
						-- Codepoint from Basic Multilingual Plane: one 16-bit code unit.
					Result.extend (c.to_natural_16)
					m := m + 1
				else
						-- Supplementary Planes: surrogate pair with lead and trail surrogates.
						-- 0xD7C0 is 0xD800 - (0x10000 |>> 10), which folds the subtraction of
						-- 0x10000 into the lead surrogate base.
					Result.extend ((0xD7C0 + (c |>> 10)).to_natural_16)
					Result.extend ((0xDC00 + (c & 0x3FF)).to_natural_16)
					m := m + 2
				end
			end
				-- Terminating zero.
			Result.extend (0)
		ensure
			instance_free: class
			roundtrip: attached utf_32_string_to_utf_16le_string_8 (s) as l_ref and then
				∀ x: Result.resized_area_with_default (0, Result.count - 1) ¦
					x = (l_ref.code (@ x.target_index * 2 + 1) | ((l_ref.code ((@ x.target_index + 1) * 2)) |<< 8))
		end

	string_32_into_utf_16_pointer (s: READABLE_STRING_32; p: MANAGED_POINTER; p_offset: INTEGER; a_new_upper: detachable CELL [INTEGER])
			-- Write UTF-16 sequence corresponding to `s' to address `p + p_offset'
			-- and update the size of `p' to the number of written bytes.
			-- If `a_new_upper' is provided, it receives the byte index in `p' computed by
			-- `utf_32_substring_into_utf_16_pointer', to which the whole of `s' is passed.
			-- The sequence is not zero-terminated.
		require
			even_p_offset: (p_offset \\ 2) = 0
			p_offset_non_negative: p_offset >= 0
		do
			utf_32_substring_into_utf_16_pointer (s, 1, s.count, p, p_offset, a_new_upper)
		ensure
			instance_free: class
				-- With `a_new_upper', reading back up to the recorded index yields `s'.
			roundtrip: a_new_upper /= Void implies utf_16_0_subpointer_to_string_32 (p, p_offset // 2, (a_new_upper.item // 2) - 1, False).same_string (s)
				-- Without it, reading back up to the first null yields `s', provided `s'
				-- has no null character of its own.
			roundtrip: (a_new_upper = Void and then not s.has ('%U')) implies utf_16_0_subpointer_to_string_32 (p, p_offset // 2, (p.count // 2) - 1, True).same_string (s)
		end

	string_32_into_utf_16_0_pointer (s: READABLE_STRING_32; p: MANAGED_POINTER; p_offset: INTEGER; a_new_upper: detachable CELL [INTEGER])
			-- Write UTF-16 sequence corresponding to `s' with terminating zero
			-- to address `p + p_offset' and update the size of `p' to the number of written bytes.
			-- If `a_new_upper' is provided, the upper index of `p' containing the zero-termination
			-- is written to `a_new_upper'.
			-- The sequence is zero-terminated.
			-- The work is delegated to `utf_32_substring_into_utf_16_0_pointer', to which the
			-- whole of `s' is passed.
		require
			even_p_offset: (p_offset \\ 2) = 0
			p_offset_non_negative: p_offset >= 0
		do
			utf_32_substring_into_utf_16_0_pointer (s, 1, s.count, p, p_offset, a_new_upper)
		ensure
			instance_free: class
				-- With `a_new_upper', reading back up to the recorded index yields `s'.
			roundtrip: a_new_upper /= Void implies utf_16_0_subpointer_to_string_32 (p, p_offset // 2, (a_new_upper.item // 2) - 1, False).same_string (s)
				-- Without it, reading back up to the first null yields `s', provided `s'
				-- has no null character of its own.
			roundtrip: (a_new_upper = Void and then not s.has ('%U')) implies utf_16_0_subpointer_to_string_32 (p, p_offset // 2, (p.count // 2) - 1, True).same_string (s)
		end

	utf_32_substring_into_utf_16_pointer
		(s: READABLE_STRING_GENERAL;
		start_pos, end_pos: like {READABLE_STRING_32}.count;
		p: MANAGED_POINTER; p_offset: INTEGER; a_new_upper: detachable CELL [INTEGER])
			-- Write UTF-16 sequence corresponding to the substring of `s',
			-- interpreted as a UTF-32 sequence, starting at index `start_pos'
			-- and ending at index `end_pos' to address `p + p_offset' and update the
			-- size of `p' to the number of written bytes.
			-- If `a_new_upper' is provided, the byte index of `p' just past the last written
			-- code unit is written to `a_new_upper'.
			-- The sequence is not zero-terminated: when `p' had to be enlarged, the
			-- terminating zero written by the zero-terminated variant is cut off again.
		require
			start_position_big_enough: start_pos >= 1
			end_position_big_enough: start_pos <= end_pos + 1
			end_pos_small_enough: end_pos <= s.count
			even_p_offset: (p_offset \\ 2) = 0
			p_offset_non_negative: p_offset >= 0
		local
			m: INTEGER
		do
				-- Remember the initial size to detect whether `p' was enlarged.
			m := p.count
			utf_32_substring_into_utf_16_0_pointer (s, start_pos, end_pos, p, p_offset, a_new_upper)
			if m < p.count then
					-- Remove the null terminating character.
				p.resize (p.count - 2)
				if a_new_upper /= Void then
						-- `p' now ends right after the last written code unit, which is the
						-- same index as the one recorded when `p' is not enlarged.
					a_new_upper.put (p.count)
				end
			end
		ensure
			instance_free: class
			p_count_may_increase: p.count >= old p.count
			roundtrip: a_new_upper /= Void implies utf_16_0_subpointer_to_string_32 (p, p_offset // 2, (a_new_upper.item // 2) - 1, False).same_string_general (s.substring (start_pos, end_pos))
			roundtrip: (a_new_upper = Void and then not s.substring (start_pos, end_pos).has ('%U')) implies
				utf_16_0_subpointer_to_string_32 (p, p_offset // 2, (p.count // 2) - 1, True).same_string_general (s.substring (start_pos, end_pos))
		end

	utf_32_substring_into_utf_16_0_pointer
		(s: READABLE_STRING_GENERAL;
		start_pos, end_pos: like {READABLE_STRING_32}.count;
		p: MANAGED_POINTER; p_offset: INTEGER; a_new_upper: detachable CELL [INTEGER])
			-- Write UTF-16 sequence corresponding to the substring of `s',
			-- interpreted as a UTF-32 sequence, starting at index `start_pos'
			-- and ending at index `end_pos' to address `p + p_offset' and update the
			-- size of `p' to the number of written bytes.
			-- If `a_new_upper' is provided, the byte index of `p' containing the zero-termination
			-- is written to `a_new_upper'.
			-- The sequence is zero-terminated.
			-- `p' is only resized when it is too small; in that case its final size is
			-- exactly the end of the terminating zero.
		require
			start_position_big_enough: start_pos >= 1
			end_position_big_enough: start_pos <= end_pos + 1
			end_pos_small_enough: end_pos <= s.count
			even_p_offset: (p_offset \\ 2) = 0
			p_offset_non_negative: p_offset >= 0
		local
			i: like {READABLE_STRING_GENERAL}.count
			c: NATURAL_32
			m, l_count: like {MANAGED_POINTER}.count
			l_resized: BOOLEAN
		do
				-- Write UTF-16 sequence.
			from
					-- `i' temporarily holds the number of characters to convert.
				i := end_pos - start_pos + 1
				l_count := p.count
					-- Check that there is at least `i * 2' bytes available plus the terminating null character.
				if l_count - p_offset < (i + 1) * 2 then
						-- Optimize resizing, once we have to resize, we actually perform the resizing
						-- only once.
					l_count := p_offset + utf_16_bytes_count (s, start_pos, end_pos) + 2
					p.resize (l_count)
					l_resized := True
				end
					-- From now on `i' is the index in `s' and `m' the byte offset in `p'.
				i := start_pos - 1
				m := p_offset
			until
				i >= end_pos
			loop
				i := i + 1
				c := s.code (i)
				if c <= 0xFFFF then
						-- Codepoint from Basic Multilingual Plane: one 16-bit code unit.
					p.put_natural_16 (c.to_natural_16, m)
					m := m + 2
				else
						-- Make sure there is sufficient room for all the remaining characters and
						-- at least 3 code units of 2 bytes each, i.e. 2 code unit for the surrogate
						-- pair, and one unit for the terminating null character. Note that we do not
						-- take into account `p_offset' because `m' already includes it.
						-- Note that `end_pos - i' represents the number of remaining characters
						-- to process in the current string.
					if not l_resized and then (m + 6 + (end_pos - i) * 2 > l_count) then
							-- Optimize resizing, once we have to resize, we actually perform the resizing
							-- only once.
						l_count := m + utf_16_bytes_count (s, i, end_pos) + 2
						p.resize (l_count)
						l_resized := True
					end

						-- Supplementary Planes: surrogate pair with lead and trail surrogates.
						-- 0xD7C0 is 0xD800 - (0x10000 |>> 10).
					p.put_natural_16 ((0xD7C0 + (c |>> 10)).to_natural_16, m)
					p.put_natural_16 ((0xDC00 + (c & 0x3FF)).to_natural_16, m + 2)
					m := m + 4
				end
			end
				-- Adjust number of written bytes and add terminating zero at the end.
			if l_resized then
					-- `p' was enlarged: make it end exactly after the terminating zero.
				p.resize (m + 2)
			end
			p.put_natural_16 (0, m)
			if a_new_upper /= Void then
				a_new_upper.put (m)
			end
		ensure
			instance_free: class
			p_count_may_increase: p.count >= old p.count
			roundtrip: a_new_upper /= Void implies utf_16_0_subpointer_to_string_32 (p, p_offset // 2, (a_new_upper.item // 2) - 1, False).same_string_general (s.substring (start_pos, end_pos))
			roundtrip: (a_new_upper = Void and then not s.substring (start_pos, end_pos).has ('%U')) implies
				utf_16_0_subpointer_to_string_32 (p, p_offset // 2, (p.count // 2) - 1, True).same_string_general (s.substring (start_pos, end_pos))
		end

	utf_32_string_to_utf_16le_string_8 (s: READABLE_STRING_GENERAL): STRING_8
			-- New UTF-16LE sequence corresponding to `s' interpreted as a UTF-32 sequence.
			-- Each character of the result holds one byte of the encoding.
			-- The conversion is delegated to `utf_32_string_into_utf_16le_string_8'.
		do
				-- We would need at least 2-bytes per characters in `s'.
			create Result.make (s.count * 2)
			utf_32_string_into_utf_16le_string_8 (s, Result)
		ensure
			instance_free: class
			roundtrip: utf_16le_string_8_to_string_32 (Result).same_string_general (s)
		end

	utf_32_string_into_utf_16le_string_8 (s: READABLE_STRING_GENERAL; a_result: STRING_8)
			-- Append to `a_result' the UTF-16LE encoding of `s', interpreted as a UTF-32
			-- sequence. Every 16-bit code unit is stored as two characters, low byte first.
		local
			l_index, l_count: like {STRING_32}.count
			l_code: NATURAL_32
			l_unit: NATURAL_16
		do
			l_count := s.count
				-- At least two bytes are needed for each character of `s'.
			a_result.grow (a_result.count + l_count * 2)
			from
				l_index := 1
			until
				l_index > l_count
			loop
				l_code := s.code (l_index)
				if l_code > 0xFFFF then
						-- Supplementary plane: lead surrogate first ...
					l_unit := (0xD7C0 + (l_code |>> 10)).to_natural_16
					a_result.extend ((l_unit & 0x00FF).to_character_8)
					a_result.extend (((l_unit & 0xFF00) |>> 8).to_character_8)
						-- ... followed by the trail surrogate.
					l_unit := (0xDC00 + (l_code & 0x3FF)).to_natural_16
					a_result.extend ((l_unit & 0x00FF).to_character_8)
					a_result.extend (((l_unit & 0xFF00) |>> 8).to_character_8)
				else
						-- Basic Multilingual Plane: a single 16-bit code unit.
					a_result.extend ((l_code & 0x00FF).to_character_8)
					a_result.extend (((l_code & 0xFF00) |>> 8).to_character_8)
				end
				l_index := l_index + 1
			end
		ensure
			instance_free: class
			roundtrip: utf_16le_string_8_to_string_32 (a_result.substring (old a_result.count + 1, a_result.count)).same_string_general (s)
		end

	escaped_utf_32_substring_into_utf_16_0_pointer (
			s: READABLE_STRING_GENERAL; start_pos, end_pos: like {READABLE_STRING_32}.count;
			p: MANAGED_POINTER; p_offset: INTEGER; a_new_upper: detachable CELL [INTEGER]
		)
			-- Write UTF-16 sequence corresponding to the substring of `s',
			-- interpreted as a UTF-32 sequence that could be escaped, starting at index
			-- `start_pos' and ending at index `end_pos' to address `p + p_offset' and update
			-- the size of `p' to the number of written bytes.
			-- An `escape_character' followed by "uHHHH", where HHHH is the hexadecimal value
			-- of a surrogate code unit (0xD800 .. 0xDFFF), is written back as that single
			-- code unit; a doubled `escape_character' is written as one `escape_character'.
			-- Any other use of `escape_character' is left as is.
			-- If `a_new_upper' is provided, the byte index of `p' containing the zero-termination
			-- is written to `a_new_upper'.
			-- The sequence is zero-terminated.
		require
			start_position_big_enough: start_pos >= 1
			end_position_big_enough: start_pos <= end_pos + 1
			end_pos_small_enough: end_pos <= s.count
			even_p_offset: (p_offset \\ 2) = 0
			p_offset_non_negative: p_offset >= 0
		local
			i, n, m, l_count: INTEGER
			c: NATURAL_32
			l_encoded_value: READABLE_STRING_GENERAL
			l_decoded: BOOLEAN
			l_resized: BOOLEAN
		do
			from
					-- `n' is the number of characters to convert. It is only used for sizing:
					-- `i' is an index in `s', so look-ahead checks must use `end_pos'.
				n := end_pos - start_pos + 1
				l_count := p.count
					-- Check that there is at least `n * 2' bytes available plus the terminating null character.
				if l_count - p_offset < (n + 1) * 2 then
						-- Optimize resizing, once we have to resize, we actually perform the resizing
						-- only once.
					l_count := p_offset + utf_16_bytes_count (s, start_pos, end_pos) + 2
					p.resize (l_count)
					l_resized := True
				end
				i := start_pos - 1
				m := p_offset
			until
				i >= end_pos
			loop
				i := i + 1
				c := s.code (i)
				if c = escape_character.natural_32_code then
						-- We might be facing a character that was escaped.
					if i < end_pos then
						if s.item (i + 1) = escape_character then
								-- The `escape_character' was escaped, it meant they really wanted an `escape_character'.
							i := i + 1
						elseif s.item (i + 1) = 'u' then
								-- We need `u' plus 4 hexadecimal digits, i.e. up to index `i + 5'.
							if i + 4 < end_pos then
								l_encoded_value := s.substring (i + 2, i + 5)
								if is_hexa_decimal (l_encoded_value) then
									c := to_natural_32 (l_encoded_value)
									if c < 0xD800 or c > 0xDFFF then
											-- Value was encoded when it should not have been
											-- do nothing, we leave the original content as is.
										c := escape_character.natural_32_code
									else
										l_decoded := True
										i := i + 5
									end
								else
										-- Not an hexadecimal value, it was not escaped.
								end
							else
									-- Not enough characters to make a 2-byte value, it was not escaped.
							end
						else
								-- Value was most likely not encoded, because if it did, it would be the
								-- hexadecimal representation of a byte which clearly did not need to
								-- be escaped
						end
					else
							-- Nothing more to read, clearly it was not encoded.
					end
				end

				if not l_decoded then
					if c <= 0xFFFF then
							-- Codepoint from Basic Multilingual Plane: one 16-bit code unit.
						p.put_natural_16 (c.to_natural_16, m)
						m := m + 2
					else
							-- Make sure there is sufficient room for all the remaining characters and
							-- at least 3 code units of 2 bytes each, i.e. 2 code unit for the surrogate
							-- pair, and one unit for the terminating null character. Note that we do not
							-- take into account `p_offset' because `m' already includes it.
							-- Note that `end_pos - i' represents the number of remaining characters
							-- to process in the current string.
						if not l_resized and then (m + 6 + (end_pos - i) * 2 > l_count) then
								-- Optimize resizing, once we have to resize, we actually perform the resizing
								-- only once.
							l_count := m + utf_16_bytes_count (s, i, end_pos) + 2
							p.resize (l_count)
							l_resized := True
						end

							-- Write the lead surrogate pair.
						p.put_natural_16 ((0xD7C0 + (c |>> 10)).to_natural_16, m)

							-- Write the trail surrogate pair.
						p.put_natural_16 ((0xDC00 + (c & 0x3FF)).to_natural_16, m + 2)

						m := m + 4
					end
				else
					l_decoded := False
						-- Simply put decoded value directly in stream.
					p.put_natural_16 (c.to_natural_16, m)
					m := m + 2
				end
			end
			if l_resized then
					-- `p' was enlarged: make it end exactly after the terminating zero.
				p.resize (m + 2)
			end
			p.put_natural_16 (0, m)
			if a_new_upper /= Void then
				a_new_upper.put (m)
			end
		ensure
			instance_free: class
			p_count_may_increase: p.count >= old p.count
			roundtrip: a_new_upper /= Void implies utf_16_0_subpointer_to_escaped_string_32 (p, p_offset // 2, (a_new_upper.item // 2) - 1, False).same_string_general (s.substring (start_pos, end_pos))
			roundtrip: (a_new_upper = Void and then not s.substring (start_pos, end_pos).has ('%U')) implies
				utf_16_0_subpointer_to_escaped_string_32 (p, p_offset // 2, (p.count // 2) - 1, True).same_string_general (s.substring (start_pos, end_pos))
		end

	escaped_utf_32_string_to_utf_16le_string_8 (s: READABLE_STRING_GENERAL): STRING_8
			-- New UTF-16LE sequence corresponding to `s' interpreted as a UTF-32 sequence that could be escaped.
			-- If `s' contains the `escape_character' followed by either "HH" or "uHHHH" where H stands
			-- for an hexadecimal digit, then `s' has been escaped and will be converted to what is
			-- expected by the current platform.
			-- Otherwise it will be ignored and it will be left as is.
			-- See the note clause for the class for more details on the encoding.
			-- The conversion is delegated to `escaped_utf_32_string_into_utf_16le_string_8'.
		do
				-- We would need at least 2-bytes per characters in `s'.
			create Result.make (s.count * 2)
			escaped_utf_32_string_into_utf_16le_string_8 (s, Result)
		ensure
			instance_free: class
			roundtrip: utf_16le_string_8_to_escaped_string_32 (Result).same_string_general (s)
		end

	escaped_utf_32_string_into_utf_16le_string_8 (s: READABLE_STRING_GENERAL; a_result: STRING_8)
			-- Copy UTF-16LE sequence corresponding to `s' interpreted as a UTF-32 sequence that could be
			-- escaped appended into `a_result'.
			-- If `s' contains the `escape_character' followed by either "HH" or "uHHHH" where H stands
			-- for an hexadecimal digit, then `s' has been escaped and will be converted to what is
			-- expected by the current platform.
			-- Otherwise it will be ignored and it will be left as is.
			-- See the note clause for the class for more details on the encoding.
			-- In this routine only the "uHHHH" form with a value in 0xD800 .. 0xDFFF and the
			-- doubled `escape_character' are decoded; everything else is written unchanged.
		local
			i: like {STRING_32}.count
			n: like {STRING_32}.count
			c: NATURAL_32
			l_nat16: NATURAL_16
			l_encoded_value: READABLE_STRING_GENERAL
			l_decoded: BOOLEAN
				-- Does `c' hold a code unit decoded from an escape for the current iteration?
		do
			from
				n := s.count
					-- We would need at least 2-bytes per characters in `s'.
				a_result.grow (a_result.count + n * 2)
			until
				i >= n
			loop
				i := i + 1
				c := s.code (i)
				if c = escape_character.natural_32_code then
						-- We might be facing a character that was escaped.
					if i < n then
						if s.item (i + 1) = escape_character then
								-- The `escape_character' was escaped, it meant they really wanted an `escape_character'.
							i := i + 1
						elseif s.item (i + 1) = 'u' then
								-- We need `u' plus 4 hexadecimal digits, i.e. up to index `i + 5'.
							if i + 4 < n then
								l_encoded_value := s.substring (i + 2, i + 5)
								if is_hexa_decimal (l_encoded_value) then
									c := to_natural_32 (l_encoded_value)
									if c < 0xD800 or c > 0xDFFF then
											-- Value was encoded when it should not have been
											-- do nothing, we leave the original content as is.
										c := escape_character.natural_32_code
									else
										l_decoded := True
											-- Skip `u' and the 4 hexadecimal digits.
										i := i + 5
									end
								else
										-- Not an hexadecimal value, it was not escaped.
								end
							else
									-- Not enough characters to make a 2-byte value, it was not escaped.
							end
						else
								-- Value was most likely not encoded, because if it did, it would be the
								-- hexadecimal representation of a byte which clearly did not need to
								-- be escaped
						end
					else
							-- Nothing more to read, clearly it was not encoded.
					end
				end

				if not l_decoded then
					if c <= 0xFFFF then
							-- Codepoint from Basic Multilingual Plane: one 16-bit code unit.
							-- Low byte first (little-endian).
						a_result.extend ((c & 0x00FF).to_character_8)
						a_result.extend (((c & 0xFF00) |>> 8).to_character_8)
					else
							-- Write the lead surrogate pair.
						l_nat16 := (0xD7C0 + (c |>> 10)).to_natural_16
						a_result.extend ((l_nat16 & 0x00FF).to_character_8)
						a_result.extend (((l_nat16 & 0xFF00) |>> 8).to_character_8)

							-- Write the trail surrogate pair.
						l_nat16 := (0xDC00 + (c & 0x3FF)).to_natural_16
						a_result.extend ((l_nat16 & 0x00FF).to_character_8)
						a_result.extend (((l_nat16 & 0xFF00) |>> 8).to_character_8)
					end
				else
						-- Reset the flag for the next iteration.
					l_decoded := False
						-- Simply put decoded value directly in stream.
					a_result.extend ((c & 0x00FF).to_character_8)
					a_result.extend (((c & 0xFF00) |>> 8).to_character_8)
				end
			end
		ensure
			instance_free: class
			roundtrip: utf_16le_string_8_to_escaped_string_32 (a_result.substring (old a_result.count + 1, a_result.count)).same_string_general (s)
		end

feature -- UTF-16 to UTF-32

	utf_16_0_pointer_to_string_32 (p: MANAGED_POINTER): STRING_32
			-- New {STRING_32} object corresponding to UTF-16 sequence `p' which is zero-terminated.
			-- The decoding is delegated to `utf_16_0_pointer_into_string_32'.
		require
			minimum_size: p.count >= 2
			valid_count: p.count \\ 2 = 0
		do
				-- Preallocate as many characters as there are bytes in `p'.
			create Result.make (p.count)
			utf_16_0_pointer_into_string_32 (p, Result)
		ensure
			instance_free: class
				-- Code unit number `k' (0-based) of the re-encoded `Result' is the one stored
				-- at byte offset `k * 2' in `p'.
			roundtrip: is_valid_utf_16_subpointer (p, 0, p.count // 2, True) implies
				∀ n: string_32_to_utf_16 (Result) ¦ n = p.read_natural_16 (@ n.target_index * 2)
		end

	utf_16_0_pointer_into_string_32 (p: MANAGED_POINTER; a_result: STRING_32)
			-- Copy {STRING_32} object corresponding to UTF-16 sequence `p' which is zero-terminated
			-- appended into `a_result'.
		require
			minimum_size: p.count >= 2
			valid_count: p.count \\ 2 = 0
		do
				-- Decode all code units of `p' (indexes 0 to `p.count // 2 - 1'),
				-- stopping at the first null code unit.
			utf_16_0_subpointer_into_string_32 (p, 0, p.count // 2 - 1, True, a_result)
		ensure
			instance_free: class
				-- Only the characters appended by this call are re-encoded and compared.
			roundtrip: is_valid_utf_16_subpointer (p, 0, p.count // 2, True) implies
				∀ n: string_32_to_utf_16 (a_result.substring (old a_result.count + 1, a_result.count)) ¦ n = p.read_natural_16 (@ n.target_index * 2)
		end

	utf_16_0_subpointer_to_string_32 (p: MANAGED_POINTER; start_pos, end_pos: INTEGER; a_stop_at_null: BOOLEAN): STRING_32
			-- New {STRING_32} object corresponding to UTF-16 sequence `p' between code units
			-- `start_pos' and `end_pos' or the first null character encountered if `a_stop_at_null'.
			-- `start_pos' and `end_pos' are 0-based indexes of 16-bit code units, not bytes.
			-- The decoding is delegated to `utf_16_0_subpointer_into_string_32'.
		require
			minimum_size: p.count >= 2
			start_position_big_enough: start_pos >= 0
			end_position_big_enough: start_pos <= end_pos + 1
			end_pos_small_enough: end_pos < p.count // 2
		do
				-- Preallocate as many characters as there are bytes in `p'.
			create Result.make (p.count)
			utf_16_0_subpointer_into_string_32 (p, start_pos, end_pos, a_stop_at_null, Result)
		ensure
			instance_free: class
				-- Code unit number `k' (0-based) of the re-encoded `Result' is the one stored
				-- at code unit index `start_pos + k' in `p'.
			roundtrip: is_valid_utf_16_subpointer (p, start_pos, end_pos, a_stop_at_null) implies
				∀ n: string_32_to_utf_16 (Result) ¦ n = p.read_natural_16 ((start_pos + @ n.target_index) * 2)
		end

	utf_16_0_subpointer_into_string_32 (p: MANAGED_POINTER; start_pos, end_pos: INTEGER; a_stop_at_null: BOOLEAN; a_result: STRING_32)
			-- Copy {STRING_32} object corresponding to UTF-16 sequence `p' between code units `start_pos' and
			-- `end_pos' or the first null character encountered if `a_stop_at_null' appended into `a_result'.
			-- `start_pos' and `end_pos' are 0-based indexes of 16-bit code units, not bytes.
			-- No validation is performed: any code unit in 0xD800 .. 0xDFFF is combined with
			-- the code unit that follows it, and is dropped when it is the last one of the range.
		require
			minimum_size: p.count >= 2
			start_position_big_enough: start_pos >= 0
			end_position_big_enough: start_pos <= end_pos + 1
			end_pos_small_enough: end_pos < p.count // 2
		local
			i, n: INTEGER
			c: NATURAL_32
		do
			from
					-- Reserve one character per code unit of the requested range.
				a_result.grow (a_result.count + end_pos - start_pos + 1)
					-- `i' and `n' are byte offsets in `p'.
				i := start_pos * 2
				n := end_pos * 2
			until
				i > n
			loop
				c := p.read_natural_16 (i)
				if c = 0 and a_stop_at_null then
						-- We hit our null terminating character, we can stop
					i := n + 1
				else
					i := i + 2
					if c < 0xD800 or c >= 0xE000 then
							-- Codepoint from Basic Multilingual Plane: one 16-bit code unit.
						a_result.extend (c.to_character_32)
					else
							-- Supplementary Planes: surrogate pair with lead and trail surrogates.
						if i <= n then
								-- 0x35FDC00 is (0xD800 |<< 10) + 0xDC00 - 0x10000: it removes both
								-- surrogate bases and adds the supplementary plane offset at once.
							a_result.extend (((c.as_natural_32 |<< 10) + p.read_natural_16 (i) - 0x35FDC00).to_character_32)
							i := i + 2
						end
					end
				end
			end
		ensure
			instance_free: class
				-- Code unit number `k' (0-based) of the re-encoded appended text is the one
				-- stored at code unit index `start_pos + k' in `p'.
			roundtrip: is_valid_utf_16_subpointer (p, start_pos, end_pos, a_stop_at_null) implies
				∀ x: string_32_to_utf_16 (a_result.substring (old a_result.count + 1, a_result.count)) ¦ x = p.read_natural_16 ((start_pos + @ x.target_index) * 2)
		end

	utf_16_0_pointer_to_escaped_string_32 (p: MANAGED_POINTER): STRING_32
			-- New {STRING_32} object decoded from the zero-terminated UTF-16 sequence `p',
			-- in which every invalid UTF-16LE code unit appears in its escaped form.
		require
			minimum_size: p.count >= 2
			valid_count: p.count \\ 2 = 0
		local
			l_byte_count: INTEGER
		do
				-- The byte size of `p' serves as initial capacity of the result.
			l_byte_count := p.count
			create Result.make (l_byte_count)
			utf_16_0_pointer_into_escaped_string_32 (p, Result)
		ensure
			instance_free: class
			roundtrip: attached escaped_utf_32_string_to_utf_16le_string_8 (Result) as l_utf and then
				∀ c: l_utf.new_cursor.incremented (1) ¦
					(c.natural_32_code | (l_utf.code (@ c.target_index + 1) |<< 8)) = p.read_natural_16 (@ c.target_index - 1)
		end

	utf_16_0_pointer_into_escaped_string_32 (p: MANAGED_POINTER; a_result: STRING_32)
			-- Append to `a_result' the characters decoded from the zero-terminated UTF-16
			-- sequence `p', with every invalid UTF-16LE code unit written in its escaped form.
		require
			minimum_size: p.count >= 2
			valid_count: p.count \\ 2 = 0
		local
			l_last_unit: INTEGER
		do
				-- Scan every code unit of `p' and stop at the first null one.
			l_last_unit := p.count // 2 - 1
			utf_16_0_subpointer_into_escaped_string_32 (p, 0, l_last_unit, True, a_result)
		ensure
			instance_free: class
			roundtrip: attached escaped_utf_32_string_to_utf_16le_string_8 (a_result.substring (old a_result.count + 1, a_result.count)) as l_utf and then
				∀ c: l_utf.new_cursor.incremented (1) ¦
					(c.natural_32_code | (l_utf.code (@ c.target_index + 1) |<< 8)) = p.read_natural_16 (@ c.target_index - 1)
		end

	utf_16_0_subpointer_to_escaped_string_32 (p: MANAGED_POINTER; start_pos, end_pos: INTEGER; a_stop_at_null: BOOLEAN): STRING_32
			-- New {STRING_32} object decoded from the UTF-16 sequence stored in `p' between
			-- code units `start_pos' and `end_pos', or up to the first null code unit
			-- encountered if `a_stop_at_null'. Invalid UTF-16LE code units appear in their
			-- escaped form.
		require
			minimum_size: p.count >= 2
			start_position_big_enough: start_pos >= 0
			end_position_big_enough: start_pos <= end_pos + 1
			end_pos_small_enough: end_pos < p.count // 2
		local
			l_unit_count: INTEGER
		do
				-- Number of code units in the requested range, used as initial capacity.
			l_unit_count := end_pos - start_pos + 1
			create Result.make (l_unit_count)
			utf_16_0_subpointer_into_escaped_string_32 (p, start_pos, end_pos, a_stop_at_null, Result)
		ensure
			instance_free: class
			roundtrip: attached escaped_utf_32_string_to_utf_16le_string_8 (Result) as l_utf and then
				∀ c: l_utf.new_cursor.incremented (1) ¦
					(c.natural_32_code | (l_utf.code (@ c.target_index + 1) |<< 8)) = p.read_natural_16 (start_pos * 2 + @ c.target_index - 1)
		end

	utf_16_0_subpointer_into_escaped_string_32 (p: MANAGED_POINTER; start_pos, end_pos: INTEGER; a_stop_at_null: BOOLEAN; a_result: STRING_32)
			-- Append to `a_result' the characters decoded from the UTF-16 sequence stored in `p'
			-- between code units `start_pos' and `end_pos', or up to the first null code unit
			-- encountered if `a_stop_at_null'. Every surrogate that is not part of a well-formed
			-- lead/trail pair is appended in its escaped form.
		require
			minimum_size: p.count >= 2
			start_position_big_enough: start_pos >= 0
			end_position_big_enough: start_pos <= end_pos + 1
			end_pos_small_enough: end_pos < p.count // 2
		local
			l_offset, l_last_offset: INTEGER
			l_unit, l_trail: NATURAL_32
			l_done, l_is_pair: BOOLEAN
		do
				-- Reserve room for one character per code unit of the requested range.
			a_result.grow (a_result.count + end_pos - start_pos + 1)
				-- Offsets are expressed in bytes: each code unit takes 2 bytes.
			l_last_offset := end_pos * 2
			from
				l_offset := start_pos * 2
			until
				l_done or l_offset > l_last_offset
			loop
				l_unit := p.read_natural_16 (l_offset)
				if a_stop_at_null and l_unit = 0 then
						-- Null terminator reached: nothing more to decode.
					l_done := True
				else
					l_offset := l_offset + 2
					if l_unit < 0xD800 or l_unit >= 0xE000 then
							-- Basic Multilingual Plane: the code unit is the code point.
						a_result.extend (l_unit.to_character_32)
					else
							-- Surrogate: it is well-formed only when it is a lead surrogate
							-- (0xD800 to 0xDBFF) immediately followed by a trail surrogate
							-- (0xDC00 to 0xDFFF) inside the range.
						l_is_pair := False
						if l_unit <= 0xDBFF and then l_offset <= l_last_offset then
							l_trail := p.read_natural_16 (l_offset)
							l_is_pair := l_trail >= 0xDC00 and l_trail <= 0xDFFF
						end
						if l_is_pair then
								-- Supplementary Planes: combine both surrogates and skip the trail one.
							a_result.extend (((l_unit |<< 10) + l_trail - 0x35FDC00).to_character_32)
							l_offset := l_offset + 2
						else
								-- Lone trail surrogate or lead surrogate without its trail one.
							escape_code_into (a_result, l_unit.as_natural_16)
						end
					end
				end
			end
		ensure
			instance_free: class
			roundtrip: attached escaped_utf_32_string_to_utf_16le_string_8 (a_result.substring (old a_result.count + 1, a_result.count)) as l_utf and then
				∀ c: l_utf.new_cursor.incremented (1) ¦
					(c.natural_32_code | (l_utf.code (@ c.target_index + 1) |<< 8)) = p.read_natural_16 (start_pos * 2 + @ c.target_index - 1)
		end

	utf_16_to_string_32 (s: SPECIAL [NATURAL_16]): STRING_32
			-- New {STRING_32} object decoded from the UTF-16 code units of `s'.
		local
			l_unit_count: INTEGER
		do
				-- One character per code unit is an upper bound on the decoded length.
			l_unit_count := s.count
			create Result.make (l_unit_count)
			utf_16_into_string_32 (s, Result)
		ensure
			instance_free: class
			roundtrip: is_valid_utf_16 (s) implies string_32_to_utf_16 (Result).is_equal (s)
		end

	utf_16_into_string_32 (s: SPECIAL [NATURAL_16]; a_result: STRING_32)
			-- Append to `a_result' the characters decoded from the UTF-16 code units of `s'.
			-- Surrogates are combined pairwise without validation; a surrogate that is the
			-- last code unit of `s' appends nothing.
		local
			l_index, l_count: INTEGER
			l_unit: NATURAL_32
		do
			l_count := s.count
				-- Reserve room for one character per code unit.
			a_result.grow (a_result.count + l_count)
			from
				l_index := 0
			until
				l_index >= l_count
			loop
				l_unit := s [l_index]
				l_index := l_index + 1
				if l_unit >= 0xD800 and l_unit < 0xE000 then
						-- Surrogate: combine it with the next code unit, if any, into a
						-- code point of the Supplementary Planes.
					if l_index < l_count then
						a_result.extend (((l_unit |<< 10) + s [l_index] - 0x35FDC00).to_character_32)
						l_index := l_index + 1
					end
				else
						-- Basic Multilingual Plane: the code unit is the code point.
					a_result.extend (l_unit.to_character_32)
				end
			end
		ensure
			instance_free: class
			roundtrip: is_valid_utf_16 (s) implies string_32_to_utf_16 (a_result.substring (old a_result.count + 1, a_result.count)).is_equal (s)
		end

	utf_16le_string_8_to_string_32 (s: READABLE_STRING_8): STRING_32
			-- New {STRING_32} object decoded from the UTF-16LE byte sequence `s'.
			-- No escaping is performed.
		do
				-- Every character takes at least 2 bytes of `s', so there are at most
				-- half as many characters as bytes.
			create Result.make (s.count |>> 1)
			utf_16le_string_8_into_string_32 (s, Result)
		ensure
			instance_free: class
				-- The plain (non-escaping) encoder is the inverse of this decoder: the escaping
				-- one would reinterpret a genuine U+FFFD followed by hexadecimal digits.
			roundtrip: is_valid_utf_16le_string_8 (s) implies utf_32_string_to_utf_16le_string_8 (Result).same_string (s)
		end

	utf_16le_string_8_into_string_32 (s: READABLE_STRING_8; a_result: STRING_32)
			-- Append to `a_result' the characters decoded from the UTF-16LE byte sequence `s'.
			-- No escaping is performed: surrogates are combined pairwise without validation,
			-- and a trailing odd byte or a surrogate with no following 2-byte unit appends nothing.
		local
			i, nb: INTEGER
			c1, c2: NATURAL_32
		do
			from
				nb := s.count
					-- Every character takes at least 2 bytes of `s', so at most half as many
					-- characters as bytes are appended.
				a_result.grow (a_result.count + (nb |>> 1))
			until
				i + 1 >= nb
			loop
				i := i + 2
					-- Extract the first 2-bytes (little endian: low byte first).
				c1 := s.code (i - 1) | (s.code (i) |<< 8)
				if c1 < 0xD800 or c1 >= 0xE000 then
						-- Codepoint from Basic Multilingual Plane: one 16-bit code unit, this is valid Unicode.
					a_result.extend (c1.to_character_32)
				else
						-- Supplementary Planes: surrogate pair with lead and trail surrogates.
					i := i + 2
					if i <= nb then
						c2 := s.code (i - 1) | (s.code (i) |<< 8)
							-- 0x35FDC00 = (0xD800 |<< 10) + 0xDC00 - 0x10000: removes both
							-- surrogate markers and adds the supplementary plane offset.
						a_result.extend (((c1 |<< 10) + c2 - 0x35FDC00).to_character_32)
					end
				end
			end
		ensure
			instance_free: class
				-- The plain (non-escaping) encoder is the inverse of this decoder: the escaping
				-- one would reinterpret a genuine U+FFFD followed by hexadecimal digits.
			roundtrip: is_valid_utf_16le_string_8 (s) implies utf_32_string_to_utf_16le_string_8 (a_result.substring (old a_result.count + 1, a_result.count)).same_string (s)
		end

	utf_16le_string_8_to_escaped_string_32 (s: READABLE_STRING_8): STRING_32
			-- New {STRING_32} object decoded from the UTF-16LE byte sequence `s', in which
			-- every invalid UTF-16LE code unit appears in its escaped form.
		local
			l_initial_capacity: INTEGER
		do
				-- Start with room for one character per 2-byte unit of `s'.
			l_initial_capacity := s.count |>> 1
			create Result.make (l_initial_capacity)
			utf_16le_string_8_into_escaped_string_32 (s, Result)
		ensure
			instance_free: class
			roundtrip: escaped_utf_32_string_to_utf_16le_string_8 (Result).same_string (s)
		end

	utf_16le_string_8_into_escaped_string_32 (s: READABLE_STRING_8; a_result: STRING_32)
			-- Append to `a_result' the characters decoded from the UTF-16LE byte sequence `s'.
			-- Every surrogate that is not part of a well-formed lead/trail pair is appended
			-- in its escaped form.
		local
			l_pos, l_byte_count: INTEGER
			l_unit, l_trail: NATURAL_32
			l_is_pair: BOOLEAN
		do
			l_byte_count := s.count
				-- Start with room for one character per 2-byte unit of `s'.
			a_result.grow (a_result.count + (l_byte_count |>> 1))
			from
				l_pos := 0
			until
				l_pos + 1 >= l_byte_count
			loop
				l_pos := l_pos + 2
					-- Current 2-byte unit, low byte first.
				l_unit := s.code (l_pos - 1) | (s.code (l_pos) |<< 8)
				if l_unit < 0xD800 or l_unit >= 0xE000 then
						-- Basic Multilingual Plane: the code unit is the code point.
					a_result.extend (l_unit.to_character_32)
				else
						-- Surrogate: it is well-formed only when it is a lead surrogate
						-- (up to 0xDBFF) immediately followed in `s' by a trail surrogate
						-- (0xDC00 to 0xDFFF).
					l_is_pair := False
					if l_unit <= 0xDBFF and l_pos + 2 <= l_byte_count then
						l_trail := s.code (l_pos + 1) | (s.code (l_pos + 2) |<< 8)
						l_is_pair := l_trail >= 0xDC00 and l_trail <= 0xDFFF
					end
					if l_is_pair then
							-- Supplementary Planes: combine both surrogates and skip the trail one.
						a_result.extend (((l_unit |<< 10) + l_trail - 0x35FDC00).to_character_32)
						l_pos := l_pos + 2
					else
							-- Lone trail surrogate or lead surrogate without its trail one.
						escape_code_into (a_result, l_unit.as_natural_16)
					end
				end
			end
		ensure
			instance_free: class
			roundtrip: escaped_utf_32_string_to_utf_16le_string_8 (a_result.substring (old a_result.count + 1, a_result.count)).same_string (s)
		end

feature -- UTF-16 to UTF-8

	utf_16_to_utf_8_string_8 (s: SPECIAL [NATURAL_16]): STRING_8
			-- New UTF-8 byte sequence encoding the same text as the UTF-16 sequence `s'.
			-- The conversion currently goes through an intermediate {STRING_32}.
		local
			l_decoded: STRING_32
		do
			debug ("to_implement")
				(create {REFACTORING_HELPER}).to_implement ("Convert directly from UTF-16 to UTF-8.")
			end
				-- Step 1: UTF-16 to UTF-32.
			l_decoded := utf_16_to_string_32 (s)
				-- Step 2: UTF-32 to UTF-8.
			Result := string_32_to_utf_8_string_8 (l_decoded)
		ensure
			instance_free: class
			roundtrip: is_valid_utf_16 (s) implies string_32_to_utf_16 (utf_8_string_8_to_string_32 (Result)).is_equal (s)
		end

	utf_16_into_utf_8_string_8 (s: SPECIAL [NATURAL_16]; a_result: STRING_8)
			-- Append to `a_result' the UTF-8 byte sequence encoding the same text as the
			-- UTF-16 sequence `s'.
			-- The conversion currently goes through an intermediate {STRING_32}.
		local
			l_decoded: STRING_32
		do
			debug ("to_implement")
				(create {REFACTORING_HELPER}).to_implement ("Convert directly from UTF-16 to UTF-8.")
			end
				-- Step 1: UTF-16 to UTF-32.
			l_decoded := utf_16_to_string_32 (s)
				-- Step 2: UTF-32 to UTF-8, appended to the target.
			string_32_into_utf_8_string_8 (l_decoded, a_result)
		ensure
			instance_free: class
			roundtrip: is_valid_utf_16 (s) implies string_32_to_utf_16 (utf_8_string_8_to_string_32 (a_result.substring (old a_result.count + 1, a_result.count))).is_equal (s)
		end

	utf_16le_string_8_to_utf_8_string_8 (s: READABLE_STRING_8): STRING_8
			-- New UTF-8 byte sequence encoding the same text as the UTF-16LE byte sequence `s'.
			-- `s' must hold a whole number of 2-byte code units.
		require
				-- Same requirement as `utf_16le_string_8_into_utf_8_string_8', which performs
				-- the conversion and reads the bytes of `s' two at a time.
			even_count: (s.count & 1) = 0
		do
				-- Initial capacity only; the result is extended as needed by the conversion.
			create Result.make (s.count)
			utf_16le_string_8_into_utf_8_string_8 (s, Result)
		ensure
			instance_free: class
			roundtrip: is_valid_utf_16le_string_8 (s) implies utf_32_string_to_utf_16le_string_8 (utf_8_string_8_to_string_32 (Result)).same_string (s)
		end

	utf_16le_string_8_into_utf_8_string_8 (s: READABLE_STRING_8; a_result: STRING_8)
			-- Append to `a_result' the UTF-8 byte sequence encoding the same text as the
			-- UTF-16LE byte sequence `s'.
		require
			even_count: (s.count & 1) = 0
		local
			l_units: SPECIAL [NATURAL_16]
			l_high: INTEGER
			l_byte_count: INTEGER
		do
			l_byte_count := s.count
				-- One 16-bit code unit per pair of bytes.
			create l_units.make_empty (l_byte_count |>> 1)
			from
					-- `l_high' is the 1-based index of the high byte of the current pair.
				l_high := 2
			until
				l_high - 2 >= l_byte_count
			loop
				check
					valid_index: 1 <= l_high - 1 and l_high <= s.count
				end
					-- Little endian: low byte first, high byte second.
				l_units.extend (s [l_high - 1].code.as_natural_16 | (s [l_high].code.as_natural_16 |<< 8))
				l_high := l_high + 2
			end
			utf_16_into_utf_8_string_8 (l_units, a_result)
		ensure
			instance_free: class
			roundtrip: is_valid_utf_16le_string_8 (s) implies utf_32_string_to_utf_16le_string_8 (utf_8_string_8_to_string_32 (a_result.substring (old a_result.count + 1, a_result.count))).same_string (s)
		end

feature -- UTF-8 to UTF-16

	utf_8_string_8_to_utf_16 (s: READABLE_STRING_8): SPECIAL [NATURAL_16]
			-- New UTF-16 sequence encoding the same text as the UTF-8 byte sequence `s'.
			-- The conversion currently goes through an intermediate {STRING_32}.
		local
			l_decoded: STRING_32
		do
			debug ("to_implement")
				(create {REFACTORING_HELPER}).to_implement ("Convert directly from UTF-8 to UTF-16.")
			end
				-- Step 1: UTF-8 to UTF-32.
			l_decoded := utf_8_string_8_to_string_32 (s)
				-- Step 2: UTF-32 to UTF-16.
			Result := string_32_to_utf_16 (l_decoded)
		ensure
			instance_free: class
			roundtrip: is_valid_utf_8_string_8 (s) implies utf_16_to_utf_8_string_8 (Result).same_string (s)
		end

	utf_8_string_8_to_utf_16_0 (s: READABLE_STRING_8): SPECIAL [NATURAL_16]
			-- New UTF-16 sequence encoding the same text as the UTF-8 byte sequence `s',
			-- followed by one terminating zero code unit.
		do
			Result := utf_8_string_8_to_utf_16 (s)
				-- Extend the sequence by one code unit initialized to 0: the terminator.
			Result := Result.aliased_resized_area_with_default (0, Result.count + 1)
		ensure
			instance_free: class
			zero_terminated: Result.count > 0 and then Result [Result.count - 1] = 0
				-- The terminating zero is not part of the text: leave it out before converting
				-- back, otherwise the converted sequence ends with an extra null character.
			roundtrip: is_valid_utf_8_string_8 (s) implies utf_16_to_utf_8_string_8 (Result.resized_area (Result.count - 1)).same_string (s)
		end

feature -- Byte Order Mark (BOM)

	utf_8_bom_to_string_8: STRING_8 = "%/239/%/187/%/191/"
			-- UTF-8 BOM sequence: bytes 0xEF 0xBB 0xBF.

	utf_16be_bom_to_string_8: STRING_8 = "%/254/%/255/"
			-- UTF-16BE BOM sequence: bytes 0xFE 0xFF.

	utf_16le_bom_to_string_8: STRING_8 = "%/255/%/254/"
			-- UTF-16LE BOM sequence: bytes 0xFF 0xFE.

	utf_32be_bom_to_string_8: STRING_8 = "%U%U%/254/%/255/"
			-- UTF-32BE BOM sequence: bytes 0x00 0x00 0xFE 0xFF.

	utf_32le_bom_to_string_8: STRING_8 = "%/255/%/254/%U%U"
			-- UTF-32LE BOM sequence: bytes 0xFF 0xFE 0x00 0x00.

feature {NONE} -- Implementation

	escape_code_into (a_string: STRING_32; a_code: NATURAL_16)
			-- Append to `a_string' the escaped form of `a_code' described in the note clause
			-- of the class: `escape_character', then either the hexadecimal representation of
			-- `a_code' as a NATURAL_8 when it fits into one byte, or the letter `u' and the
			-- hexadecimal representation of `a_code' as a NATURAL_16 otherwise.
		do
			a_string.append_character (escape_character)
			if a_code > {NATURAL_8}.max_value then
					-- Needs the full 16 bits: long form introduced by `u'.
				a_string.append_character ('u')
				a_string.append_string_general (a_code.to_hex_string)
			else
					-- Fits into one byte: short form.
				a_string.append_string_general (a_code.as_natural_8.to_hex_string)
			end
		ensure
			instance_free: class
		end

	is_hexa_decimal (a_string: READABLE_STRING_GENERAL): BOOLEAN
			-- Can `a_string' be parsed by `ctoi_convertor' as a hexadecimal NATURAL_32 value?
		local
			l_parser: like ctoi_convertor
			l_type: INTEGER
		do
			l_type := {NUMERIC_INFORMATION}.type_natural_32
			l_parser := ctoi_convertor
				-- The converter is a shared once object: reset it before parsing.
			l_parser.reset (l_type)
			l_parser.parse_string_with_type (a_string, l_type)
			Result := l_parser.is_integral_integer
		ensure
			instance_free: class
		end

	to_natural_32 (a_hex_string: READABLE_STRING_GENERAL): NATURAL_32
			-- NATURAL_32 value denoted by the hexadecimal digits of `a_hex_string'.
		require
			is_hexa: is_hexa_decimal (a_hex_string)
		local
			l_parser: like ctoi_convertor
			l_type: INTEGER
		do
				-- Parsing is done without a type limitation; the value is then read as a NATURAL_32.
			l_type := {NUMERIC_INFORMATION}.type_no_limitation
			l_parser := ctoi_convertor
			l_parser.parse_string_with_type (a_hex_string, l_type)
			Result := l_parser.parsed_natural_32
		ensure
			instance_free: class
		end

	ctoi_convertor: HEXADECIMAL_STRING_TO_INTEGER_CONVERTER
			-- Shared converter used to parse hexadecimal strings into integer or natural values.
			-- Being a once function, the same object is returned by every call, so its parsing
			-- state is overwritten by each use.
		once
			create Result.make
				-- Configure the converter so that neither leading nor trailing separators
				-- are acceptable in the parsed strings.
			Result.set_leading_separators_acceptable (False)
			Result.set_trailing_separators_acceptable (False)
		ensure
			instance_free: class
			ctoi_convertor_not_void: Result /= Void
		end

note
	ca_ignore: "CA011", "CA011: too many arguments"
	copyright: "Copyright (c) 1984-2021, Eiffel Software and others"
	license: "Eiffel Forum License v2 (see http://www.eiffel.com/licensing/forum.txt)"
	source: "[
			Eiffel Software
			5949 Hollister Ave., Goleta, CA 93117 USA
			Telephone 805-685-1006, Fax 805-685-6869
			Website http://www.eiffel.com
			Customer support http://support.eiffel.com
		]"

end