note
	description: "Scanner class for processing a JSON encoded string. Does not handle JSON `null' values"
	author: "B. Herlig, B. Schoeller"
	date: "$Date$"
	revision: "$Revision$"

class
	A_JSON_TOKENIZER

create
	make

feature -- Initialization

	make (an_input: STRING)
			-- Creates the tokenizer on a JSON encoded `an_input' string.
			-- The cursor is placed on the first character and the error
			-- state is cleared; no token is read yet.
		require
			input_specified: an_input /= Void
		do
			input := an_input
			char_pos := 1
			has_errors := False
			error_position := 0
			set_current_char
		ensure
			input_set: input /= Void and then input = an_input
			char_pos_initialized: char_pos = 1
			error_flag_initialized: has_errors = False
			error_position_initialized: error_position = 0
		end

feature -- Implementation (Parser)

	expect_token (a_token: INTEGER)
			-- Expect `a_token' as current token, otherwise report an error.
			-- In both cases the next token is read afterwards.
		do
			if last_token /= a_token then
				record_error
			end
			read_token
		ensure
			wrong_expected_token_is_error: (old last_token /= a_token) implies has_errors
		end

feature -- Access

	last_token: INTEGER
			-- Last token read (one of the `token_*' constants)

	has_errors: BOOLEAN
			-- Indicates if the last parsing did have errors

	error_position: INTEGER
			-- Position of the first error encountered (0 when there is none)

		-- Last read & decoded primitive values

	last_double: DOUBLE
			-- Value of the last `token_double' read

	last_integer: INTEGER_64
			-- Value of the last `token_integer' read

	last_string: STRING
			-- Decoded content of the last `token_string' read

	last_boolean: BOOLEAN
			-- Value of the last "true" / "false" identifier read

	last_identifier: STRING
			-- Last keyword read. As "null" is not handled,
			-- this only denotes boolean keywords.

feature -- Errorhandling

	record_error
			-- Record an error in parsing.
			-- Only the first error stores its position; later calls keep
			-- `error_position' untouched.
		do
			if not has_errors then
				has_errors := True
				error_position := char_pos
			end
		ensure
			error_set: has_errors
				-- Two implications instead of a single `xor': a later error may
				-- legitimately occur at the very position of the first one
				-- (e.g. `expect_token' followed by an unknown character).
			first_error_position_recorded: (not old has_errors) implies error_position = char_pos
			earlier_error_position_kept: (old has_errors) implies error_position = old error_position
		end

feature -- Implementation (Scanner: Character features)

	input: STRING
			-- Input operating on

	char_pos: INTEGER
			-- Current reading cursor position (characters)

	current_char: CHARACTER
			-- Current character ('%U' once the cursor is past the end of `input')

	next_char
			-- Move the cursor to the next char.
		do
			char_pos := char_pos + 1
			set_current_char
		ensure
			moved_right: char_pos = old char_pos + 1
		end

	read_token
			-- Read a token, indicate its type in `last_token'.
			-- Leading white-space is skipped. An unrecognized character yields
			-- `token_unknown' and records an error.
		do
			skip_white_space
			if current_char = '%U' then
				last_token := token_eof
				next_char
			elseif current_char = '{' then
				last_token := token_open_curly_brace
				next_char
			elseif current_char = '}' then
				last_token := token_closed_curly_brace
				next_char
			elseif current_char = '[' then
				last_token := token_open_bracket
				next_char
			elseif current_char = ']' then
				last_token := token_closed_bracket
				next_char
			elseif current_char = ',' then
				last_token := token_comma
				next_char
			elseif current_char = ':' then
				last_token := token_colon
				next_char
			elseif current_char = '.' then
				last_token := token_dot
				next_char
			elseif current_char = '"' then
				read_string
			elseif current_char.is_alpha then
				read_identifier
			elseif current_char.is_digit or current_char = '-' then
				read_number
			else
				last_token := token_unknown
				record_error
				next_char
			end
		end

	skip_white_space
			-- Skip all white-space.
		do
			from
			until
				char_pos > input.count or else not current_char.is_space
			loop
				next_char
			end
		ensure
			at_EOF_or_not_on_whitespace: char_pos > input.count or else not current_char.is_space
		end

	read_identifier
			-- Read an identifier (a run of alphabetic characters) into
			-- `last_identifier'. Only "true" and "false" are known; they set
			-- `last_boolean'. Any other identifier records an error.
		do
			last_token := token_identifier
			last_identifier := ""
			from
			until
				not (current_char.is_alpha) or char_pos > input.count
			loop
				last_identifier.extend (current_char)
				next_char
			end
			if last_identifier.is_equal ("true") then
				last_boolean := True
			elseif last_identifier.is_equal ("false") then
				last_boolean := False
			else
					-- Unknown identifier read
				record_error
			end
		end

	read_number
			-- Read a number (integer or double).
			-- Sets `last_token' to `token_double' when the text contains '.',
			-- 'e' or 'E', otherwise to `token_integer', and stores the value in
			-- `last_double' or `last_integer'.
			-- Malformed or out-of-range text (e.g. "-", "1-2", "1.2.3") records
			-- an error and leaves the corresponding value at zero.
		local
			start_pos: INTEGER
			is_real: BOOLEAN
			text: STRING
		do
			start_pos := char_pos
			is_real := False
			if current_char = '-' then
				next_char
			end
			from
			until
				((not current_char.is_digit) and
				(current_char /= '.') and
				(current_char /= 'e') and
				(current_char /= 'E') and
				(current_char /= '-') and
				(current_char /= '+')) or
				(char_pos > input.count)
			loop
				if current_char = '.' or current_char = 'e' or current_char = 'E' then
					is_real := True
				end
				next_char
			end
				-- The scan above is deliberately permissive (signs and dots are
				-- accepted anywhere), so the collected text must be validated
				-- before it is converted.
			text := input.substring (start_pos, char_pos - 1)
			if is_real then
				last_token := token_double
				if text.is_double then
					last_double := text.to_double
				else
					last_double := 0.0
					record_error
				end
			else
				last_token := token_integer
				if text.is_integer_64 then
					last_integer := text.to_integer_64
				else
					last_integer := 0
					record_error
				end
			end
		end

	read_string
			-- Read a string and make it available in `last_string'.
			-- Backslash escapes are decoded; "\uXXXX" is converted to UTF-8.
			-- An invalid "\u" escape or a missing closing quote records an error.
		local
			code_point: STRING
			code: INTEGER
			utf8_str: UC_UTF8_STRING
		do
			last_token := token_string
			last_string := ""
				-- skip opening quote-symbol
			next_char
			from
			until
				(current_char = '%"') or has_errors or (char_pos > input.count)
			loop
				if current_char = '\' then
					next_char
					if current_char = '%"' then
						last_string.extend ('%"')
					elseif current_char = '\' then
							-- Single backslashes do not need to be escaped in Eiffel
						last_string.extend ('\')
					elseif current_char = '/' then
							-- Neither do forward slashes
						last_string.extend ('/')
					elseif current_char = 'b' then
						last_string.extend ('%B')
					elseif current_char = 'f' then
						last_string.extend ('%F')
					elseif current_char = 'n' then
						last_string.extend ('%N')
					elseif current_char = 'r' then
						last_string.extend ('%R')
					elseif current_char = 't' then
						last_string.extend ('%T')
					elseif current_char = 'u' then
							-- Unicode:
							-- Read the next four hex-characters and create a unicode character from this code.
							-- Past the end of `input' the current character is '%U', which
							-- fails the hexadecimal check below.
						create code_point.make (4)
						next_char
						code_point.append_character (current_char)
						next_char
						code_point.append_character (current_char)
						next_char
						code_point.append_character (current_char)
						next_char
						code_point.append_character (current_char)
							-- Check if it's a valid hex-code
						if string_routines.is_hexadecimal (code_point) then
								-- Convert to integer, and create a UTF-8 string from this codepoint
							code := string_routines.hexadecimal_to_integer (code_point)
							create utf8_str.make_empty
							utf8_str.append_unicode_character (create {UC_CHARACTER}.make_from_code (code))
							last_string.append (utf8_str.to_utf8)
						else
							record_error
						end
					else
							-- Unknown escape: keep the escaped character as is
						last_string.extend (current_char)
					end
				else
					last_string.extend (current_char)
				end
				next_char
			end
			if current_char /= '%"' then
					-- The loop stopped on an error or at the end of the input, not
					-- on the closing quote: the string is unterminated or invalid.
					-- (No effect when an error has already been recorded.)
				record_error
			end
				-- skip closing quote-symbol
			next_char
		end

feature -- Implementation (Scanner: Tokens)

	frozen token_eof: INTEGER = 0 -- %U

	frozen token_identifier: INTEGER = 1

	frozen token_integer: INTEGER = 2

	frozen token_double: INTEGER = 3

	frozen token_string: INTEGER = 4

	frozen token_boolean: INTEGER = 5

	frozen token_open_curly_brace: INTEGER = 6 -- {

	frozen token_closed_curly_brace: INTEGER = 7 -- }

	frozen token_open_bracket: INTEGER = 8 -- [

	frozen token_closed_bracket: INTEGER = 9 -- ]

	frozen token_comma: INTEGER = 10 -- ,

	frozen token_colon: INTEGER = 11 -- :

	frozen token_dot: INTEGER = 12 -- .

	frozen token_quote: INTEGER = 13 -- "

	frozen token_unknown: INTEGER = -1

feature {NONE} -- Implementation

	string_routines: KL_STRING_ROUTINES
			-- Access to Gobo's string routines,
			-- Used for checking & converting hex-strings
		once
			create Result
		ensure
			created: Result /= Void
		end

	set_current_char
			-- Update the value of `current_char' from `char_pos';
			-- '%U' is used once the cursor is past the end of `input'.
		do
			if char_pos > input.count then
				current_char := '%U'
			else
				current_char := input.item (char_pos)
			end
		end

invariant
	input_specified: input /= Void
	character_position_positive: char_pos > 0
	error_implies_error_position_set: has_errors implies error_position /= 0

end