note
	description: "Objects that represent an ARFF relation"
	author: ""
	date: "$Date$"
	revision: "$Revision$"

class
	WEKA_ARFF_RELATION

inherit
	ARRAYED_LIST [ARRAYED_LIST [STRING]]
		rename
			make as old_make
		redefine
			out
		end

	DEBUG_OUTPUT
		undefine
			is_equal,
			copy,
			out
		end

	WEKA_SHARED_EQUALITY_TESTERS
		undefine
			is_equal,
			copy,
			out
		end

	KL_SHARED_STRING_EQUALITY_TESTER
		undefine
			is_equal,
			copy,
			out
		end

	WEKA_ARFF_ATTRIBUTE_VISITOR
		undefine
			is_equal,
			copy,
			out
		end

create
	make

feature {NONE} -- Initialization

	make (a_attributes: like attributes)
			-- Initialize Current with a copy of the list `a_attributes'
			-- (the attribute objects themselves are shared, not cloned),
			-- the default name "noname", empty comments and no instances.
		do
			create attributes.make (a_attributes.count)
				-- Attribute lookups (`has', `index_of', `search') must use `is_equal'.
			attributes.compare_objects
			attributes.append (a_attributes)
			name := "noname"
			comment := ""
			trailing_comment := ""
			old_make (initiail_data_capacity)
		end

feature -- Access

	name: STRING
			-- Name of current relation

	attributes: ARRAYED_LIST [WEKA_ARFF_ATTRIBUTE]
			-- List of attributes mentioned in current relation
			-- The order of attributes in this list determines the order of values:
			-- each element of Current represents an instance, and that inner list
			-- stores the values of all attributes of the instance, in the same
			-- order as `attributes'.
			-- Note: Elements in the list should not contain duplications.
			-- It is stored in a list instead of a set because we want to make sure
			-- the order of elements is not changed. For getting a set of attributes,
			-- see `attribute_set'.

	attribute_count: INTEGER
			-- Number of attributes in Current relation
		do
			Result := attributes.count
		ensure
			good_result: Result = attributes.count
		end

	attribute_indexes: DS_HASH_TABLE [INTEGER, WEKA_ARFF_ATTRIBUTE]
			-- Table where keys are attributes from `attributes' and values are
			-- their 1-based indexes indicating the column position of
			-- the corresponding attributes.
			-- Built on first access and cached afterwards.
		do
			if attribute_indexes_internal = Void then
				initialize_tables
			end
			Result := attribute_indexes_internal
		end

	reversed_attribute_indexes: DS_HASH_TABLE [WEKA_ARFF_ATTRIBUTE, INTEGER]
			-- Table where keys are 1-based column indexes and values are
			-- attributes in the corresponding column position.
			-- Built on first access and cached afterwards.
		do
			if reversed_attribute_indexes_internal = Void then
				initialize_tables
			end
			Result := reversed_attribute_indexes_internal
		end

	attribute_set: DS_HASH_SET [WEKA_ARFF_ATTRIBUTE]
			-- Set representation of `attributes'
			-- A fresh set is created on every call.
		do
			create Result.make (attributes.count)
			Result.set_equality_tester (weka_arff_attribute_equality_tester)
			attributes.do_all (agent Result.force_last)
		end

	comment: STRING
			-- Comments to be located after the relation name, before attribute declaration

	trailing_comment: STRING
			-- Comments that appear at the end of the file

	out, as_string: STRING
			-- Current weka relation as string
			-- Layout: relation header, `comment' (every line prefixed with '%'),
			-- attribute signatures, data header, one line per instance,
			-- and finally `trailing_comment'.
			-- The cursors of `attributes' and of Current are restored before returning.
		local
			l_cursor: CURSOR
			l_lines: LIST [STRING]
		do
			create Result.make (8192)

				-- Output the relation name part.
			Result.append_string ({WEKA_CONSTANTS}.relation)
			Result.append_character (' ')
			Result.append_string (name)
			Result.append_character ('%N')
			Result.append_character ('%N')

				-- Output comment.
			l_lines := comment.split ('%N')
			from
				l_lines.start
			until
				l_lines.after
			loop
				Result.append_character ('%%')
				Result.append_string (l_lines.item_for_iteration)
				Result.append_character ('%N')
				l_lines.forth
			end

				-- Output attributes.
			l_cursor := attributes.cursor
			from
				attributes.start
			until
				attributes.after
			loop
				Result.append_string (attributes.item_for_iteration.signature)
				Result.append_character ('%N')
				attributes.forth
			end
			attributes.go_to (l_cursor)
			Result.append_character ('%N')

				-- Output instances data.
			Result.append_string ({WEKA_CONSTANTS}.data)
			Result.append_character ('%N')
			l_cursor := cursor
			from
				start
			until
				after
			loop
					-- `instance_string' works on the instance at the cursor position.
				Result.append_string (instance_string)
				Result.append_character ('%N')
				forth
			end
			go_to (l_cursor)
			Result.append_character ('%N')

				-- Append trailing comment.
			Result.append_character ('%N')
			Result.append (trailing_comment)
		end

	value_set: DS_HASH_TABLE [DS_HASH_SET [STRING], WEKA_ARFF_ATTRIBUTE]
			-- Table from attribute to values of that attributes in all instances
			-- Key is an attribute, value is the set of values that attribute have across all instances.
			-- Every attribute of `attributes' has an entry, possibly with an empty set.
		local
			l_instance: ARRAYED_LIST [STRING]
			i: INTEGER
			l_values: DS_HASH_SET [STRING]
			l_value: STRING
			l_attr: WEKA_ARFF_ATTRIBUTE
		do
			create Result.make (attributes.count)
			Result.set_key_equality_tester (weka_arff_attribute_equality_tester)

				-- Create an empty value set for each attribute.
			across attributes as l_attrs loop
				create l_values.make (5)
				l_values.set_equality_tester (string_equality_tester)
				Result.force_last (l_values, l_attrs.item)
			end

				-- Iterate through all instances in Current and
				-- collect values that each attribute can have.
			across Current as l_instances loop
				l_instance := l_instances.item
					-- `i' is the column of the attribute being visited.
				i := 1
				across attributes as l_attrs loop
					l_attr := l_attrs.item
					l_value := l_instance.i_th (i)
					l_values := Result.item (l_attr)
					if not l_values.has (l_value) then
						l_values.force_last (l_value)
					end
					i := i + 1
				end
			end
		end

	values_of_attribute (a_attribute: WEKA_ARFF_ATTRIBUTE): LINKED_LIST [STRING]
			-- List of values of `a_attribute' across all instances.
			-- The order of values are preserved.
			-- `a_attribute' is expected to be in `attributes': otherwise its
			-- column index is 0, which is not a valid position in an instance.
		local
			l_index: INTEGER
		do
			create Result.make
			Result.compare_objects
			l_index := attributes.index_of (a_attribute, 1)
			across Current as l_instances loop
				Result.extend (l_instances.item.i_th (l_index))
			end
		end

	value_set_of_attribute (a_attribute: WEKA_ARFF_ATTRIBUTE): DS_HASH_SET [STRING]
			-- Set of values of `a_attribute' across all instances.
			-- The order of values are preserved.
			-- `a_attribute' is expected to be in `attributes': otherwise its
			-- column index is 0, which is not a valid position in an instance.
		local
			l_index: INTEGER
			l_value: STRING
		do
			create Result.make (10)
			Result.set_equality_tester (string_equality_tester)
			l_index := attributes.index_of (a_attribute, 1)
			across Current as l_instances loop
				l_value := l_instances.item.i_th (l_index)
				if not Result.has (l_value) then
					Result.force_last (l_value)
				end
			end
		end

	projection (a_attribute_selection_function: FUNCTION [ANY, TUPLE [WEKA_ARFF_ATTRIBUTE], BOOLEAN]): like Current
			-- Projection of Current by selecting only attributes that satisfies `a_attribute_selection_function'
			-- The order of the attributes in the resulting relation is the same as in the original relation.
			-- Name and comments are copied into the result.
		local
			l_indexes: LINKED_LIST [INTEGER]
			i: INTEGER
			l_attributes: like attributes
			l_instance: like item
			l_prj_instance: like item
		do
				-- Store indexes of remaining attributes in `l_indexes'.
			create l_indexes.make
			create l_attributes.make (attributes.count)
			i := 1
			across attributes as l_attrs loop
				if a_attribute_selection_function.item ([l_attrs.item]) then
					l_indexes.extend (i)
					l_attributes.extend (l_attrs.item)
				end
				i := i + 1
			end

				-- Iterate through all instances in Current and store the projected data into Result.
			create Result.make (l_attributes)
			across Current as l_instances loop
				l_instance := l_instances.item
				create l_prj_instance.make (l_indexes.count)
				across l_indexes as l_attr_indexes loop
					l_prj_instance.extend (l_instance.i_th (l_attr_indexes.item))
				end
				Result.extend (l_prj_instance)
			end
			Result.set_name (name)
			Result.set_comment (comment)
			Result.set_trailing_comment (trailing_comment)
		end

	cloned_object: like Current
			-- Cloned version of Current
			-- The instance lists are shared with Current, not duplicated;
			-- see `content_cloned_object' for a version that duplicates them.
		do
			Result := cloned_skeleton
			do_all (agent Result.extend)
		end

	cloned_skeleton: like Current
			-- Cloned skeleton of Current
			-- A skeleton contains all the information except the instances.
		do
			create Result.make (attributes.twin)
			Result.set_comment (comment)
			Result.set_trailing_comment (trailing_comment)
			Result.set_name (name)
		end

	content_cloned_object: like Current
			-- Cloned version of Current, with content list also cloned.
			-- Attributes are cloned with `cloned_objects' and every instance
			-- list with `twin'.
		local
			l_instance: ARRAYED_LIST [STRING]
			l_attribute: WEKA_ARFF_ATTRIBUTE
			l_attributes: like attributes
		do
				-- Clone attributes.
			create l_attributes.make (attributes.count)
			l_attributes.compare_objects
			across attributes as l_attrs loop
				l_attribute := l_attrs.item.cloned_objects
				l_attributes.extend (l_attribute)
			end
			create Result.make (l_attributes)

				-- Clone instances.
			across Current as l_instances loop
				l_instance := l_instances.item.twin
				Result.extend (l_instance)
			end

				-- Clone comments.
			Result.set_comment (comment)
			Result.set_trailing_comment (trailing_comment)
			Result.set_name (name)
		end

	nominalized_cloned_object: like Current
			-- Cloned version of Current, in which every attribute that is not
			-- nominal is replaced by its `as_nominal' counterpart, built from
			-- the values that attribute takes in Current (see `value_set').
			-- The instance lists are shared with Current.
		local
			l_new_attrs: like attributes
			l_value_set: like value_set
		do
			l_value_set := value_set
			create l_new_attrs.make (attributes.count)
			across attributes as l_attrs loop
				if l_attrs.item.is_nominal then
					l_new_attrs.extend (l_attrs.item)
				else
					l_new_attrs.extend (l_attrs.item.as_nominal (l_value_set.item (l_attrs.item)))
				end
			end
			create Result.make (l_new_attrs)
			do_all (agent Result.extend)
			Result.set_comment (comment)
			Result.set_trailing_comment (trailing_comment)
			Result.set_name (name)
		end

	attribute_by_name (a_name: STRING): WEKA_ARFF_ATTRIBUTE
			-- Attribute in `attributes' with `a_name'
			-- The first attribute whose name is `a_name', or whose name is
			-- `a_name' enclosed in double quotes, is returned.
		require
			a_name_exists: has_attribute_by_name (a_name)
		local
			l_attr: WEKA_ARFF_ATTRIBUTE
			l_count: INTEGER
		do
			across attributes as l_attrs until Result /= Void loop
				l_attr := l_attrs.item
				if l_attr.name ~ a_name then
					Result := l_attr
				else
					l_count := l_attr.name.count
						-- A quoted name needs at least the two quote characters;
						-- the guard also keeps `item (1)' valid for an empty name.
					if
						l_count >= 2 and then
						l_attr.name.item (1) = '%"' and then
						l_attr.name.item (l_count) = '%"'
					then
						if a_name ~ l_attr.name.substring (2, l_count - 1) then
							Result := l_attr
						end
					end
				end
			end
		end

	instance_as_hash_table (a_instance: LIST [STRING]): HASH_TABLE [STRING, STRING]
			-- Hash-table where keys are attribute names and values are attribute values for the corresponding names.
			-- The attribute values are from `a_instance'.
			-- The cursor of `a_instance' is restored before returning.
		require
			a_instance_valid: a_instance.count = attributes.count
		local
			l_cursor: CURSOR
		do
			create Result.make (attributes.count)
			Result.compare_objects
			l_cursor := a_instance.cursor
			a_instance.start
				-- Walk `attributes' and `a_instance' in lockstep.
			across attributes as l_attrs loop
				Result.force (a_instance.item_for_iteration, l_attrs.item.name)
				a_instance.forth
			end
			a_instance.go_to (l_cursor)
		end

	data_as_ds_hash_table (a_instance: LIST [STRING]): DS_HASH_TABLE [STRING, WEKA_ARFF_ATTRIBUTE]
			-- Hash-table where keys are attributes and values are attribute values for the corresponding attributes.
			-- The attribute values are from `a_instance'.
			-- The cursor of `a_instance' is restored before returning.
		require
			a_instance_valid: a_instance.count = attributes.count
		local
			l_cursor: CURSOR
		do
			create Result.make (attributes.count)
			Result.set_key_equality_tester (weka_arff_attribute_equality_tester)
			l_cursor := a_instance.cursor
			a_instance.start
				-- Walk `attributes' and `a_instance' in lockstep.
			across attributes as l_attrs loop
				Result.force_last (a_instance.item_for_iteration, l_attrs.item)
				a_instance.forth
			end
			a_instance.go_to (l_cursor)
		end

	partitions (a_attribute: WEKA_ARFF_ATTRIBUTE; a_value_partition_function: FUNCTION [ANY, TUPLE [STRING], INTEGER]): HASH_TABLE [WEKA_ARFF_RELATION, INTEGER]
			-- Partitions from Current based on `a_value_partition_function'
			-- `a_value_partition_function' partitions the value of `a_attribute'.
			-- If some values should be in the same equivalent class, `a_value_partition_function' should
			-- return the same integer for those values.
			-- Results are a hash-table where keys are value partition number returned by `a_value_partition_function' and
			-- items are partitioned relations.
			-- The instance lists are shared between Current and the partitions.
		require
			a_attribute_exists: attributes.has (a_attribute)
		local
			l_column: INTEGER
			l_instance: like item
			l_partition: WEKA_ARFF_RELATION
			l_partition_id: INTEGER
		do
			l_column := attribute_indexes.item (a_attribute)
			create Result.make (5)
			across Current as l_instances loop
				l_instance := l_instances.item
				l_partition_id := a_value_partition_function.item ([l_instance.i_th (l_column)])
				Result.search (l_partition_id)
				if Result.found then
					l_partition := Result.found_item
				else
						-- First instance of this partition: start from an empty skeleton.
					l_partition := cloned_skeleton
					Result.force (l_partition, l_partition_id)
				end
				l_partition.extend (l_instance)
			end
		end

	partitions_by_attribute_value (a_attribute: WEKA_ARFF_ATTRIBUTE): HASH_TABLE [WEKA_ARFF_RELATION, STRING]
			-- Partitions from Current based on the values of the attribute `a_attribute'.
			-- Results are a hash-table where keys are attribute values and
			-- items are partitioned relations.
			-- The instance lists are shared between Current and the partitions.
		require
			a_attribute_exists: attributes.has (a_attribute)
		local
			l_column: INTEGER
			l_instance: like item
			l_partition: WEKA_ARFF_RELATION
			l_value: STRING
		do
			l_column := attribute_indexes.item (a_attribute)
			create Result.make (5)
			Result.compare_objects
			across Current as l_instances loop
				l_instance := l_instances.item
				l_value := l_instance.i_th (l_column)
				Result.search (l_value)
				if Result.found then
					l_partition := Result.found_item
				else
						-- First instance with this value: start from an empty skeleton.
					l_partition := cloned_skeleton
					Result.force (l_partition, l_value)
				end
				l_partition.extend (l_instance)
			end
		end

feature -- Access

	value_of_item (a_attribute: WEKA_ARFF_ATTRIBUTE): STRING
			-- Value of `a_attribute' at the instance at current cursor position
		require
			valid_cursor_position: not off
			a_attribute_exists: attributes.has (a_attribute)
		do
			Result := item.i_th (attribute_indexes.item (a_attribute))
		end

	value_at_position (a_position: INTEGER): STRING
			-- Value of the attribute at `a_position'-th column of the instance
			-- at current cursor position
		require
			a_position_valid: a_position >= 1 and a_position <= attribute_count
			valid_cursor_position: not off
		do
			Result := item.i_th (a_position)
		end

	item_as_hash_table: HASH_TABLE [STRING, STRING]
			-- Hash table where the keys are the attribute names and the values come from the current item
		require
			valid_item: not off
		do
			Result := instance_as_hash_table (item)
		ensure
			attributes.for_all (
				agent (a_attr: WEKA_ARFF_ATTRIBUTE; a_result: HASH_TABLE [STRING, STRING]): BOOLEAN
					do
						Result := a_result.has (a_attr.name)
					end (?, Result))
--			every_attribute_included: attributes.for_all (Result.has)
		end

	item_as_ds_hash_table: DS_HASH_TABLE [STRING, WEKA_ARFF_ATTRIBUTE]
			-- Hash table where the keys are the attributes and the values come from the current item
		require
			valid_item: not off
		do
			Result := data_as_ds_hash_table (item)
		end

	i_th_as_hash_table (a_i: INTEGER): like item_as_hash_table
			-- Hash table where the keys are the attribute names and the values come from the `a_i'-th item
		do
			Result := instance_as_hash_table (i_th (a_i))
		end

	i_th_as_ds_hash_table (a_i: INTEGER): like item_as_ds_hash_table
			-- Hash table where the keys are the attributes and the values come from the `a_i'-th item
		do
			Result := data_as_ds_hash_table (i_th (a_i))
		end

	attributes_as_hash_table: HASH_TABLE [STRING, STRING]
			-- Hash table where the keys are the attribute names and the values are Void
			-- The cursor of `attributes' is left untouched.
		do
			create Result.make (attributes.count)
			Result.compare_objects
				-- `across' iterates with its own cursor, so the cursor of
				-- `attributes' is not moved as a side effect of this query.
			across attributes as l_attrs loop
				Result [l_attrs.item.name] := Void
			end
		end

feature -- Status report

	is_instance_valid (a_instance: like item): BOOLEAN
			-- Does `a_instance' contain valid values for `attributes'?
			-- True only if `a_instance' has exactly one value per attribute and
			-- every value is accepted by `is_valid_value' of its attribute.
		local
			i: INTEGER
			l_attrs: like attributes
			l_cursor: CURSOR
		do
			if a_instance.count = attributes.count then
				l_attrs := attributes
				l_cursor := l_attrs.cursor
				from
					Result := True
					i := 1
					l_attrs.start
				until
					l_attrs.after or not Result
				loop
					Result := l_attrs.item_for_iteration.is_valid_value (a_instance.i_th (i))
					i := i + 1
					l_attrs.forth
				end
				l_attrs.go_to (l_cursor)
			end
		end

	has_attribute_by_name (a_name: STRING): BOOLEAN
			-- Is there an attribute in Current with `a_name'?
			-- Only exact name matches are considered.
		do
			Result := False
			across attributes as l_attrs until Result loop
				Result := l_attrs.item.name ~ a_name
			end
		end

feature -- Status report

	debug_output: STRING
			-- String that should be displayed in debugger to represent `Current'.
			-- Only the attribute part is included, the instance part is ignored
			-- (apart from the number of instances).
		local
			l_cursor: CURSOR
		do
			create Result.make (1024)
			Result.append ("Name: ")
			Result.append (name)
			Result.append_character ('%N')
			l_cursor := attributes.cursor
			from
				attributes.start
			until
				attributes.after
			loop
				Result.append (attributes.item_for_iteration.signature)
				Result.append_character ('%N')
				attributes.forth
			end
			Result.append ("Instances: " + count.out)
			Result.append_character ('%N')
			attributes.go_to (l_cursor)
		end

feature -- Basic operations

	set_name (a_name: like name)
			-- Set `name' with `a_name'.
			-- Make a copy from `a_name'.
		do
			name := a_name.twin
		ensure
			good_result: name ~ a_name
		end

	set_comment (a_comment: STRING)
			-- Set `comment' with `a_comment'.
			-- Make a copy from `a_comment'.
			-- A Void `a_comment' results in an empty comment.
		do
			if a_comment = Void then
				comment := ""
			else
				comment := a_comment.twin
			end
		end

	set_trailing_comment (a_trailing_comment: STRING)
			-- Set `trailing_comment' with `a_trailing_comment'.
			-- Make a copy from `a_trailing_comment'.
			-- A Void `a_trailing_comment' results in an empty trailing comment.
		do
			if a_trailing_comment = Void then
				trailing_comment := ""
			else
				trailing_comment := a_trailing_comment.twin
			end
		end

	to_medium (a_medium: IO_MEDIUM)
			-- Store current relation in `a_medium'.
		require
			a_medium_is_ready: a_medium.is_open_write
		do
			a_medium.put_string (as_string)
		end

	extend_instance (a_instance: like item)
			-- Extend `a_instance' into Current relation.
		require
			a_instance_valid: is_instance_valid (a_instance)
		do
			extend (a_instance)
		end

	remove_value (a_attribute: WEKA_ARFF_ATTRIBUTE; a_value: STRING)
			-- Remove the value `a_value' from `a_attribute'.
			-- After removal, instances which contained `a_value' will be removed
			-- from Current.
			-- NOTE: After removal, original cursor may change.
		require
			a_attribute_exits: attributes.has (a_attribute)
			a_value_valid: a_attribute.is_valid_value (a_value)
		local
			l_column: INTEGER
		do
				-- The visitor features (`process_*_attribute') read the value from here.
			last_value_to_remove := a_value

				-- Locate the column without moving the cursor of `attributes'.
				-- `attributes' compares objects, so this is the first attribute
				-- equal to `a_attribute'; the precondition guarantees it exists.
			l_column := attributes.index_of (a_attribute, 1)

				-- Remove the specified value from attribute definition
				-- Only needed for nominal attributes.
			attributes.i_th (l_column).process (Current)

				-- Remove instances which mention `a_value'.
			from
				start
			until
				after
			loop
				if item.i_th (l_column) ~ a_value then
						-- `remove' already leaves the cursor on the next instance.
					remove
				else
					forth
				end
			end
		end

	remove_values (a_attribute: WEKA_ARFF_ATTRIBUTE; a_values: DS_HASH_SET [STRING])
			-- Remove the values `a_values' from `a_attribute'.
			-- After removal, instances which contained one of `a_values' will be removed
			-- from Current.
			-- NOTE: After removal, original cursor may change.
		require
			a_attribute_exits: attributes.has (a_attribute)
		do
			a_values.do_all (agent remove_value (a_attribute, ?))
		end

feature -- Constants

	initiail_data_capacity: INTEGER = 100
			-- The initial capacity of the instance list
			-- (The misspelled name is kept because the feature is exported.)

feature {NONE} -- Implementation

	instance_string: STRING
			-- String for the data in current `item':
			-- the values of the instance, each rendered by the `value' feature of
			-- the attribute in the same column, separated by commas.
			-- Empty if the instance has no value.
			-- The cursor of the instance is restored before returning.
		require
			not_off: not off
		local
			l_data: like item
			l_cursor: CURSOR
			l_attrs: like attributes
			i: INTEGER
		do
			create Result.make (256)
			l_attrs := attributes
			l_data := item
			l_cursor := l_data.cursor
			from
				i := 1
				l_data.start
			until
				l_data.after
			loop
					-- Separator goes before every value except the first one.
				if i > 1 then
					Result.append_character (',')
				end
				Result.append (l_attrs.i_th (i).value (l_data.item_for_iteration))
				i := i + 1
				l_data.forth
			end
				-- Do not leave the cursor of the instance at `after'.
			l_data.go_to (l_cursor)
		end

feature {NONE} -- Process

	process_boolean_attribute (a_attribute: WEKA_ARFF_BOOLEAN_ATTRIBUTE)
			-- Process `a_attribute'.
			-- Nothing to do: only nominal attributes are changed by `remove_value'.
		do
		end

	process_numeric_attribute (a_attribute: WEKA_ARFF_NUMERIC_ATTRIBUTE)
			-- Process `a_attribute'.
			-- Nothing to do: only nominal attributes are changed by `remove_value'.
		do
		end

	process_nominal_attribute (a_attribute: WEKA_ARFF_NOMINAL_ATTRIBUTE)
			-- Process `a_attribute'.
			-- Remove `last_value_to_remove' from the values of `a_attribute'.
		do
			a_attribute.values.remove (last_value_to_remove)
		end

	process_string_attribute (a_attribute: WEKA_ARFF_STRING_ATTRIBUTE)
			-- Process `a_attribute'.
			-- Nothing to do: only nominal attributes are changed by `remove_value'.
		do
		end

	last_value_to_remove: detachable STRING
			-- Value to be removed
			-- Set by `remove_value', read by `process_nominal_attribute'.

	attribute_indexes_internal: detachable like attribute_indexes
			-- Cache for `attribute_indexes'

	reversed_attribute_indexes_internal: detachable like reversed_attribute_indexes
			-- Cache for `reversed_attribute_indexes'

	initialize_tables
			-- Initialize internal data structures.
			-- Fill both `attribute_indexes_internal' and
			-- `reversed_attribute_indexes_internal' from `attributes'.
		local
			l_column: INTEGER
		do
			create attribute_indexes_internal.make (attributes.count)
			attribute_indexes_internal.set_key_equality_tester (weka_arff_attribute_equality_tester)
			create reversed_attribute_indexes_internal.make (attributes.count)
				-- Columns are 1-based and follow the order of `attributes'.
			l_column := 1
			across attributes as l_attrs loop
				attribute_indexes_internal.force_last (l_column, l_attrs.item)
				reversed_attribute_indexes_internal.force_last (l_attrs.item, l_column)
				l_column := l_column + 1
			end
		end

end