note
	description:
		"A brief PCRE how-to in a runnable example and regexp-testing"
	library: "Eiffel PCRE Regexp Library"
	copyright: "Copyright (c) 2002, Harald Erdbruegger and others"
	license: "MIT License"
	date: "$Date$"
	revision: "$Revision$"

class
	PCRE

create
	make

feature {NONE} -- Initialization

	make
			-- Execute example: run the basic matching examples first,
			-- then the localization examples. Both print their results
			-- to standard output.
		do
				-- Compiling, matching, capturing, replacing and splitting.
			some_simple_matches
				-- Custom word sets and character case mappings.
			localization
		end

feature -- Examples

	some_simple_matches
			-- Tour of the basic operations on a single regular expression:
			-- compile, match, read captured substrings, replace, move between
			-- matches and split. Each result is printed, followed by a separator line.
		local
			matcher: REGULAR_EXPRESSION_MATCH_AND_REPLACE
			fields: ARRAY [STRING]
			text: STRING
			idx: INTEGER
		do
			create matcher

				-- Pattern for a doubled character. It has two subexpressions:
				-- \1 is the pair, \2 is the character being repeated.
			matcher.compile ("((.)\2)")
				-- The pattern is known to be valid.
			check is_compiled: matcher.is_compiled end

				-- Look for the first match in the subject.
			matcher.match ("hello from eiffel")
				-- Querying `has_matched' is redundant here once `match_count'
				-- is known to be greater than zero
				-- ('not has_matched = (match_count = 0)').
			check
					-- A match is known to exist.
				has_matched: matcher.has_matched
					-- Two subexpressions.
				match_count: matcher.match_count = 3
			end

				-- First match, subexpression 1: the pair.
			text := matcher.captured_substring (1)
			check double_l: text.is_equal ("ll") end
			io.put_string (text)
			io.new_line
			io.put_string ("----------------")
			io.new_line

				-- First match, subexpression 2: the single character.
			text := matcher.captured_substring (2)
			check single_l: text.is_equal ("l") end
			io.put_string (text)
			io.new_line
			io.put_string ("----------------")
			io.new_line

				-- Put the captured pair between parentheses
				-- (\1\ in the replacement stands for subexpression 1).
			text := matcher.replace ("(\1\)")
			check done: text.is_equal ("he(ll)o from eiffel") end
			io.put_string (text)
			io.new_line
			io.put_string ("----------------")
			io.new_line

				-- Advance to the following match.
			matcher.next_match
			check
					-- A second match is known to exist.
				has_matched2: matcher.has_matched
					-- Two subexpressions.
				match_count2: matcher.match_count = 3
			end

				-- Second match, subexpression 1: the pair.
			text := matcher.captured_substring (1)
			check double_f: text.is_equal ("ff") end
			io.put_string (text)
			io.new_line
			io.put_string ("----------------")
			io.new_line

				-- Put the captured pair between brackets.
			text := matcher.replace ("[\1\]")
			check done2: text.is_equal ("hello from ei[ff]el") end
			io.put_string (text)
			io.new_line
			io.put_string ("----------------")
			io.new_line

				-- Rewind to the first match.
			matcher.first_match
			check
					-- The first match is known to exist.
				has_matched3: matcher.has_matched
					-- Two subexpressions.
				match_count3: matcher.match_count = 3
			end

				-- Put a hyphen between the two letters of every pair.
			text := matcher.replace_all ("\2\-\2\")
			check done3: text.is_equal ("hel-lo from eif-fel") end
			io.put_string (text)
			io.new_line
			io.put_string ("----------------")
			io.new_line

				-- New pattern: a single tab character (the same as \t).
			matcher.compile ("%T")
				-- The pattern is known to be valid.
			check is_compiled2: matcher.is_compiled end

				-- Match on a tab-separated subject.
			matcher.match ("this%Tis%Ta%Ttab%Tseparated%Tline%Twith%Tan%Tempty%T%Tcolumn")
			check
					-- A match is known to exist.
				has_matched4: matcher.has_matched
					-- No subexpressions.
				match_count4: matcher.match_count = 1
			end

				-- Split the subject at every tab; the empty column is kept.
			fields := matcher.split
			check eleven_columns: fields.count = 11 end

				-- Print one column per line.
			from
				idx := fields.lower
			until
				idx > fields.upper
			loop
				io.put_string (fields.item (idx))
				io.new_line
				idx := idx + 1
			end
			io.put_string ("----------------")
			io.new_line
		end

	localization
			-- Example with localization: show how the word character set
			-- (used by \w) and the character case mapping (used in caseless
			-- mode) decide whether the German umlauts of the subject are matched.
			-- Each result is printed, followed by a separator line.
			-- Note: the final instructions modify the default case mapping and
			-- the default word set, which affects regular expressions created
			-- afterwards.
		local
			case_mapping: CASE_MAPPING
			word_set: PCRE_CHARACTER_SET
			regexp: REGULAR_EXPRESSION_MATCH_AND_REPLACE
			subject: STRING
			res: STRING
		do
				-- The same subject is matched by every example below.
			subject := "Erdbrügger - my Name"
				-- Word set with the German umlauts added.
			create word_set.make ("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_ÄÖÜäöü")
			create regexp
				-- Compile a pattern: match a word.
			regexp.compile ("\w+")
				-- We know we compiled a valid regexp.
			check is_compiled1: regexp.is_compiled end
			regexp.match (subject)
				-- We know there is a match.
			check has_matched1: regexp.has_matched end
			res := regexp.captured_substring (0)
			check
					-- With the default word set the umlaut ends the word.
				matched1: res.is_equal ("Erdbr")
			end
			io.put_string (res); io.new_line
			io.put_string ("----------------"); io.new_line
				-- Now we change the word character set.
			regexp.reset
			regexp.set_word_set (word_set)
				-- Compile a pattern: match a word.
			regexp.compile ("\w+")
			check is_compiled2: regexp.is_compiled end
			regexp.match (subject)
			check has_matched2: regexp.has_matched end
			res := regexp.captured_substring (0)
			check
					-- The umlaut is now a word character.
				matched2: res.is_equal ("Erdbrügger")
			end
			io.put_string (res); io.new_line
			io.put_string ("----------------"); io.new_line
				-- Reset the word set to the default one.
			regexp.reset
			regexp.set_word_set (regexp.default_word_set)
				-- The same query in caseless mode:
				-- the (?i) in the pattern is the same as the statements:
				--     regexp.reset -- not needed after regexp.make
				--     regexp.set_caseless (True)
			regexp.compile ("(?i)[A-ZÄÖÜ]+")
			check is_compiled3: regexp.is_compiled end
			regexp.match (subject)
			check has_matched3: regexp.has_matched end
			res := regexp.captured_substring (0)
			check
					-- The current case mapping does not relate "ü" to "Ü".
				matched3: res.is_equal ("Erdbr")
			end
			io.put_string (res); io.new_line
			io.put_string ("----------------"); io.new_line
				-- The only way to reset the caseless mode, apart from the routine
				-- regexp.set_default_options.
			regexp.reset
			regexp.set_caseless (False)
				-- A new pattern.
			regexp.compile ("[A-ZÄÖÜ]+")
			check is_compiled4: regexp.is_compiled end
			regexp.match (subject)
			check has_matched4: regexp.has_matched end
			res := regexp.captured_substring (0)
			check
					-- Case-sensitive: only the leading upper-case letter matches.
				matched4: res.is_equal ("E")
			end
			io.put_string (res); io.new_line
			io.put_string ("----------------"); io.new_line
				-- German umlauts added for case insensitivity.
			create case_mapping.make ("ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÜ", "abcdefghijklmnopqrstuvwxyzäöü")
				-- Changing the character case mapping needs
				-- a reset (not is_compiled).
			regexp.reset
			regexp.set_character_case_mapping (case_mapping)
			regexp.compile ("[A-ZÄÖÜ]+")
			check is_compiled5: regexp.is_compiled end
			regexp.match (subject)
			check has_matched5: regexp.has_matched end
			res := regexp.captured_substring (0)
			check
					-- Still case-sensitive: the new mapping alone changes nothing.
				matched5: res.is_equal ("E")
			end
			io.put_string (res); io.new_line
			io.put_string ("----------------"); io.new_line
				-- Now we use the caseless mode.
			regexp.compile ("(?i)[A-ZÄÖÜ]+")
			check
				is_compiled6: regexp.is_compiled
				is_caseless: regexp.is_caseless
			end
			regexp.match (subject)
			check has_matched6: regexp.has_matched end
			res := regexp.captured_substring (0)
			check
					-- Caseless with the extended mapping: the whole name matches.
				matched6: res.is_equal ("Erdbrügger")
			end
			io.put_string (res); io.new_line
			io.put_string ("----------------"); io.new_line
				-- If you want to make all regular expressions localized, change the
				-- `default_character_case_mapping' and/or `default_word_set'.
			regexp.default_character_case_mapping.clear
			regexp.default_character_case_mapping.add ("ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÜ", "abcdefghijklmnopqrstuvwxyzäöü")
			word_set := regexp.default_word_set
			word_set.wipe_out
			word_set.add_string ("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_ÄÖÜäöü")
				-- After these instructions all newly created regular expressions use
				-- the `character_case_mapping' and `word_set' of the changed default values
				-- defined above. Only an explicit call to `regexp.set_character_case_mapping'
				-- and/or `regexp.set_word_set' can change this behavior.
		end

end