// Copyright: Matthias Steffens and the file's // original author(s). // // This code is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY. Please see the GNU General Public // License for more details. // // File: ./includes/transtab_latin1_charset.inc.php // Repository: $HeadURL$ // Author(s): Matthias Steffens // // Created: 24-Jul-08, 17:45 // Modified: $Date: 2008-08-19 20:05:53 +0000 (Tue, 19 Aug 2008) $ // $Author$ // $Revision: 1206 $ // Search & replace patterns and variables for matching (and conversion of) ISO-8859-1 character case & classes. // Search & replace patterns must be specified as perl-style regular expression and search patterns must include the // leading & trailing slashes. // NOTE: Quote from ("i18n with PHP5: Pitfalls"): // "PCRE and other regular expression extensions are not locale-aware. This most notably influences the \w class // that is unable to work for Cyrillic letters. There could be a workaround for this if some preprocessor for the // regex string could replace \w and friends with character range prior to calling PCRE functions." // The 'start_session()' function in file 'include.inc.php' should establish an appropriate locale via function // 'setSystemLocale()' so that e.g. '[[:upper:]]' would also match 'Ø' etc. However, since locale support depends // on the individual server & system, we keep the workaround which literally specifies higher ASCII chars of the // latin1 character set below. (in order to have this work, the character encoding of 'search.php' must be set to // 'Western (Iso Latin 1)' aka 'ISO-8859-1'!) // higher ASCII chars upper case = "ÄÅÁÀÂÃÇÉÈÊËÑÖØÓÒÔÕÜÚÙÛÍÌÎÏÆ" // higher ASCII chars lower case = "äåáàâãçéèêëñöøóòôõüúùûíìîïæÿß" // The variables '$alnum', '$alpha', '$cntrl', '$dash', '$digit', '$graph', '$lower', '$print', '$punct', '$space', // '$upper', '$word' must be used within a perl-style regex character class. 
// --------------------------------------------------------------------
// Character class fragments for ISO-8859-1 (latin1) text.
//
// Each variable holds the *contents* of a PCRE bracket expression, not a
// complete pattern: splice it between '[' and ']', e.g. "/[$upper]+/".
// Fragments may be combined in any order inside one bracket expression,
// e.g. "/[$space$dash]+/".
//
// The accented letters are written literally, so the bytes that end up
// in the pattern depend on the encoding this file is saved in.
// --------------------------------------------------------------------

// Letters & digits (POSIX class plus the literal accented letters):
$alnum = '[:alnum:]ÄÅÁÀÂÃÇÉÈÊËÑÖØÓÒÔÕÜÚÙÛÍÌÎÏÆäåáàâãçéèêëñöøóòôõüúùûíìîïæÿß';

// Letters only:
$alpha = '[:alpha:]ÄÅÁÀÂÃÇÉÈÊËÑÖØÓÒÔÕÜÚÙÛÍÌÎÏÆäåáàâãçéèêëñöøóòôõüúùûíìîïæÿß';

// Control characters:
$cntrl = '[:cntrl:]';

// Dashes & hyphens (hyphen-minus and en dash).
// The hyphen is backslash-escaped so that it stays a literal character no
// matter where this fragment lands inside a bracket expression. Unescaped,
// a hyphen that follows another fragment would either open a character
// range (after a literal such as the trailing letter of $alpha) or make
// PCRE2 reject the pattern (after a POSIX class such as $space).
$dash = '\-–';

// Digits:
$digit = '[:digit:]';

// Printing characters, excluding space:
$graph = '[:graph:]ÄÅÁÀÂÃÇÉÈÊËÑÖØÓÒÔÕÜÚÙÛÍÌÎÏÆäåáàâãçéèêëñöøóòôõüúùûíìîïæÿß';

// Lower case letters:
$lower = '[:lower:]äåáàâãçéèêëñöøóòôõüúùûíìîïæÿß';

// Printing characters, including space:
$print = '[:print:]ÄÅÁÀÂÃÇÉÈÊËÑÖØÓÒÔÕÜÚÙÛÍÌÎÏÆäåáàâãçéèêëñöøóòôõüúùûíìîïæÿß';

// Punctuation:
$punct = '[:punct:]';

// Whitespace (separating characters with no visual representation):
$space = '[:space:]';

// Upper case letters:
$upper = '[:upper:]ÄÅÁÀÂÃÇÉÈÊËÑÖØÓÒÔÕÜÚÙÛÍÌÎÏÆ';

// "Word" characters (underscore plus everything in $alnum):
$word = '_[:alnum:]ÄÅÁÀÂÃÇÉÈÊËÑÖØÓÒÔÕÜÚÙÛÍÌÎÏÆäåáàâãçéèêëñöøóòôõüúùûíìîïæÿß';

// PCRE pattern modifier(s) to append after the closing delimiter of any
// pattern built from the fragments above. Empty: no modifier is applied.
// See "Pattern Modifiers" in the PCRE chapter of the PHP manual.
$patternModifiers = '';

// Translation table: ISO-8859-1 upper case letter => lower case letter.
// TODO: not populated yet.
$transtab_upper_lower = array();

// Translation table: ISO-8859-1 lower case letter => upper case letter.
// TODO: not populated yet.
$transtab_lower_upper = array();

// NOTE: The closing PHP tag is omitted on purpose. This file contains only
// PHP code, and leaving the tag off prevents accidental output of trailing
// whitespace when the file is included.