96 lines
		
	
	
		
			3.9 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
			
		
		
	
	
			96 lines
		
	
	
		
			3.9 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
<?php
	// Project:    Web Reference Database (refbase) <http://www.refbase.net>
	// Copyright:  Matthias Steffens <mailto:refbase@extracts.de> and the file's
	//             original author(s).
	//
	//             This code is distributed in the hope that it will be useful,
	//             but WITHOUT ANY WARRANTY. Please see the GNU General Public
	//             License for more details.
	//
	// File:       ./includes/transtab_latin1_charset.inc.php
	// Repository: $HeadURL$
	// Author(s):  Matthias Steffens <mailto:refbase@extracts.de>
	//
	// Created:    24-Jul-08, 17:45
	// Modified:   $Date: 2008-08-19 20:05:53 +0000 (Tue, 19 Aug 2008) $
	//             $Author$
	//             $Revision: 1206 $

	// Search & replace patterns and variables for matching (and conversion of) ISO-8859-1 character case & classes.
	// Search & replace patterns must be specified as perl-style regular expressions, and search patterns must include
	// the leading & trailing slashes.

	// NOTE: Quote from <http://www.onphp5.com/article/22> ("i18n with PHP5: Pitfalls"):
	//       "PCRE and other regular expression extensions are not locale-aware. This most notably influences the \w class
	//        that is unable to work for Cyrillic letters. There could be a workaround for this if some preprocessor for the
	//        regex string could replace \w and friends with character range prior to calling PCRE functions."

	//       The 'start_session()' function in file 'include.inc.php' should establish an appropriate locale via function
	//       'setSystemLocale()' so that e.g. '[[:upper:]]' would also match 'Ø' etc. However, since locale support depends
	//       on the individual server & system, we keep the workaround which literally specifies higher ASCII chars of the
	//       latin1 character set below. (in order to have this work, the character encoding of 'search.php' must be set to
	//       'Western (Iso Latin 1)' aka 'ISO-8859-1'!)
	//       NOTE(review): the literal characters are specified in THIS file, so presumably this file's own encoding
	//       must be ISO-8859-1 as well -- confirm.
	//       higher ASCII chars upper case = "ÄÅÁÀÂÃÇÉÈÊËÑÖØÓÒÔÕÜÚÙÛÍÌÎÏÆ"
	//       higher ASCII chars lower case = "äåáàâãçéèêëñöøóòôõüúùûíìîïæÿß"
	//       NOTE(review): these lists do not contain 'Ð', 'Ý', 'Þ' (upper case) nor 'ð', 'ý', 'þ' (lower case);
	//       verify whether that omission is intentional before relying on full latin1 letter coverage.

	//       The variables '$alnum', '$alpha', '$cntrl', '$dash', '$digit', '$graph', '$lower', '$print', '$punct', '$space',
	//       '$upper', '$word' must be used within a perl-style regex character class, e.g. "/[$alnum]+/".
	//       Each variable holds only the INNER part of a bracket expression (a POSIX class name such as '[:alnum:]',
	//       optionally followed by literal characters); the enclosing '[' and ']' must be supplied by the caller.

	// Matches ISO-8859-1 letters & digits
	// (POSIX '[:alnum:]' plus the literal upper & lower case accented letters listed above):
	$alnum = "[:alnum:]ÄÅÁÀÂÃÇÉÈÊËÑÖØÓÒÔÕÜÚÙÛÍÌÎÏÆäåáàâãçéèêëñöøóòôõüúùûíìîïæÿß";

	// Matches ISO-8859-1 letters
	// (POSIX '[:alpha:]' plus the literal upper & lower case accented letters):
	$alpha = "[:alpha:]ÄÅÁÀÂÃÇÉÈÊËÑÖØÓÒÔÕÜÚÙÛÍÌÎÏÆäåáàâãçéèêëñöøóòôõüúùûíìîïæÿß";

	// Matches ISO-8859-1 control characters (plain POSIX '[:cntrl:]', no additions):
	$cntrl = "[:cntrl:]";

	// Matches ISO-8859-1 dashes & hyphens (an ASCII hyphen-minus followed by a literal dash character):
	// NOTE(review): the hyphen is NOT escaped. Inside a character class it is only guaranteed to be taken literally
	//               when it does not end up between two single characters (e.g. "[$dash]" is fine, "[a$dash]" would
	//               form a range) -- confirm that all call sites place '$dash' accordingly.
	$dash = "-–";

	// Matches ISO-8859-1 digits (plain POSIX '[:digit:]', no additions):
	$digit = "[:digit:]";

	// Matches ISO-8859-1 printing characters (excluding space)
	// (POSIX '[:graph:]' plus the literal upper & lower case accented letters):
	$graph = "[:graph:]ÄÅÁÀÂÃÇÉÈÊËÑÖØÓÒÔÕÜÚÙÛÍÌÎÏÆäåáàâãçéèêëñöøóòôõüúùûíìîïæÿß";

	// Matches ISO-8859-1 lower case letters
	// (POSIX '[:lower:]' plus the literal lower case accented letters):
	$lower = "[:lower:]äåáàâãçéèêëñöøóòôõüúùûíìîïæÿß";

	// Matches ISO-8859-1 printing characters (including space)
	// (POSIX '[:print:]' plus the literal upper & lower case accented letters):
	$print = "[:print:]ÄÅÁÀÂÃÇÉÈÊËÑÖØÓÒÔÕÜÚÙÛÍÌÎÏÆäåáàâãçéèêëñöøóòôõüúùûíìîïæÿß";

	// Matches ISO-8859-1 punctuation (plain POSIX '[:punct:]', no additions):
	$punct = "[:punct:]";

	// Matches ISO-8859-1 whitespace (separating characters with no visual representation):
	$space = "[:space:]";

	// Matches ISO-8859-1 upper case letters
	// (POSIX '[:upper:]' plus the literal upper case accented letters):
	$upper = "[:upper:]ÄÅÁÀÂÃÇÉÈÊËÑÖØÓÒÔÕÜÚÙÛÍÌÎÏÆ";

	// Matches ISO-8859-1 "word" characters
	// (an underscore followed by exactly the same characters as '$alnum'):
	$word = "_[:alnum:]ÄÅÁÀÂÃÇÉÈÊËÑÖØÓÒÔÕÜÚÙÛÍÌÎÏÆäåáàâãçéèêëñöøóòôõüúùûíìîïæÿß";

	// Defines the PCRE pattern modifier(s) to be used in conjunction with the above variables:
	// More info: <http://www.php.net/manual/en/reference.pcre.pattern.modifiers.php>
	// (empty string: no pattern modifier is added for this charset)
	$patternModifiers = "";


	// Converts ISO-8859-1 upper case letters to their corresponding lower case letter:
	// TODO! (the table is still empty, i.e. it currently defines no conversions)
	$transtab_upper_lower = array(


	);


	// Converts ISO-8859-1 lower case letters to their corresponding upper case letter:
	// TODO! (the table is still empty, i.e. it currently defines no conversions)
	$transtab_lower_upper = array(


	);

	// NOTE: The closing PHP tag is omitted on purpose, so that nothing following the code in this
	//       include file can be sent to the client as output.