Files
Refbase2X/includes/transtab_latin1_charset.inc.php
2021-01-28 15:58:21 +01:00

96 lines
3.9 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?php
// Project: Web Reference Database (refbase) <http://www.refbase.net>
// Copyright: Matthias Steffens <mailto:refbase@extracts.de> and the file's
// original author(s).
//
// This code is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY. Please see the GNU General Public
// License for more details.
//
// File: ./includes/transtab_latin1_charset.inc.php
// Repository: $HeadURL$
// Author(s): Matthias Steffens <mailto:refbase@extracts.de>
//
// Created: 24-Jul-08, 17:45
// Modified: $Date: 2008-08-19 20:05:53 +0000 (Tue, 19 Aug 2008) $
// $Author$
// $Revision: 1206 $
// Search & replace patterns and variables for matching (and conversion of) ISO-8859-1 character case & classes.
// Search & replace patterns must be specified as perl-style regular expressions, and search patterns must include
// the leading & trailing slashes.
// NOTE: Quote from <http://www.onphp5.com/article/22> ("i18n with PHP5: Pitfalls"):
// "PCRE and other regular expression extensions are not locale-aware. This most notably influences the \w class
// that is unable to work for Cyrillic letters. There could be a workaround for this if some preprocessor for the
// regex string could replace \w and friends with character range prior to calling PCRE functions."
// The 'start_session()' function in file 'include.inc.php' should establish an appropriate locale via function
// 'setSystemLocale()' so that e.g. '[[:upper:]]' would also match 'Ø' etc. However, since locale support depends
// on the individual server & system, we keep the workaround which literally specifies higher ASCII chars of the
// latin1 character set below. (in order to have this work, the character encoding of 'search.php' must be set to
// 'Western (Iso Latin 1)' aka 'ISO-8859-1'!)
// higher ASCII chars upper case = "ÄÅÁÀÂÃÇÉÈÊËÑÖØÓÒÔÕÜÚÙÛÍÌÎÏÆ"
// higher ASCII chars lower case = "äåáàâãçéèêëñöøóòôõüúùûíìîïæÿß"
// The variables '$alnum', '$alpha', '$cntrl', '$dash', '$digit', '$graph', '$lower', '$print', '$punct', '$space',
// '$upper', '$word' must be used within a perl-style regex character class.
// ISO-8859-1 character class fragments.
//
// Every variable below holds the *contents* of a PCRE character class, i.e. the caller is expected to wrap it
// in square brackets (e.g. "/[" . $upper . "]/"). Each fragment combines a POSIX class name with an explicit
// list of ISO-8859-1 letters above 0x7F, so that these letters match even when no suitable locale is active.
//
// The high-bit letters are written as "\xHH" byte escapes rather than as literal characters. This keeps the
// resulting strings byte-exact ISO-8859-1 regardless of the encoding this source file is saved in; a literal
// letter stored as UTF-8 would put two bytes into the class and break matching.
//
// NOTE(review): ISO-8859-1 also defines Eth, Yacute and Thorn (0xD0, 0xDD, 0xDE; lower case 0xF0, 0xFD, 0xFE).
// They were not part of the original letter lists and are therefore not added here -- confirm whether this
// omission is intentional.

// Upper case letters above 0x7F (same letters, same order as the original literal list):
$transtabLatin1Upper = "\xC4\xC5\xC1\xC0\xC2\xC3" // Auml Aring Aacute Agrave Acirc Atilde
	. "\xC7"                                      // Ccedil
	. "\xC9\xC8\xCA\xCB"                          // Eacute Egrave Ecirc Euml
	. "\xD1"                                      // Ntilde
	. "\xD6\xD8\xD3\xD2\xD4\xD5"                  // Ouml Oslash Oacute Ograve Ocirc Otilde
	. "\xDC\xDA\xD9\xDB"                          // Uuml Uacute Ugrave Ucirc
	. "\xCD\xCC\xCE\xCF"                          // Iacute Igrave Icirc Iuml
	. "\xC6";                                     // AElig

// Lower case letters above 0x7F (same letters, same order as the original literal list):
$transtabLatin1Lower = "\xE4\xE5\xE1\xE0\xE2\xE3" // auml aring aacute agrave acirc atilde
	. "\xE7"                                      // ccedil
	. "\xE9\xE8\xEA\xEB"                          // eacute egrave ecirc euml
	. "\xF1"                                      // ntilde
	. "\xF6\xF8\xF3\xF2\xF4\xF5"                  // ouml oslash oacute ograve ocirc otilde
	. "\xFC\xFA\xF9\xFB"                          // uuml uacute ugrave ucirc
	. "\xED\xEC\xEE\xEF"                          // iacute igrave icirc iuml
	. "\xE6"                                      // aelig
	. "\xFF\xDF";                                 // yuml szlig (listed with the lower case letters only)

// Matches ISO-8859-1 letters & digits:
$alnum = "[:alnum:]" . $transtabLatin1Upper . $transtabLatin1Lower;
// Matches ISO-8859-1 letters:
$alpha = "[:alpha:]" . $transtabLatin1Upper . $transtabLatin1Lower;
// Matches ISO-8859-1 control characters:
$cntrl = "[:cntrl:]";
// Matches ISO-8859-1 dashes & hyphens:
$dash = "-";
// Matches ISO-8859-1 digits:
$digit = "[:digit:]";
// Matches ISO-8859-1 printing characters (excluding space):
$graph = "[:graph:]" . $transtabLatin1Upper . $transtabLatin1Lower;
// Matches ISO-8859-1 lower case letters:
$lower = "[:lower:]" . $transtabLatin1Lower;
// Matches ISO-8859-1 printing characters (including space):
$print = "[:print:]" . $transtabLatin1Upper . $transtabLatin1Lower;
// Matches ISO-8859-1 punctuation:
$punct = "[:punct:]";
// Matches ISO-8859-1 whitespace (separating characters with no visual representation):
$space = "[:space:]";
// Matches ISO-8859-1 upper case letters:
$upper = "[:upper:]" . $transtabLatin1Upper;
// Matches ISO-8859-1 "word" characters (underscore, letters & digits):
$word = "_[:alnum:]" . $transtabLatin1Upper . $transtabLatin1Lower;

// The two helper strings are only needed to build the fragments above; remove them again so that including
// this file defines exactly the same set of variables as before.
unset($transtabLatin1Upper, $transtabLatin1Lower);

// Defines the PCRE pattern modifier(s) to be used in conjunction with the above variables (none for latin1):
// More info: <http://www.php.net/manual/en/reference.pcre.pattern.modifiers.php>
$patternModifiers = "";
// Translation table: ISO-8859-1 upper case letter => corresponding lower case letter.
// TODO: no mappings have been defined yet, so the table is empty.
$transtab_upper_lower = array();
// Translation table: ISO-8859-1 lower case letter => corresponding upper case letter.
// TODO: no mappings have been defined yet, so the table is empty.
$transtab_lower_upper = array();
?>