Files
Refbase2X/includes/transtab_unicode_refbase.inc.php
2021-01-28 15:58:21 +01:00

149 lines
7.0 KiB
PHP
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?php
// Project: Web Reference Database (refbase) <http://www.refbase.net>
// Copyright: Matthias Steffens <mailto:refbase@extracts.de> and the file's
// original author(s).
//
// This code is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY. Please see the GNU General Public
// License for more details.
//
// File: ./includes/transtab_unicode_refbase.inc.php
// Repository: $HeadURL$
// Author(s): Matthias Steffens <mailto:refbase@extracts.de>
//
// Created: 11-Jun-08, 13:00
// Modified: $Date: 2008-06-19 17:56:34 +0000 (Thu, 19 Jun 2008) $
// $Author$
// $Revision: 1146 $
// Search & replace patterns and functions for conversion from Unicode entities to refbase markup.
// Search & replace patterns must be specified as perl-style regular expressions, and search patterns must include the leading & trailing slashes.
// Translation table that maps Unicode characters to refbase markup.
// Each key is a complete perl-style regular expression (delimiters included) and each value is the
// replacement for that pattern. Most values are literal refbase markup such as "[alpha]"; the two
// superscript/subscript entries near the end carry PHP code instead (see the note next to them).
// NOTE(review): the keys contain non-ASCII characters, some of which look identical to other
// characters (e.g. Greek capitals vs. Latin capitals) -- edit them only with a tool that shows code points.
$transtab_unicode_refbase = array(
'/‰|‰/' => "[permil]", // ‰: <U2030> (per mille sign); ‰: ?
'/∞/' => "[infinity]", // <U221E> (infinity)
// Lowercase Greek letters:
'/α/' => "[alpha]",
'/β/' => "[beta]",
'/γ/' => "[gamma]",
'/δ/' => "[delta]",
'/ε/' => "[epsilon]",
'/ζ/' => "[zeta]",
'/η/' => "[eta]",
'/θ/' => "[theta]",
'/ι/' => "[iota]",
'/κ/' => "[kappa]",
'/λ/' => "[lambda]",
'/μ/' => "[mu]",
'/ν/' => "[nu]",
'/ξ/' => "[xi]",
'/ο/' => "[omicron]",
'/π/' => "[pi]",
'/ρ/' => "[rho]",
'/ς/' => "[sigmaf]", // final (word-end) form of sigma
'/σ/' => "[sigma]",
'/τ/' => "[tau]",
'/υ/' => "[upsilon]",
'/φ/' => "[phi]",
'/χ/' => "[chi]",
'/ψ/' => "[psi]",
'/ω/' => "[omega]",
// Uppercase Greek letters (the replacement markup differs from the lowercase set only by its capital initial):
'/Α/' => "[Alpha]",
'/Β/' => "[Beta]",
'/Γ/' => "[Gamma]",
'/Δ/' => "[Delta]",
'/Ε/' => "[Epsilon]",
'/Ζ/' => "[Zeta]",
'/Η/' => "[Eta]",
'/Θ/' => "[Theta]",
'/Ι/' => "[Iota]",
'/Κ/' => "[Kappa]",
'/Λ/' => "[Lambda]",
'/Μ/' => "[Mu]",
'/Ν/' => "[Nu]",
'/Ξ/' => "[Xi]",
'/Ο/' => "[Omicron]",
'/Π/' => "[Pi]",
'/Ρ/' => "[Rho]",
'/Σ/' => "[Sigma]",
'/Τ/' => "[Tau]",
'/Υ/' => "[Upsilon]",
'/Φ/' => "[Phi]",
'/Χ/' => "[Chi]",
'/Ψ/' => "[Psi]",
'/Ω/' => "[Omega]",
// The next two patterns capture a whole run of consecutive superscript (or subscript) characters as group 1.
// Their 'e' modifier asks preg_replace() to evaluate the replacement string as PHP code, so the value is a
// function call that receives the captured run.
// NOTE(review): PHP deprecated the 'e' modifier in 5.5 and removed it in 7.0. On PHP >= 7 these two entries only
// work if the code consuming this table handles them specially -- confirm. A migration to preg_replace_callback()
// would have to be made together with that consumer, which is not part of this file.
"/((?:¹|²|³|⁴|⁵|⁶|⁷|⁸|⁹|⁰|⁺|⁻|⁼|⁽|⁾|ⁿ)+)/ie" => "unicodeSuperScriptToRefbase('\\1')", // function 'unicodeSuperScriptToRefbase()' will convert Unicode superscript entities to appropriate refbase superscript markup
"/((?:₁|₂|₃|₄|₅|₆|₇|₈|₉|₀|₊|₋|₌|₍|₎)+)/ie" => "unicodeSubScriptToRefbase('\\1')", // function 'unicodeSubScriptToRefbase()' will convert Unicode subscript entities to appropriate refbase subscript markup
// Note that, when matching superscript or subscript Unicode characters, we cannot use the multi-byte characters within character classes
// (like [¹²³⁴⁵⁶⁷⁸⁹⁰⁺⁻⁼⁽⁾ⁿ] or [₁₂₃₄₅₆₇₈₉₀₊₋₌₍₎]) since this may cause the single-byte parts of these characters to be matched and replaced as well!
// This is why the patterns above spell the characters out as alternations instead.
);
// Maps each Unicode superscript character (as a delimited perl-style pattern) to its plain ASCII equivalent.
// Function 'unicodeSuperScriptToRefbase()' reads this table via 'global' and passes it to 'searchReplaceText()'.
$unicodeSuperScriptSearchReplaceActionsArray = array(
'/¹/' => "1", // <U00B9> (superscript one)
'/²/' => "2", // <U00B2> (superscript two)
'/³/' => "3", // <U00B3> (superscript three)
'/⁴/' => "4", // <U2074> (superscript four)
'/⁵/' => "5", // <U2075> (superscript five)
'/⁶/' => "6", // <U2076> (superscript six)
'/⁷/' => "7", // <U2077> (superscript seven)
'/⁸/' => "8", // <U2078> (superscript eight)
'/⁹/' => "9", // <U2079> (superscript nine)
'/⁰/' => "0", // <U2070> (superscript zero)
'/⁺/' => "+", // <U207A> (superscript plus sign)
'/⁻/' => "-", // <U207B> (superscript minus)
'/⁼/' => "=", // <U207C> (superscript equals sign)
'/⁽/' => "(", // <U207D> (superscript left parenthesis)
'/⁾/' => ")", // <U207E> (superscript right parenthesis)
'/ⁿ/' => "n", // <U207F> (superscript latin small letter n)
);
// Maps each Unicode subscript character (as a delimited perl-style pattern) to its plain ASCII equivalent.
// Function 'unicodeSubScriptToRefbase()' reads this table via 'global' and passes it to 'searchReplaceText()'.
$unicodeSubScriptSearchReplaceActionsArray = array(
'/₁/' => "1", // <U2081> (subscript one)
'/₂/' => "2", // <U2082> (subscript two)
'/₃/' => "3", // <U2083> (subscript three)
'/₄/' => "4", // <U2084> (subscript four)
'/₅/' => "5", // <U2085> (subscript five)
'/₆/' => "6", // <U2086> (subscript six)
'/₇/' => "7", // <U2087> (subscript seven)
'/₈/' => "8", // <U2088> (subscript eight)
'/₉/' => "9", // <U2089> (subscript nine)
'/₀/' => "0", // <U2080> (subscript zero)
'/₊/' => "+", // <U208A> (subscript plus sign)
'/₋/' => "-", // <U208B> (subscript minus)
'/₌/' => "=", // <U208C> (subscript equals sign)
'/₍/' => "(", // <U208D> (subscript left parenthesis)
'/₎/' => ")", // <U208E> (subscript right parenthesis)
);
// --------------------------------------------------------------------
// Wraps the given text in refbase superscript markup ("[super:...]").
// Before wrapping, the text is run through the patterns in the global
// '$unicodeSuperScriptSearchReplaceActionsArray', which replace Unicode
// superscript characters with their plain equivalents.
//
// $sourceString - the text to convert (presumably a run of Unicode superscript characters)
// Returns the converted text enclosed in "[super:" and "]".
function unicodeSuperScriptToRefbase($sourceString)
{
    global $unicodeSuperScriptSearchReplaceActionsArray;

    // function 'searchReplaceText()' is defined in 'include.inc.php'
    // NOTE(review): the third argument (true) presumably tells it that the patterns already include their delimiters -- confirm
    $plainText = searchReplaceText(
        $unicodeSuperScriptSearchReplaceActionsArray,
        $sourceString,
        true
    );

    return sprintf("[super:%s]", $plainText);
}
// --------------------------------------------------------------------
// Wraps the given text in refbase subscript markup ("[sub:...]").
// Before wrapping, the text is run through the patterns in the global
// '$unicodeSubScriptSearchReplaceActionsArray', which replace Unicode
// subscript characters with their plain equivalents.
//
// $sourceString - the text to convert (presumably a run of Unicode subscript characters)
// Returns the converted text enclosed in "[sub:" and "]".
function unicodeSubScriptToRefbase($sourceString)
{
    global $unicodeSubScriptSearchReplaceActionsArray;

    // function 'searchReplaceText()' is defined in 'include.inc.php'
    // NOTE(review): the third argument (true) presumably tells it that the patterns already include their delimiters -- confirm
    $plainText = searchReplaceText(
        $unicodeSubScriptSearchReplaceActionsArray,
        $sourceString,
        true
    );

    return sprintf("[sub:%s]", $plainText);
}
?>