159 lines
7.5 KiB
PHP
159 lines
7.5 KiB
PHP
<?php
|
||
// Project: Web Reference Database (refbase) <http://www.refbase.net>
|
||
// Copyright: Matthias Steffens <mailto:refbase@extracts.de> and the file's
|
||
// original author(s).
|
||
//
|
||
// This code is distributed in the hope that it will be useful,
|
||
// but WITHOUT ANY WARRANTY. Please see the GNU General Public
|
||
// License for more details.
|
||
//
|
||
// File: ./includes/transtab_refbase_unicode.inc.php
|
||
// Repository: $HeadURL: file:///svn/p/refbase/code/branches/bleeding-edge/includes/transtab_refbase_unicode.inc.php $
|
||
// Author(s): Matthias Steffens <mailto:refbase@extracts.de>
|
||
//
|
||
// Created: 02-Jun-06, 01:41
|
||
// Modified: $Date: 2008-07-30 14:50:42 +0000 (Wed, 30 Jul 2008) $
|
||
// $Author: msteffens $
|
||
// $Revision: 1183 $
|
||
|
||
// Search & replace patterns and functions for conversion from refbase markup to Unicode entities.
|
||
// Search & replace patterns must be specified as perl-style regular expressions, and search patterns must include the leading & trailing slashes.
|
||
|
||
global $patternModifiers; // defined in 'transtab_unicode_charset.inc.php' and 'transtab_latin1_charset.inc.php'
|
||
|
||
// Conversion table from refbase markup to Unicode characters.
// Each key is a complete perl-style search pattern (including its delimiters and modifiers),
// each value is the replacement that is applied for that pattern.
$transtab_refbase_unicode = array(

	// "/__(?!_)(.+?)__/" => '\\1', // the pattern for underline (__...__) must come before the one for italic (_..._)
	// "/_(.+?)_/" => '\\1', // fontshape markup is currently NOT converted (uncomment to strip fontshape markup from exported text)
	// "/\\*\\*(.+?)\\*\\*/" => '\\1',

	// NOTE(review): the "e" (PREG_REPLACE_EVAL) modifier used by the next two entries was deprecated in PHP 5.5
	// and removed in PHP 7.0, so these entries only work on older PHP versions. Migrating them requires
	// callback support in the code that applies this table, which is not visible from this file.
	"/\\[super:(.+?)\\]/ie" => "superScriptToUnicode('\\1')", // function 'superScriptToUnicode()' will convert superscript text to appropriate Unicode entities
	"/\\[sub:(.+?)\\]/ie"   => "subScriptToUnicode('\\1')", // function 'subScriptToUnicode()' will convert subscript text to appropriate Unicode entities

	"/\\[permil\\]/"   => '‰', // <U2030> (per mille sign)
	"/\\[infinity\\]/" => '∞', // <U221E> (infinity)

	// lowercase greek letters:
	"/\\[alpha\\]/"   => 'α',
	"/\\[beta\\]/"    => 'β',
	"/\\[gamma\\]/"   => 'γ',
	"/\\[delta\\]/"   => 'δ',
	"/\\[epsilon\\]/" => 'ε',
	"/\\[zeta\\]/"    => 'ζ',
	"/\\[eta\\]/"     => 'η',
	"/\\[theta\\]/"   => 'θ',
	"/\\[iota\\]/"    => 'ι',
	"/\\[kappa\\]/"   => 'κ',
	"/\\[lambda\\]/"  => 'λ',
	"/\\[mu\\]/"      => 'μ',
	"/\\[nu\\]/"      => 'ν',
	"/\\[xi\\]/"      => 'ξ',
	"/\\[omicron\\]/" => 'ο',
	"/\\[pi\\]/"      => 'π',
	"/\\[rho\\]/"     => 'ρ',
	"/\\[sigmaf\\]/"  => 'ς',
	"/\\[sigma\\]/"   => 'σ',
	"/\\[tau\\]/"     => 'τ',
	"/\\[upsilon\\]/" => 'υ',
	"/\\[phi\\]/"     => 'φ',
	"/\\[chi\\]/"     => 'χ',
	"/\\[psi\\]/"     => 'ψ',
	"/\\[omega\\]/"   => 'ω',

	// uppercase greek letters:
	"/\\[Alpha\\]/"   => 'Α',
	"/\\[Beta\\]/"    => 'Β',
	"/\\[Gamma\\]/"   => 'Γ',
	"/\\[Delta\\]/"   => 'Δ',
	"/\\[Epsilon\\]/" => 'Ε',
	"/\\[Zeta\\]/"    => 'Ζ',
	"/\\[Eta\\]/"     => 'Η',
	"/\\[Theta\\]/"   => 'Θ',
	"/\\[Iota\\]/"    => 'Ι',
	"/\\[Kappa\\]/"   => 'Κ',
	"/\\[Lambda\\]/"  => 'Λ',
	"/\\[Mu\\]/"      => 'Μ',
	"/\\[Nu\\]/"      => 'Ν',
	"/\\[Xi\\]/"      => 'Ξ',
	"/\\[Omicron\\]/" => 'Ο',
	"/\\[Pi\\]/"      => 'Π',
	"/\\[Rho\\]/"     => 'Ρ',
	"/\\[Sigma\\]/"   => 'Σ',
	"/\\[Tau\\]/"     => 'Τ',
	"/\\[Upsilon\\]/" => 'Υ',
	"/\\[Phi\\]/"     => 'Φ',
	"/\\[Chi\\]/"     => 'Χ',
	"/\\[Psi\\]/"     => 'Ψ',
	"/\\[Omega\\]/"   => 'Ω',

	"/\"(.+?)\"/" => '“\\1”', // <U201C>...<U201D> (left and right double quotation marks)
	"/ +- +/"     => ' – ', // <U2013> (endash)

	// The search character is written as the escape sequence \x96 instead of a literal character: the literal
	// is a non-printable character that is easily lost when the file is edited or transferred, which leaves
	// an empty pattern that matches at every position and inserts an endash between all characters.
	// NOTE(review): 0x96 (the Windows-1252 endash) is presumed to be the originally intended character -- confirm.
	"/\\x96/$patternModifiers" => '–' // <U2013> (endash)
	// Note that for UTF-8 based systems, '$patternModifiers' contains the "u" (PCRE_UTF8) pattern modifier which causes PHP/PCRE
	// to treat pattern strings as UTF-8 (otherwise this conversion pattern would garble UTF-8 characters such as "Ö")

);
|
||
|
||
|
||
// Search & replace actions used by function 'superScriptToUnicode()':
// each plain character that has a Unicode superscript form is replaced by that form,
// the final pattern re-wraps everything else in '[super:...]' markup.
$unicodeSuperScriptSearchReplaceActionsArray = array(

	"/1/"    => '¹', // <U00B9> (superscript one)
	"/2/"    => '²', // <U00B2> (superscript two)
	"/3/"    => '³', // <U00B3> (superscript three)
	"/4/"    => '⁴', // <U2074> (superscript four)
	"/5/"    => '⁵', // <U2075> (superscript five)
	"/6/"    => '⁶', // <U2076> (superscript six)
	"/7/"    => '⁷', // <U2077> (superscript seven)
	"/8/"    => '⁸', // <U2078> (superscript eight)
	"/9/"    => '⁹', // <U2079> (superscript nine)
	"/0/"    => '⁰', // <U2070> (superscript zero)
	"/\\+/"  => '⁺', // <U207A> (superscript plus sign)
	"/-/"    => '⁻', // <U207B> (superscript minus)
	"/=/"    => '⁼', // <U207C> (superscript equals sign)
	"/\\(/"  => '⁽', // <U207D> (superscript left parenthesis)
	"/\\)/"  => '⁾', // <U207E> (superscript right parenthesis)
	"/n/"    => 'ⁿ', // <U207F> (superscript latin small letter n)

	// The "u" (PCRE_UTF8) modifier is required here: the character class consists of multi-byte UTF-8 characters,
	// and without "u" PCRE compares single bytes, which splits other UTF-8 characters (e.g. "ö" or "€") in the middle
	// and produces invalid output.
	"/([^¹²³⁴⁵⁶⁷⁸⁹⁰⁺⁻⁼⁽⁾ⁿ]+)/u" => '[super:\\1]' // keep superscript markup in place for any text that has no matching superscript entity in Unicode

);
|
||
|
||
|
||
// Search & replace actions used by function 'subScriptToUnicode()':
// each plain character that has a Unicode subscript form is replaced by that form,
// the final pattern re-wraps everything else in '[sub:...]' markup.
$unicodeSubScriptSearchReplaceActionsArray = array(

	"/1/"    => '₁', // <U2081> (subscript one)
	"/2/"    => '₂', // <U2082> (subscript two)
	"/3/"    => '₃', // <U2083> (subscript three)
	"/4/"    => '₄', // <U2084> (subscript four)
	"/5/"    => '₅', // <U2085> (subscript five)
	"/6/"    => '₆', // <U2086> (subscript six)
	"/7/"    => '₇', // <U2087> (subscript seven)
	"/8/"    => '₈', // <U2088> (subscript eight)
	"/9/"    => '₉', // <U2089> (subscript nine)
	"/0/"    => '₀', // <U2080> (subscript zero)
	"/\\+/"  => '₊', // <U208A> (subscript plus sign)
	"/-/"    => '₋', // <U208B> (subscript minus)
	"/=/"    => '₌', // <U208C> (subscript equals sign)
	"/\\(/"  => '₍', // <U208D> (subscript left parenthesis)
	"/\\)/"  => '₎', // <U208E> (subscript right parenthesis)

	// The "u" (PCRE_UTF8) modifier is required here: the character class consists of multi-byte UTF-8 characters,
	// and without "u" PCRE compares single bytes, which splits other UTF-8 characters (e.g. "É" or "€") in the middle
	// and produces invalid output.
	"/([^₁₂₃₄₅₆₇₈₉₀₊₋₌₍₎]+)/u" => '[sub:\\1]' // keep subscript markup in place for any text that has no matching subscript entity in Unicode

);
|
||
|
||
// --------------------------------------------------------------------
|
||
|
||
// Replaces the characters of a superscript text by their Unicode superscript equivalents.
// The given '$sourceString' is run through the search & replace actions defined in
// '$unicodeSuperScriptSearchReplaceActionsArray' and the resulting string is returned.
function superScriptToUnicode($sourceString)
{
	global $unicodeSuperScriptSearchReplaceActionsArray;

	// function 'searchReplaceText()' is defined in 'include.inc.php'
	// (the third argument presumably signals that the search patterns already carry their delimiters -- confirm against that function)
	return searchReplaceText($unicodeSuperScriptSearchReplaceActionsArray, $sourceString, true);
}
|
||
|
||
// --------------------------------------------------------------------
|
||
|
||
// Replaces the characters of a subscript text by their Unicode subscript equivalents.
// The given '$sourceString' is run through the search & replace actions defined in
// '$unicodeSubScriptSearchReplaceActionsArray' and the resulting string is returned.
function subScriptToUnicode($sourceString)
{
	global $unicodeSubScriptSearchReplaceActionsArray;

	// function 'searchReplaceText()' is defined in 'include.inc.php'
	// (the third argument presumably signals that the search patterns already carry their delimiters -- confirm against that function)
	return searchReplaceText($unicodeSubScriptSearchReplaceActionsArray, $sourceString, true);
}
|
||
?>
|