|
|
- <?php
- // Project: Web Reference Database (refbase) <http://www.refbase.net>
- // Copyright: Matthias Steffens <mailto:refbase@extracts.de> and the file's
- // original author(s).
- //
- // This code is distributed in the hope that it will be useful,
- // but WITHOUT ANY WARRANTY. Please see the GNU General Public
- // License for more details.
- //
- // File: ./includes/transtab_refbase_unicode.inc.php
- // Repository: $HeadURL: file:///svn/p/refbase/code/branches/bleeding-edge/includes/transtab_refbase_unicode.inc.php $
- // Author(s): Matthias Steffens <mailto:refbase@extracts.de>
- //
- // Created: 02-Jun-06, 01:41
- // Modified: $Date: 2008-07-30 14:50:42 +0000 (Wed, 30 Jul 2008) $
- // $Author: msteffens $
- // $Revision: 1183 $
-
- // Search & replace patterns and functions for conversion from refbase markup to Unicode entities.
- // Search & replace patterns must be specified as perl-style regular expression and search patterns must include the leading & trailing slashes.
-
- global $patternModifiers; // defined in 'transtab_unicode_charset.inc.php' and 'transtab_latin1_charset.inc.php'
-
// Maps refbase markup to Unicode characters.
// Each key is a complete perl-style regular expression (including its delimiters and any pattern modifiers),
// each value is the replacement that gets applied for matches of that expression.
$transtab_refbase_unicode = array(

	// Fontshape markup is currently NOT converted (uncomment the three patterns below to strip fontshape
	// markup from exported text). The pattern for underline (__...__) must come before the one for italic (_..._).
//	"/__(?!_)(.+?)__/"              =>  '\\1',
//	"/_(.+?)_/"                     =>  '\\1',
//	"/\\*\\*(.+?)\\*\\*/"           =>  '\\1',

	// Super- and subscript markup: the "e" modifier causes the replacement string to be evaluated as PHP code,
	// i.e. the matched text is handed to 'superScriptToUnicode()' / 'subScriptToUnicode()'.
	// NOTE(review): the "e" modifier was deprecated in PHP 5.5 and removed in PHP 7.0. Migrating these two
	// entries requires a 'preg_replace_callback()' based consumer, which is not part of this file.
	"/\\[super:(.+?)\\]/ie"         =>  "superScriptToUnicode('\\1')", // function 'superScriptToUnicode()' will convert superscript text to appropriate Unicode entities
	"/\\[sub:(.+?)\\]/ie"           =>  "subScriptToUnicode('\\1')", // function 'subScriptToUnicode()' will convert subscript text to appropriate Unicode entities

	// Symbols:
	"/\\[permil\\]/"                =>  '‰', // <U2030> (per mille sign)
	"/\\[infinity\\]/"              =>  '∞', // <U221E> (infinity)

	// Greek lowercase letters:
	"/\\[alpha\\]/"                 =>  'α',
	"/\\[beta\\]/"                  =>  'β',
	"/\\[gamma\\]/"                 =>  'γ',
	"/\\[delta\\]/"                 =>  'δ',
	"/\\[epsilon\\]/"               =>  'ε',
	"/\\[zeta\\]/"                  =>  'ζ',
	"/\\[eta\\]/"                   =>  'η',
	"/\\[theta\\]/"                 =>  'θ',
	"/\\[iota\\]/"                  =>  'ι',
	"/\\[kappa\\]/"                 =>  'κ',
	"/\\[lambda\\]/"                =>  'λ',
	"/\\[mu\\]/"                    =>  'μ',
	"/\\[nu\\]/"                    =>  'ν',
	"/\\[xi\\]/"                    =>  'ξ',
	"/\\[omicron\\]/"               =>  'ο',
	"/\\[pi\\]/"                    =>  'π',
	"/\\[rho\\]/"                   =>  'ρ',
	"/\\[sigmaf\\]/"                =>  'ς',
	"/\\[sigma\\]/"                 =>  'σ',
	"/\\[tau\\]/"                   =>  'τ',
	"/\\[upsilon\\]/"               =>  'υ',
	"/\\[phi\\]/"                   =>  'φ',
	"/\\[chi\\]/"                   =>  'χ',
	"/\\[psi\\]/"                   =>  'ψ',
	"/\\[omega\\]/"                 =>  'ω',

	// Greek uppercase letters:
	"/\\[Alpha\\]/"                 =>  'Α',
	"/\\[Beta\\]/"                  =>  'Β',
	"/\\[Gamma\\]/"                 =>  'Γ',
	"/\\[Delta\\]/"                 =>  'Δ',
	"/\\[Epsilon\\]/"               =>  'Ε',
	"/\\[Zeta\\]/"                  =>  'Ζ',
	"/\\[Eta\\]/"                   =>  'Η',
	"/\\[Theta\\]/"                 =>  'Θ',
	"/\\[Iota\\]/"                  =>  'Ι',
	"/\\[Kappa\\]/"                 =>  'Κ',
	"/\\[Lambda\\]/"                =>  'Λ',
	"/\\[Mu\\]/"                    =>  'Μ',
	"/\\[Nu\\]/"                    =>  'Ν',
	"/\\[Xi\\]/"                    =>  'Ξ',
	"/\\[Omicron\\]/"               =>  'Ο',
	"/\\[Pi\\]/"                    =>  'Π',
	"/\\[Rho\\]/"                   =>  'Ρ',
	"/\\[Sigma\\]/"                 =>  'Σ',
	"/\\[Tau\\]/"                   =>  'Τ',
	"/\\[Upsilon\\]/"               =>  'Υ',
	"/\\[Phi\\]/"                   =>  'Φ',
	"/\\[Chi\\]/"                   =>  'Χ',
	"/\\[Psi\\]/"                   =>  'Ψ',
	"/\\[Omega\\]/"                 =>  'Ω',

	// Quotation marks and dashes:
	"/\"(.+?)\"/"                   =>  '“\\1”', // <U201C>...<U201D> (left and right double quotation marks)
	"/ +- +/"                       =>  ' – ', // <U2013> (endash)

	// NOTE(review): the search character of the following pattern was missing, which left an empty regular
	// expression that matches at every position of the subject (so an endash would get inserted between all
	// characters). The escape '\x96' (the Windows-1252 endash, which is also the trailing byte of an UTF-8
	// encoded "Ö") is assumed from the remark below -- please confirm against the upstream file.
	"/\\x96/$patternModifiers"      =>  '–' // <U2013> (endash)
	// Note that for UTF-8 based systems, '$patternModifiers' contains the "u" (PCRE_UTF8) pattern modifier which causes PHP/PCRE
	// to treat pattern strings as UTF-8 (otherwise this conversion pattern would garble UTF-8 characters such as "Ö")

);
-
-
// Search & replace actions used by function 'superScriptToUnicode()'.
// The single-character patterns are listed first; the final pattern wraps whatever text is left over
// (i.e. text without a matching Unicode superscript character) in '[super:...]' markup again.
$unicodeSuperScriptSearchReplaceActionsArray = array(

	// Digits:
	"/1/"       =>  '¹', // <U00B9> (superscript one)
	"/2/"       =>  '²', // <U00B2> (superscript two)
	"/3/"       =>  '³', // <U00B3> (superscript three)
	"/4/"       =>  '⁴', // <U2074> (superscript four)
	"/5/"       =>  '⁵', // <U2075> (superscript five)
	"/6/"       =>  '⁶', // <U2076> (superscript six)
	"/7/"       =>  '⁷', // <U2077> (superscript seven)
	"/8/"       =>  '⁸', // <U2078> (superscript eight)
	"/9/"       =>  '⁹', // <U2079> (superscript nine)
	"/0/"       =>  '⁰', // <U2070> (superscript zero)

	// Operators, parentheses and the letter "n":
	"/\\+/"     =>  '⁺', // <U207A> (superscript plus sign)
	"/-/"       =>  '⁻', // <U207B> (superscript minus)
	"/=/"       =>  '⁼', // <U207C> (superscript equals sign)
	"/\\(/"     =>  '⁽', // <U207D> (superscript left parenthesis)
	"/\\)/"     =>  '⁾', // <U207E> (superscript right parenthesis)
	"/n/"       =>  'ⁿ', // <U207F> (superscript latin small letter n)

	// Keep superscript markup in place for any text that has no matching superscript entity in Unicode.
	// The character class below is made up of multi-byte UTF-8 characters. Without the "u" (PCRE_UTF8) pattern
	// modifier PCRE would read it as a list of single bytes and split other multi-byte characters (such as "ö")
	// in the middle, so '$patternModifiers' is appended here just like for the endash pattern in
	// '$transtab_refbase_unicode'.
	"/([^¹²³⁴⁵⁶⁷⁸⁹⁰⁺⁻⁼⁽⁾ⁿ]+)/$patternModifiers" => '[super:\\1]'

);
-
-
// Search & replace actions used by function 'subScriptToUnicode()'.
// The single-character patterns are listed first; the final pattern wraps whatever text is left over
// (i.e. text without a matching Unicode subscript character) in '[sub:...]' markup again.
$unicodeSubScriptSearchReplaceActionsArray = array(

	// Digits:
	"/1/"       =>  '₁', // <U2081> (subscript one)
	"/2/"       =>  '₂', // <U2082> (subscript two)
	"/3/"       =>  '₃', // <U2083> (subscript three)
	"/4/"       =>  '₄', // <U2084> (subscript four)
	"/5/"       =>  '₅', // <U2085> (subscript five)
	"/6/"       =>  '₆', // <U2086> (subscript six)
	"/7/"       =>  '₇', // <U2087> (subscript seven)
	"/8/"       =>  '₈', // <U2088> (subscript eight)
	"/9/"       =>  '₉', // <U2089> (subscript nine)
	"/0/"       =>  '₀', // <U2080> (subscript zero)

	// Operators and parentheses:
	"/\\+/"     =>  '₊', // <U208A> (subscript plus sign)
	"/-/"       =>  '₋', // <U208B> (subscript minus)
	"/=/"       =>  '₌', // <U208C> (subscript equals sign)
	"/\\(/"     =>  '₍', // <U208D> (subscript left parenthesis)
	"/\\)/"     =>  '₎', // <U208E> (subscript right parenthesis)

	// Keep subscript markup in place for any text that has no matching subscript entity in Unicode.
	// The character class below is made up of multi-byte UTF-8 characters. Without the "u" (PCRE_UTF8) pattern
	// modifier PCRE would read it as a list of single bytes and split other multi-byte characters (such as "Ä")
	// in the middle, so '$patternModifiers' is appended here just like for the endash pattern in
	// '$transtab_refbase_unicode'.
	"/([^₁₂₃₄₅₆₇₈₉₀₊₋₌₍₎]+)/$patternModifiers" => '[sub:\\1]'

);
-
- // --------------------------------------------------------------------
-
// Replaces the characters of the given superscript text with their Unicode superscript counterparts.
// '$sourceString' is the text to convert; the converted text is returned.
function superScriptToUnicode($sourceString)
{
	// the search & replace actions are taken from the array of the same name that is defined in this file
	global $unicodeSuperScriptSearchReplaceActionsArray;

	// function 'searchReplaceText()' is defined in 'include.inc.php'
	// (its third argument is presumably the flag stating that the patterns include their delimiters -- confirm there)
	return searchReplaceText($unicodeSuperScriptSearchReplaceActionsArray, $sourceString, true);
}
-
- // --------------------------------------------------------------------
-
// Replaces the characters of the given subscript text with their Unicode subscript counterparts.
// '$sourceString' is the text to convert; the converted text is returned.
function subScriptToUnicode($sourceString)
{
	// the search & replace actions are taken from the array of the same name that is defined in this file
	global $unicodeSubScriptSearchReplaceActionsArray;

	// function 'searchReplaceText()' is defined in 'include.inc.php'
	// (its third argument is presumably the flag stating that the patterns include their delimiters -- confirm there)
	return searchReplaceText($unicodeSubScriptSearchReplaceActionsArray, $sourceString, true);
}
- ?>
|