// Copyright:  Matthias Steffens and the file's
//             original author(s).
//
//             This code is distributed in the hope that it will be useful,
//             but WITHOUT ANY WARRANTY. Please see the GNU General Public
//             License for more details.
//
// File:       ./includes/transtab_refbase_unicode.inc.php
// Repository: $HeadURL: file:///svn/p/refbase/code/branches/bleeding-edge/includes/transtab_refbase_unicode.inc.php $
// Author(s):  Matthias Steffens
//
// Created:    02-Jun-06, 01:41
// Modified:   $Date: 2008-07-30 14:50:42 +0000 (Wed, 30 Jul 2008) $
//             $Author: msteffens $
//             $Revision: 1183 $

// Search & replace patterns and functions for conversion from refbase markup to Unicode entities.
// Search & replace patterns must be specified as perl-style regular expressions, and search patterns
// must include the leading & trailing slashes.

global $patternModifiers; // defined in 'transtab_unicode_charset.inc.php' and 'transtab_latin1_charset.inc.php'

// Main conversion table: maps a search pattern (array key) to its replacement (array value).
$transtab_refbase_unicode = array(
    // Fontshape markup is currently NOT converted (uncomment the three patterns below to strip
    // fontshape markup from exported text). The pattern for underline (__...__) must come before
    // the one for italic (_..._):
    // "/__(?!_)(.+?)__/" => '\\1',
    // "/_(.+?)_/" => '\\1',
    // "/\\*\\*(.+?)\\*\\*/" => '\\1',

    // NOTE: the two patterns below rely on the "e" (PREG_REPLACE_EVAL) pattern modifier, which is
    //       deprecated as of PHP 5.5 and removed in PHP 7.0. Replacing it requires callback support
    //       (e.g. 'preg_replace_callback()') in function 'searchReplaceText()', which is defined
    //       outside of this file, so the patterns are intentionally left untouched here.
    "/\\[super:(.+?)\\]/ie" => "superScriptToUnicode('\\1')", // function 'superScriptToUnicode()' will convert superscript text to appropriate Unicode entities
    "/\\[sub:(.+?)\\]/ie" => "subScriptToUnicode('\\1')", // function 'subScriptToUnicode()' will convert subscript text to appropriate Unicode entities

    "/\\[permil\\]/" => '‰', // (per mille sign)
    "/\\[infinity\\]/" => '∞', // (infinity)

    // Greek lowercase letters:
    "/\\[alpha\\]/" => 'α',
    "/\\[beta\\]/" => 'β',
    "/\\[gamma\\]/" => 'γ',
    "/\\[delta\\]/" => 'δ',
    "/\\[epsilon\\]/" => 'ε',
    "/\\[zeta\\]/" => 'ζ',
    "/\\[eta\\]/" => 'η',
    "/\\[theta\\]/" => 'θ',
    "/\\[iota\\]/" => 'ι',
    "/\\[kappa\\]/" => 'κ',
    "/\\[lambda\\]/" => 'λ',
    "/\\[mu\\]/" => 'μ',
    "/\\[nu\\]/" => 'ν',
    "/\\[xi\\]/" => 'ξ',
    "/\\[omicron\\]/" => 'ο',
    "/\\[pi\\]/" => 'π',
    "/\\[rho\\]/" => 'ρ',
    "/\\[sigmaf\\]/" => 'ς',
    "/\\[sigma\\]/" => 'σ',
    "/\\[tau\\]/" => 'τ',
    "/\\[upsilon\\]/" => 'υ',
    "/\\[phi\\]/" => 'φ',
    "/\\[chi\\]/" => 'χ',
    "/\\[psi\\]/" => 'ψ',
    "/\\[omega\\]/" => 'ω',

    // Greek uppercase letters:
    "/\\[Alpha\\]/" => 'Α',
    "/\\[Beta\\]/" => 'Β',
    "/\\[Gamma\\]/" => 'Γ',
    "/\\[Delta\\]/" => 'Δ',
    "/\\[Epsilon\\]/" => 'Ε',
    "/\\[Zeta\\]/" => 'Ζ',
    "/\\[Eta\\]/" => 'Η',
    "/\\[Theta\\]/" => 'Θ',
    "/\\[Iota\\]/" => 'Ι',
    "/\\[Kappa\\]/" => 'Κ',
    "/\\[Lambda\\]/" => 'Λ',
    "/\\[Mu\\]/" => 'Μ',
    "/\\[Nu\\]/" => 'Ν',
    "/\\[Xi\\]/" => 'Ξ',
    "/\\[Omicron\\]/" => 'Ο',
    "/\\[Pi\\]/" => 'Π',
    "/\\[Rho\\]/" => 'Ρ',
    "/\\[Sigma\\]/" => 'Σ',
    "/\\[Tau\\]/" => 'Τ',
    "/\\[Upsilon\\]/" => 'Υ',
    "/\\[Phi\\]/" => 'Φ',
    "/\\[Chi\\]/" => 'Χ',
    "/\\[Psi\\]/" => 'Ψ',
    "/\\[Omega\\]/" => 'Ω',

    // Typographic replacements:
    "/\"(.+?)\"/" => '“\\1”', // ... (left and right double quotation marks)
    "/ +- +/" => ' – ', // (endash)
    // NOTE(review): search and replacement text of the following pattern look identical in this
    //               copy of the file -- confirm the intended source character against the repository.
    "/–/$patternModifiers" => '–' // (endash)
    // Note that for UTF-8 based systems, '$patternModifiers' contains the "u" (PCRE_UTF8) pattern modifier which causes PHP/PCRE
    // to treat pattern strings as UTF-8 (otherwise this conversion pattern would garble UTF-8 characters such as "Ö")
);

// Conversion table used by function 'superScriptToUnicode()':
$unicodeSuperScriptSearchReplaceActionsArray = array(
    "/1/" => '¹', // (superscript one)
    "/2/" => '²', // (superscript two)
    "/3/" => '³', // (superscript three)
    "/4/" => '⁴', // (superscript four)
    "/5/" => '⁵', // (superscript five)
    "/6/" => '⁶', // (superscript six)
    "/7/" => '⁷', // (superscript seven)
    "/8/" => '⁸', // (superscript eight)
    "/9/" => '⁹', // (superscript nine)
    "/0/" => '⁰', // (superscript zero)
    "/\\+/" => '⁺', // (superscript plus sign)
    "/-/" => '⁻', // (superscript minus)
    "/=/" => '⁼', // (superscript equals sign)
    "/\\(/" => '⁽', // (superscript left parenthesis)
    "/\\)/" => '⁾', // (superscript right parenthesis)
    "/n/" => 'ⁿ', // (superscript latin small letter n)
    // Keep superscript markup in place for any text that has no matching superscript entity in Unicode.
    // '$patternModifiers' is required here: the character class consists of multi-byte UTF-8 characters, and
    // without the "u" (PCRE_UTF8) modifier PCRE would match on single bytes and could split/garble other
    // multi-byte characters (or fail to re-wrap them in markup).
    "/([^¹²³⁴⁵⁶⁷⁸⁹⁰⁺⁻⁼⁽⁾ⁿ]+)/$patternModifiers" => '[super:\\1]'
);

// Conversion table used by function 'subScriptToUnicode()':
$unicodeSubScriptSearchReplaceActionsArray = array(
    "/1/" => '₁', // (subscript one)
    "/2/" => '₂', // (subscript two)
    "/3/" => '₃', // (subscript three)
    "/4/" => '₄', // (subscript four)
    "/5/" => '₅', // (subscript five)
    "/6/" => '₆', // (subscript six)
    "/7/" => '₇', // (subscript seven)
    "/8/" => '₈', // (subscript eight)
    "/9/" => '₉', // (subscript nine)
    "/0/" => '₀', // (subscript zero)
    "/\\+/" => '₊', // (subscript plus sign)
    "/-/" => '₋', // (subscript minus)
    "/=/" => '₌', // (subscript equals sign)
    "/\\(/" => '₍', // (subscript left parenthesis)
    "/\\)/" => '₎', // (subscript right parenthesis)
    // Keep subscript markup in place for any text that has no matching subscript entity in Unicode.
    // As with the superscript table, '$patternModifiers' makes PCRE treat the character class as
    // UTF-8 characters instead of single bytes on UTF-8 based systems.
    "/([^₁₂₃₄₅₆₇₈₉₀₊₋₌₍₎]+)/$patternModifiers" => '[sub:\\1]'
);

// --------------------------------------------------------------------

// Converts superscript text to appropriate Unicode entities:
//
// $sourceString  - the text to convert; the '[super:...]' pattern in '$transtab_refbase_unicode'
//                  passes the contents of the markup (i.e. the text matched by '(.+?)')
//
// Returns the result of applying '$unicodeSuperScriptSearchReplaceActionsArray' via 'searchReplaceText()'.
//
// NOTE(review): the table is accessed via 'global', which presumes that this file gets included
//               at global scope -- confirm against the including code.
function superScriptToUnicode($sourceString)
{
    global $unicodeSuperScriptSearchReplaceActionsArray;

    // function 'searchReplaceText()' is defined in 'include.inc.php'
    return searchReplaceText($unicodeSuperScriptSearchReplaceActionsArray, $sourceString, true);
}

// --------------------------------------------------------------------

// Converts subscript text to appropriate Unicode entities:
//
// $sourceString  - the text to convert; the '[sub:...]' pattern in '$transtab_refbase_unicode'
//                  passes the contents of the markup (i.e. the text matched by '(.+?)')
//
// Returns the result of applying '$unicodeSubScriptSearchReplaceActionsArray' via 'searchReplaceText()'.
//
// NOTE(review): the table is accessed via 'global', which presumes that this file gets included
//               at global scope -- confirm against the including code.
function subScriptToUnicode($sourceString)
{
    global $unicodeSubScriptSearchReplaceActionsArray;

    // function 'searchReplaceText()' is defined in 'include.inc.php'
    return searchReplaceText($unicodeSubScriptSearchReplaceActionsArray, $sourceString, true);
}

?>