Files
Refbase2X/includes/transtab_refbase_unicode.inc.php
2021-01-28 15:58:21 +01:00

159 lines
7.5 KiB
PHP
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?php
// Project: Web Reference Database (refbase) <http://www.refbase.net>
// Copyright: Matthias Steffens <mailto:refbase@extracts.de> and the file's
// original author(s).
//
// This code is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY. Please see the GNU General Public
// License for more details.
//
// File: ./includes/transtab_refbase_unicode.inc.php
// Repository: $HeadURL: file:///svn/p/refbase/code/branches/bleeding-edge/includes/transtab_refbase_unicode.inc.php $
// Author(s): Matthias Steffens <mailto:refbase@extracts.de>
//
// Created: 02-Jun-06, 01:41
// Modified: $Date: 2008-07-30 14:50:42 +0000 (Wed, 30 Jul 2008) $
// $Author: msteffens $
// $Revision: 1183 $
// Search & replace patterns and functions for conversion from refbase markup to Unicode entities.
// Search & replace patterns must be specified as perl-style regular expressions, and search patterns must include the leading & trailing slashes.
global $patternModifiers; // defined in 'transtab_unicode_charset.inc.php' and 'transtab_latin1_charset.inc.php'

// Translation table that maps refbase markup (array keys, given as complete perl-style search patterns
// including delimiters and modifiers) to the Unicode text that replaces it (array values).
//
// NOTE(review): the first two patterns use the "e" (PREG_REPLACE_EVAL) modifier so that the replacement string is
// evaluated as PHP code. That modifier was deprecated in PHP 5.5 and removed in PHP 7.0; the patterns are kept
// unchanged here because the consumer of this table is not part of this file -- confirm that it still handles them.
$transtab_refbase_unicode = array(
    // "/__(?!_)(.+?)__/" => '\\1', // the pattern for underline (__...__) must come before the one for italic (_..._)
    // "/_(.+?)_/" => '\\1', // fontshape markup is currently NOT converted (uncomment to strip fontshape markup from exported text)
    // "/\\*\\*(.+?)\\*\\*/" => '\\1',
    "/\\[super:(.+?)\\]/ie" => "superScriptToUnicode('\\1')", // function 'superScriptToUnicode()' will convert superscript text to appropriate Unicode entities
    "/\\[sub:(.+?)\\]/ie" => "subScriptToUnicode('\\1')", // function 'subScriptToUnicode()' will convert subscript text to appropriate Unicode entities
    "/\\[permil\\]/" => '‰', // <U2030> (per mille sign)
    "/\\[infinity\\]/" => '∞', // <U221E> (infinity)

    // Greek lowercase letters:
    "/\\[alpha\\]/" => 'α',
    "/\\[beta\\]/" => 'β',
    "/\\[gamma\\]/" => 'γ',
    "/\\[delta\\]/" => 'δ',
    "/\\[epsilon\\]/" => 'ε',
    "/\\[zeta\\]/" => 'ζ',
    "/\\[eta\\]/" => 'η',
    "/\\[theta\\]/" => 'θ',
    "/\\[iota\\]/" => 'ι',
    "/\\[kappa\\]/" => 'κ',
    "/\\[lambda\\]/" => 'λ',
    "/\\[mu\\]/" => 'μ',
    "/\\[nu\\]/" => 'ν',
    "/\\[xi\\]/" => 'ξ',
    "/\\[omicron\\]/" => 'ο',
    "/\\[pi\\]/" => 'π',
    "/\\[rho\\]/" => 'ρ',
    "/\\[sigmaf\\]/" => 'ς',
    "/\\[sigma\\]/" => 'σ',
    "/\\[tau\\]/" => 'τ',
    "/\\[upsilon\\]/" => 'υ',
    "/\\[phi\\]/" => 'φ',
    "/\\[chi\\]/" => 'χ',
    "/\\[psi\\]/" => 'ψ',
    "/\\[omega\\]/" => 'ω',

    // Greek uppercase letters:
    "/\\[Alpha\\]/" => 'Α',
    "/\\[Beta\\]/" => 'Β',
    "/\\[Gamma\\]/" => 'Γ',
    "/\\[Delta\\]/" => 'Δ',
    "/\\[Epsilon\\]/" => 'Ε',
    "/\\[Zeta\\]/" => 'Ζ',
    "/\\[Eta\\]/" => 'Η',
    "/\\[Theta\\]/" => 'Θ',
    "/\\[Iota\\]/" => 'Ι',
    "/\\[Kappa\\]/" => 'Κ',
    "/\\[Lambda\\]/" => 'Λ',
    "/\\[Mu\\]/" => 'Μ',
    "/\\[Nu\\]/" => 'Ν',
    "/\\[Xi\\]/" => 'Ξ',
    "/\\[Omicron\\]/" => 'Ο',
    "/\\[Pi\\]/" => 'Π',
    "/\\[Rho\\]/" => 'Ρ',
    "/\\[Sigma\\]/" => 'Σ',
    "/\\[Tau\\]/" => 'Τ',
    "/\\[Upsilon\\]/" => 'Υ',
    "/\\[Phi\\]/" => 'Φ',
    "/\\[Chi\\]/" => 'Χ',
    "/\\[Psi\\]/" => 'Ψ',
    "/\\[Omega\\]/" => 'Ω',

    "/\"(.+?)\"/" => '“\\1”', // <U201C>...<U201D> (left and right double quotation marks)

    // A hyphen surrounded by spaces becomes an en dash surrounded by single spaces.
    // (The replacement must contain the en dash itself; a bare space would silently drop the dash.)
    "/ +- +/" => ' – ', // <U2013> (endash)

    // The replacement must be the en dash, not an empty string, otherwise every en dash (e.g. in page
    // ranges such as "12–15") would be deleted from the text.
    // NOTE(review): as written, pattern and replacement are the same character, so this entry leaves the
    // text unchanged -- confirm against upstream whether the pattern was meant to match a different dash.
    "/–/$patternModifiers" => '–' // <U2013> (endash)
    // Note that for UTF-8 based systems, '$patternModifiers' contains the "u" (PCRE_UTF8) pattern modifier which causes PHP/PCRE
    // to treat pattern strings as UTF-8 (otherwise this conversion pattern would garble UTF-8 characters such as "Ö")
);
// Search & replace actions for the text found inside a '[super:...]' markup element (used by function
// 'superScriptToUnicode()'). Each key is a complete search pattern, each value the Unicode superscript
// character that replaces it.
//
// The final catch-all entry is written on the assumption that all preceding entries have already been
// applied: whatever is NOT one of the superscript characters produced above gets wrapped in '[super:...]' again.
// Its character class contains multi-byte UTF-8 characters, so it needs '$patternModifiers' (imported by the
// 'global' statement at the top of this file; contains the "u" modifier on UTF-8 based systems). Without that
// modifier PCRE compares single bytes, and a character that merely shares a byte with one of the listed
// characters (e.g. "β", which shares its second byte with "²") would be split in the middle and garbled.
$unicodeSuperScriptSearchReplaceActionsArray = array(
    "/1/" => '¹', // <U00B9> (superscript one)
    "/2/" => '²', // <U00B2> (superscript two)
    "/3/" => '³', // <U00B3> (superscript three)
    "/4/" => '⁴', // <U2074> (superscript four)
    "/5/" => '⁵', // <U2075> (superscript five)
    "/6/" => '⁶', // <U2076> (superscript six)
    "/7/" => '⁷', // <U2077> (superscript seven)
    "/8/" => '⁸', // <U2078> (superscript eight)
    "/9/" => '⁹', // <U2079> (superscript nine)
    "/0/" => '⁰', // <U2070> (superscript zero)
    "/\\+/" => '⁺', // <U207A> (superscript plus sign)
    "/-/" => '⁻', // <U207B> (superscript minus)
    "/=/" => '⁼', // <U207C> (superscript equals sign)
    "/\\(/" => '⁽', // <U207D> (superscript left parenthesis)
    "/\\)/" => '⁾', // <U207E> (superscript right parenthesis)
    "/n/" => 'ⁿ', // <U207F> (superscript latin small letter n)
    "/([^¹²³⁴⁵⁶⁷⁸⁹⁰⁺⁻⁼⁽⁾ⁿ]+)/$patternModifiers" => '[super:\\1]' // keep superscript markup in place for any text that has no matching superscript entity in Unicode
);
// Search & replace actions for the text found inside a '[sub:...]' markup element (used by function
// 'subScriptToUnicode()'). Each key is a complete search pattern, each value the Unicode subscript
// character that replaces it.
//
// The final catch-all entry is written on the assumption that all preceding entries have already been
// applied: whatever is NOT one of the subscript characters produced above gets wrapped in '[sub:...]' again.
// Its character class contains multi-byte UTF-8 characters, so it needs '$patternModifiers' (imported by the
// 'global' statement at the top of this file; contains the "u" modifier on UTF-8 based systems). Without that
// modifier PCRE compares single bytes, and a character that merely shares a byte with one of the listed
// characters (e.g. "É", which shares its second byte with "₉") would be split in the middle and garbled.
$unicodeSubScriptSearchReplaceActionsArray = array(
    "/1/" => '₁', // <U2081> (subscript one)
    "/2/" => '₂', // <U2082> (subscript two)
    "/3/" => '₃', // <U2083> (subscript three)
    "/4/" => '₄', // <U2084> (subscript four)
    "/5/" => '₅', // <U2085> (subscript five)
    "/6/" => '₆', // <U2086> (subscript six)
    "/7/" => '₇', // <U2087> (subscript seven)
    "/8/" => '₈', // <U2088> (subscript eight)
    "/9/" => '₉', // <U2089> (subscript nine)
    "/0/" => '₀', // <U2080> (subscript zero)
    "/\\+/" => '₊', // <U208A> (subscript plus sign)
    "/-/" => '₋', // <U208B> (subscript minus)
    "/=/" => '₌', // <U208C> (subscript equals sign)
    "/\\(/" => '₍', // <U208D> (subscript left parenthesis)
    "/\\)/" => '₎', // <U208E> (subscript right parenthesis)
    "/([^₁₂₃₄₅₆₇₈₉₀₊₋₌₍₎]+)/$patternModifiers" => '[sub:\\1]' // keep subscript markup in place for any text that has no matching subscript entity in Unicode
);
// --------------------------------------------------------------------
/**
 * Converts superscript text to the appropriate Unicode entities.
 *
 * Hands the given text, together with the global superscript search & replace table
 * ('$unicodeSuperScriptSearchReplaceActionsArray'), to function 'searchReplaceText()'
 * (defined in 'include.inc.php') and returns whatever that function returns.
 *
 * @param string $sourceString text taken from inside a '[super:...]' markup element
 * @return string the result of 'searchReplaceText()'
 */
function superScriptToUnicode($sourceString)
{
    global $unicodeSuperScriptSearchReplaceActionsArray;

    // The third argument is passed as 'true'; presumably it tells 'searchReplaceText()' that the patterns
    // in the table already include their delimiters -- confirm against 'include.inc.php'.
    return searchReplaceText(
        $unicodeSuperScriptSearchReplaceActionsArray,
        $sourceString,
        true
    );
}
// --------------------------------------------------------------------
/**
 * Converts subscript text to the appropriate Unicode entities.
 *
 * Hands the given text, together with the global subscript search & replace table
 * ('$unicodeSubScriptSearchReplaceActionsArray'), to function 'searchReplaceText()'
 * (defined in 'include.inc.php') and returns whatever that function returns.
 *
 * @param string $sourceString text taken from inside a '[sub:...]' markup element
 * @return string the result of 'searchReplaceText()'
 */
function subScriptToUnicode($sourceString)
{
    global $unicodeSubScriptSearchReplaceActionsArray;

    // The third argument is passed as 'true'; presumably it tells 'searchReplaceText()' that the patterns
    // in the table already include their delimiters -- confirm against 'include.inc.php'.
    return searchReplaceText(
        $unicodeSubScriptSearchReplaceActionsArray,
        $sourceString,
        true
    );
}
?>