You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

510 lines
24 KiB

  1. <?php
  2. // Project: Web Reference Database (refbase) <http://www.refbase.net>
  3. // Copyright: Matthias Steffens <mailto:refbase@extracts.de> and the file's
  4. // original author(s).
  5. //
  6. // This code is distributed in the hope that it will be useful,
  7. // but WITHOUT ANY WARRANTY. Please see the GNU General Public
  8. // License for more details.
  9. //
  10. // File: ./includes/oaidcxml.inc.php
  11. // Repository: $HeadURL$
  12. // Author(s): Matthias Steffens <mailto:refbase@extracts.de>
  13. //
  14. // Created: 05-Mar-08, 21:52
  15. // Modified: $Date: 2017-04-13 02:00:18 +0000 (Thu, 13 Apr 2017) $
  16. // $Author$
  17. // $Revision: 1416 $
  18. // This include file contains functions that'll export records to OAI_DC XML.
  19. // Requires ActiveLink PHP XML Package, which is available under the GPL from:
  20. // <http://www.active-link.com/software/>
  21. // TODO: I18n
  22. // Incorporate some include files:
  23. include_once 'includes/webservice.inc.php'; // include functions that are commonly used with the refbase webservices
  24. // Import the ActiveLink Packages
  25. require_once("classes/include.php");
  26. import("org.active-link.xml.XML");
  27. import("org.active-link.xml.XMLDocument");
  28. // --------------------------------------------------------------------
  29. // Return records as OAI_DC (i.e. simple/unqualified Dublin Core) XML as required
  30. // by the Open Archives Initiative Protocol for Metadata Harvesting (OAI-PMH):
  31. //
  32. // Spec: <http://www.openarchives.org/OAI/openarchivesprotocol.html>
  33. // Guides: <http://www.oaforum.org/tutorial/english/page5.htm>
  34. // <http://dublincore.org/documents/dc-xml-guidelines/>
  // Builds one OAI_DC XML document from a MySQL result set and returns it as a string.
  //
  // Each row fetched from '$result' is handed to 'oaidcRecord()' (using the "oai_dc"
  // metadata prefix); the resulting record objects are attached as branches of a
  // 'dcCollection' root element. Individual records are XML objects, whereas the
  // finished collection is returned in serialized (string) form.
  //
  // Side effect: the global '$citeKeysArray' is reset to an empty array before
  // any record gets generated.
  function oaidcCollection($result)
  {
      global $contentTypeCharset, $convertExportDataToUTF8; // defined in 'ini.inc.php'
      global $citeKeysArray; // made globally available from within this function

      // Set up the document and decide on its character encoding:
      $xmlDocument = new XMLDocument();
      $exportAsUTF8 = (($convertExportDataToUTF8 == "yes") && ($contentTypeCharset != "UTF-8"));
      $xmlDocument->setEncoding($exportAsUTF8 ? "UTF-8" : $contentTypeCharset);

      // Create the root element and declare the namespaces & schema location:
      $collectionRoot = new XML("dcCollection");
      $rootAttributes = array(
          "xmlns:oai_dc"       => "http://www.openarchives.org/OAI/2.0/oai_dc/",
          "xmlns:dc"           => "http://purl.org/dc/elements/1.1/",
          "xmlns:xsi"          => "http://www.w3.org/2001/XMLSchema-instance",
          "xsi:schemaLocation" => "http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd"
      );

      foreach ($rootAttributes as $attributeName => $attributeValue)
      {
          $collectionRoot->setTagAttribute($attributeName, $attributeValue);
      }

      // ----------------------------------------------------------

      $recordObjects = array(); // holds the individually exported records
      $citeKeysArray = array(); // cite keys seen so far (used to ensure uniqueness of cite keys among all exported records)

      // First pass: generate an OAI_DC XML object for every row of the result set
      while ($currentRow = @ mysqli_fetch_array($result))
      {
          $recordObject = oaidcRecord($currentRow, "oai_dc");

          if (empty($recordObject))
              continue; // nothing was generated for this row

          $recordObjects[] = $recordObject;
      }

      // Second pass: attach all generated records to the collection root
      foreach ($recordObjects as $recordObject)
      {
          $collectionRoot->addXMLasBranch($recordObject);
      }

      $xmlDocument->setXML($collectionRoot);

      return $xmlDocument->getXMLString();
  }
  71. // --------------------------------------------------------------------
  72. // Generate an OAI_DC (i.e. simple/unqualified Dublin Core) XML record:
  73. // (returns an XML object (oaidc) of a single record)
  74. //
  75. // TODO: - see inline comments labeled with "TODO"
  // Generates the XML object for a single record in simple/unqualified Dublin Core.
  //
  // Parameters:
  //   $row              - array holding the field values of one record (keyed by field name)
  //   $metadataPrefix   - "oai_dc" (default) or "srw_dc"; used as prefix of the root element
  //                       ('<prefix>:dc') and decides which optional elements get written:
  //                       'dc:source' (publication info & ISSN) only for "oai_dc",
  //                       PRISM elements only for "srw_dc". If empty, the root element is
  //                       named plain 'dc'.
  //   $addNameSpaceInfo - if true, namespace (and, for "oai_dc", schema location) attributes
  //                       are added to the root element
  //
  // Returns: an XML object representing the record.
  //
  // Side effects: if no style file is found for the global '$citeStyle', that global is
  // overwritten with '$defaultCiteStyle'; the matching citation style file from the
  // 'cite/' directory is included once.
  function oaidcRecord($row, $metadataPrefix = "oai_dc", $addNameSpaceInfo = true)
  {
      global $databaseBaseURL; // these variables are defined in 'ini.inc.php'
      global $contentTypeCharset;
      global $fileVisibility;
      global $fileVisibilityException;
      global $filesBaseURL;
      global $convertExportDataToUTF8;
      global $defaultCiteStyle;

      global $citeStyle;

      global $alnum, $alpha, $cntrl, $dash, $digit, $graph, $lower, $print, $punct, $space, $upper, $word, $patternModifiers; // defined in 'transtab_unicode_charset.inc.php' and 'transtab_latin1_charset.inc.php'

      // The array '$transtab_refbase_unicode' contains search & replace patterns for conversion from refbase markup to Unicode entities.
      global $transtab_refbase_unicode; // defined in 'transtab_refbase_unicode.inc.php'

      // The array '$transtab_refbase_ascii' contains search & replace patterns for conversion from refbase markup to plain text.
      global $transtab_refbase_ascii; // defined in 'transtab_refbase_ascii.inc.php'

      // Define inline text markup to generate a plain text citation string:
      // (the citation is written to a 'dc:identifier' element further below)
      $markupPatternsArrayPlain = array("bold-prefix"        => "", // NOTE: should we rather keep refbase font-shape markup (like _italic_ and **bold**) for plain text output?
                                        "bold-suffix"        => "",
                                        "italic-prefix"      => "",
                                        "italic-suffix"      => "",
                                        "underline-prefix"   => "",
                                        "underline-suffix"   => "",
                                        "endash"             => "-",
                                        "emdash"             => "-",
                                        "ampersand"          => "&",
                                        "double-quote"       => '"',
                                        "double-quote-left"  => '"',
                                        "double-quote-right" => '"',
                                        "single-quote"       => "'",
                                        "single-quote-left"  => "'",
                                        "single-quote-right" => "'",
                                        "less-than"          => "<",
                                        "greater-than"       => ">",
                                        "newline"            => "\n"
                                       );

      // This is a stupid hack that maps the names of the '$row' array keys to those used
      // by the '$formVars' array (which is required by function 'generateCiteKey()')
      // (eventually, the '$formVars' array should use the MySQL field names as names for its array keys)
      $formVars = buildFormVarsArray($row); // function 'buildFormVarsArray()' is defined in 'include.inc.php'

      // Generate or extract the cite key for this record:
      // (to be included within a 'dc:identifier' element)
      $citeKey = generateCiteKey($formVars); // function 'generateCiteKey()' is defined in 'include.inc.php'

      // Generate OpenURL data:
      // (to be included within a 'dc:identifier' element)
      $openURL = openURL($row, "openurl:"); // function 'openURL()' is defined in 'openurl.inc.php'

      // Encode special chars and perform charset conversions:
      foreach ($row as $rowFieldName => $rowFieldValue)
      {
          // We only convert those special chars to entities which are supported by XML:
          // function 'encodeHTMLspecialchars()' is defined in 'include.inc.php'
          $row[$rowFieldName] = encodeHTMLspecialchars($row[$rowFieldName]);

          // Convert field data to UTF-8:
          // (if '$convertExportDataToUTF8' is set to "yes" in 'ini.inc.php' and character encoding is not UTF-8 already)
          // (Note that charset conversion can only be done *after* the cite key has been generated, otherwise cite key
          //  generation will produce garbled text!)
          // function 'convertToCharacterEncoding()' is defined in 'include.inc.php'
          if (($convertExportDataToUTF8 == "yes") AND ($contentTypeCharset != "UTF-8"))
              $row[$rowFieldName] = convertToCharacterEncoding("UTF-8", "IGNORE", $row[$rowFieldName]);
      }

      // Defines field-specific search & replace 'actions' that will be applied to all those refbase fields that are listed in the corresponding 'fields' element:
      // (If you don't want to perform any search and replace actions, specify an empty array, like: '$fieldSpecificSearchReplaceActionsArray = array();'.
      //  Note that the search patterns MUST include the leading & trailing slashes -- which is done to allow for mode modifiers such as 'imsxU'.)
      //                                         "/Search Pattern/" => "Replace Pattern"
      $fieldSpecificSearchReplaceActionsArray = array();

      if ($convertExportDataToUTF8 == "yes")
          $fieldSpecificSearchReplaceActionsArray[] = array('fields'  => array("title", "publication", "abbrev_journal", "address", "keywords", "abstract", "orig_title", "series_title", "abbrev_series_title", "notes"),
                                                            'actions' => $transtab_refbase_unicode
                                                           );

      // Apply field-specific search & replace 'actions' to all fields that are listed in the 'fields' element of the arrays contained in '$fieldSpecificSearchReplaceActionsArray':
      // NOTE: The field name lookup is done with strict comparison. '$row' may carry integer keys in
      //       addition to field names (e.g. when fetched via 'mysqli_fetch_array()' with its default
      //       result type), and with loose comparison the integer key 0 would equal any non-numeric
      //       field name on PHP versions before 8.
      foreach ($fieldSpecificSearchReplaceActionsArray as $fieldActionsArray)
          foreach ($row as $rowFieldName => $rowFieldValue)
              if (in_array($rowFieldName, $fieldActionsArray['fields'], true))
                  $row[$rowFieldName] = searchReplaceText($fieldActionsArray['actions'], $rowFieldValue, true); // function 'searchReplaceText()' is defined in 'include.inc.php'

      // Fetch the name of the citation style file that's associated with the style given in '$citeStyle':
      $citeStyleFile = getStyleFile($citeStyle); // function 'getStyleFile()' is defined in 'include.inc.php'

      if (empty($citeStyleFile))
      {
          $citeStyle = $defaultCiteStyle; // if the given cite style could not be found, we'll use the default cite style which is defined by the '$defaultCiteStyle' variable in 'ini.inc.php'
          $citeStyleFile = getStyleFile($citeStyle);
      }

      // Include the found citation style file *once*:
      include_once "cite/" . $citeStyleFile;

      // Generate a proper citation for this record, ordering attributes according to the chosen output style & record type:
      // - Plain text version of citation string:
      $recordCitationPlain = citeRecord($row, $citeStyle, "", $markupPatternsArrayPlain, false); // function 'citeRecord()' is defined in the citation style file given in '$citeStyleFile' (which is included from the 'cite/' directory above)

      //   Convert any refbase markup that remains in the citation string (such as _italic_ or **bold**) to plain text:
      $recordCitationPlain = searchReplaceText($transtab_refbase_ascii, $recordCitationPlain, true);

      // Convert any remaining refbase markup in the 'title', 'keywords' & 'abstract' fields to plain text:
      $row['title'] = searchReplaceText($transtab_refbase_ascii, $row['title'], true);
      $row['keywords'] = searchReplaceText($transtab_refbase_ascii, $row['keywords'], true);
      $row['abstract'] = searchReplaceText($transtab_refbase_ascii, $row['abstract'], true);

      // Strip any " (ed)" or " (eds)" suffix from author/editor string:
      if (preg_match("/ *\(eds?\)$/", $row['author']))
          $row['author'] = preg_replace("/[ \r\n]*\(eds?\)/i", "", $row['author']);

      if (preg_match("/ *\(eds?\)$/", $row['editor']))
          $row['editor'] = preg_replace("/[ \r\n]*\(eds?\)/i", "", $row['editor']);

      // Include a link to any corresponding file if one of the following conditions is met:
      // - the variable '$fileVisibility' (defined in 'ini.inc.php') is set to 'everyone'
      // - the variable '$fileVisibility' is set to 'login' AND the user is logged in
      // - the variable '$fileVisibility' is set to 'user-specific' AND the 'user_permissions' session variable contains 'allow_download'
      // - the array variable '$fileVisibilityException' (defined in 'ini.inc.php') contains a pattern (in array element 1) that matches the contents of the field given (in array element 0)
      //
      // TODO: - the URL-generating code should be made into a dedicated function (since it's shared with 'modsxml.inc.php' and 'atomxml.inc.php')
      $printURL = false;

      if ($fileVisibility == "everyone"
          OR ($fileVisibility == "login" AND isset($_SESSION['loginEmail']))
          OR ($fileVisibility == "user-specific" AND (isset($_SESSION['user_permissions']) AND preg_match("/allow_download/", $_SESSION['user_permissions'])))
          OR (!empty($fileVisibilityException) AND preg_match($fileVisibilityException[1], $row[$fileVisibilityException[0]])))
      {
          if (!empty($row['file']))
          {
              if (preg_match('#^(https?|ftp|file)://#i', $row['file'])) // if the 'file' field contains a full URL (starting with "http://", "https://", "ftp://", or "file://")
              {
                  $URLprefix = ""; // we don't alter the URL given in the 'file' field
              }
              else // if the 'file' field contains only a partial path (like 'polarbiol/10240001.pdf') or just a file name (like '10240001.pdf')
              {
                  // use the base URL of the standard files directory as prefix:
                  if (preg_match('#^/#', $filesBaseURL)) // absolute path -> file dir is located outside of refbase root dir
                      $URLprefix = 'http://' . $_SERVER['HTTP_HOST'] . $filesBaseURL;
                  else // relative path -> file dir is located within refbase root dir
                      $URLprefix = $databaseBaseURL . $filesBaseURL;
              }

              $printURL = true; // '$URLprefix' is guaranteed to be set whenever this flag is true
          }
      }

      // ----------------------------------------------------------

      // Start OAI_DC XML record:

      // Always initialize the prefix so that an empty '$metadataPrefix' yields a plain
      // 'dc' root element instead of reading an undefined variable:
      $recordPrefix = "";

      if (!empty($metadataPrefix))
          $recordPrefix = $metadataPrefix . ":";

      $record = new XML($recordPrefix . "dc"); // create an XML object for a single record

      if ($addNameSpaceInfo)
      {
          if ($metadataPrefix == "oai_dc")
              $record->setTagAttribute("xmlns:oai_dc", "http://www.openarchives.org/OAI/2.0/oai_dc/");
          elseif ($metadataPrefix == "srw_dc")
              $record->setTagAttribute("xmlns:srw_dc", "info:srw/schema/1/dc-v1.1");

          $record->setTagAttribute("xmlns:dc", "http://purl.org/dc/elements/1.1/");

          if ($metadataPrefix == "oai_dc") // NOTE: should we include these for 'srw_dc:dc' output as well?
          {
              $record->setTagAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance");
              $record->setTagAttribute("xsi:schemaLocation", "http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd");
          }
          elseif ($metadataPrefix == "srw_dc")
              $record->setTagAttribute("xmlns:prism", "http://prismstandard.org/namespaces/1.2/basic/");
      }

      // Add Dublin Core elements:
      // NOTE: With a few exceptions, we try to adhere to the guidelines given at
      //       "Using simple Dublin Core to describe eprints" by Andy Powell et al.
      //       See: <http://eprints-uk.rdn.ac.uk/project/docs/simpledc-guidelines/>

      // - 'dc:title':
      if (!empty($row['title']))
          addMetaElement($record, "dc", "title", array(), $row['title']); // function 'addMetaElement()' is defined in 'webservice.inc.php'

      // - 'dc:creator':
      //   (skipped if the author string equals the editor string, in which case only 'dc:contributor' is written)
      //   TODO: add refbase corporate author(s) as 'dc:creator'
      if (!empty($row['author']) AND ($row['author'] != $row['editor']))
          addMetaElement($record, "dc", "creator", array(), $row['author']);

      // - 'dc:contributor':
      if (!empty($row['editor']))
          addMetaElement($record, "dc", "contributor", array(), $row['editor']);

      // - 'dc:description':
      if (!empty($row['abstract']))
          addMetaElement($record, "dc", "description", array(), $row['abstract']);

      // - 'dc:identifier':

      //   - DOI:
      if (!empty($row['doi']))
          addMetaElement($record, "dc", "identifier", array(), $row['doi'], "doi");

      //   - PMID:
      //     NOTE(review): the complete 'notes' field is handed over here; presumably
      //     'addMetaElement()' extracts the actual ID -- confirm in 'webservice.inc.php'
      if (!empty($row['notes']) AND preg_match("/PMID *: *\d+/i", $row['notes']))
          addMetaElement($record, "dc", "identifier", array(), $row['notes'], "pmid");

      //   - arXiv:
      //     (same as for PMID: the complete 'notes' field is handed over)
      if (!empty($row['notes']) AND preg_match("/arXiv *: *[^ ;]+/i", $row['notes']))
          addMetaElement($record, "dc", "identifier", array(), $row['notes'], "arxiv");

      //   - ISBN:
      if (!empty($row['isbn']))
          addMetaElement($record, "dc", "identifier", array(), $row['isbn'], "isbn");

      //   - OpenURL:
      addMetaElement($record, "dc", "identifier", array(), $openURL, "openurl");

      //   - refbase ID:
      addMetaElement($record, "dc", "identifier", array(), $databaseBaseURL . generateURL("show.php", "html", array("record" => $row['serial']), true), "url");

      //   - Cite key:
      addMetaElement($record, "dc", "identifier", array(), $citeKey, "citekey");

      //   - Bibliographic citation:
      //     NOTE: In 'atomxml.inc.php', the bibliographic citation is put into a
      //           'dcterms:bibliographicCitation' element so that it can be uniquely
      //           identified and extracted easily. However, in case of simple Dublin
      //           Core output, we just put it into a 'dc:identifier' element and
      //           use a "citation:" prefix.
      addMetaElement($record, "dc", "identifier", array(), encodeHTMLspecialchars($recordCitationPlain), "citation");

      // - 'dc:source':
      //   NOTE: - In <http://eprints-uk.rdn.ac.uk/project/docs/simpledc-guidelines/>,
      //           Andy Powell et al. recommend that this element should NOT be used!
      //           However, we use 'dc:source' elements for publication & series info
      //           (publication/series title plus volume & issue) to provide a dedicated
      //           source string that's easily readable and parsable.
      //           Example: <dc:source>Polar Biology, Vol. 25, No. 10</dc:source>
      //         - While we could also append the page info to the publication
      //           'dc:source' element, this info is more pertinent to the article
      //           itself and is thus not included. For 'srw_dc:dc' output, page info is
      //           included in PRISM elements (see below).
      //         - All metadata (including the page info) are also provided as a machine
      //           parsable citation in form of an OpenURL ContextObject (see above).

      //   - Publication info:
      //     NOTE: We only include the 'dc:source' element for 'oai_dc:dc' output. In case of 'srw_dc:dc'
      //           output, we use the more fine-grained PRISM elements instead (see below)
      if (($metadataPrefix == "oai_dc") AND (!empty($row['publication']) OR !empty($row['abbrev_journal'])))
      {
          if (!empty($row['publication']))
              $source = $row['publication'];
          elseif (!empty($row['abbrev_journal']))
              $source = $row['abbrev_journal'];

          if (!empty($row['volume']))
              $source .= ", Vol. " . $row['volume'];

          if (!empty($row['issue']))
              $source .= ", No. " . $row['issue'];

          if (!empty($source))
              addMetaElement($record, "dc", "source", array(), $source);
      }

      //   - Series info:
      //     (written regardless of the metadata prefix)
      if (!empty($row['series_title']) OR !empty($row['abbrev_series_title']))
      {
          if (!empty($row['series_title']))
              $series = $row['series_title'];
          elseif (!empty($row['abbrev_series_title']))
              $series = $row['abbrev_series_title'];

          if (!empty($row['series_volume']))
              $series .= ", Vol. " . $row['series_volume'];

          if (!empty($row['series_issue']))
              $series .= ", No. " . $row['series_issue'];

          if (!empty($series))
              addMetaElement($record, "dc", "source", array(), $series);
              // NOTE: To distinguish between regular publication & series info,
              //       should we better use a "series:" prefix here? If so, use:
              //       addMetaElement($record, "dc", "source", array(), $series, "series");
      }

      //   - ISSN:
      //     NOTE: for 'srw_dc:dc' output, we put the ISSN into the 'prism:issn' element
      if (($metadataPrefix == "oai_dc") AND !empty($row['issn']))
          addMetaElement($record, "dc", "source", array(), $row['issn'], "issn");

      // - 'dc:date':
      if (!empty($row['year']))
          addMetaElement($record, "dc", "date", array(), $row['year']);

      // - 'dc:type':
      //   NOTE(review): the 'thesis' field is passed as sixth argument; presumably
      //   'addMetaElement()' uses it to adapt the type term -- confirm in 'webservice.inc.php'
      if (!empty($row['type']))
          addMetaElement($record, "dc", "type", array(), $row['type'], $row['thesis']);

      //   In case of a thesis, we add another 'dc:type' element with the actual thesis type:
      if (!empty($row['thesis']))
          addMetaElement($record, "dc", "type", array(), $row['thesis']);

      // - 'dc:format':
      //   TODO: ideally, we should parse the content of the refbase 'medium' field and map it
      //         to a media-type term from <http://www.iana.org/assignments/media-types/>
      if (!empty($row['medium']))
          $mediaType = $row['medium'];
      else
          $mediaType = "text"; // fallback if the 'medium' field is empty

      addMetaElement($record, "dc", "format", array(), $mediaType);

      // - 'dc:subject':
      //   TODO: add user-specific keywords (from field 'user_keys') if the user is logged in
      if (!empty($row['keywords']))
          addMetaElement($record, "dc", "subject", array(), $row['keywords']);

      // - 'dc:coverage':
      //   TODO: should we add contents from the refbase 'area' field as 'dc:coverage' element(s)?

      // - 'dc:relation':

      //   - Related URL:
      if (!empty($row['url']))
          addMetaElement($record, "dc", "relation", array(), $row['url'], "url");

      //   - Related FILE:
      if ($printURL)
          addMetaElement($record, "dc", "relation", array(), $URLprefix . $row['file'], "file");

      // - 'dc:publisher':
      if (!empty($row['publisher']))
          addMetaElement($record, "dc", "publisher", array(), $row['publisher']);

      // - 'dc:language':
      //   TODO: convert to ISO notation (i.e. "en" instead of "English", etc)
      if (!empty($row['language']))
          addMetaElement($record, "dc", "language", array(), $row['language']);

      // ----------------------------------------------------------

      // Add PRISM elements:
      // NOTE: When using the 'srw_dc' namespace (i.e. 'info:srw/schema/1/dc-v1.1' as detailed at
      //       <http://www.loc.gov/standards/sru/resources/dc-schema.html>), I don't think it's allowed
      //       to include anything but the fifteen elements from simple Dublin Core. Is this correct?
      //       If so, then:
      //
      // TODO: Do we need to put the PRISM elements in <extraRecordData> instead? Or can we put them within
      //       a separate branch outside of (and next to) the '<srw_dc:dc>' element? Or shall we better omit
      //       them entirely?
      //       More info on SRU Extra Data: <http://www.loc.gov/standards/sru/specs/extra-data.html>
      //
      //       See also "Mixing DC metadata with other metadata schemas" in "Guidelines for implementing
      //       Dublin Core in XML" <http://dublincore.org/documents/dc-xml-guidelines/>
      if ($metadataPrefix == "srw_dc") // we only include PRISM elements for 'srw_dc:dc' output
      {
          // - 'prism:issn':
          if (!empty($row['issn']))
              addMetaElement($record, "prism", "issn", array(), $row['issn']);

          // - 'prism:publicationName':
          if (!empty($row['publication']))
              addMetaElement($record, "prism", "publicationName", array(), $row['publication']);
          elseif (!empty($row['abbrev_journal']))
              addMetaElement($record, "prism", "publicationName", array(), $row['abbrev_journal']);

          // - 'prism:publicationDate':
          if (!empty($row['year']))
              addMetaElement($record, "prism", "publicationDate", array(), $row['year']);

          // - 'prism:volume':
          if (!empty($row['volume']))
              addMetaElement($record, "prism", "volume", array(), $row['volume']);

          // - 'prism:number':
          if (!empty($row['issue']))
              addMetaElement($record, "prism", "number", array(), $row['issue']);

          // - 'prism:startingPage', 'prism:endingPage':
          //   TODO: Similar code is used in 'include.inc.php', 'modsxml.inc.php' and 'openurl.inc.php',
          //         so this should be made into a dedicated function!
          if (!empty($row['pages']) AND preg_match("/\d+/i", $row['pages'])) // if the 'pages' field contains a number
          {
              $pages = preg_replace("/^\D*(\d+)( *[$dash]+ *\d+)?.*/i$patternModifiers", "\\1\\2", $row['pages']); // extract page range (if there's any), otherwise just the first number
              $startPage = preg_replace("/^\D*(\d+).*/i", "\\1", $row['pages']); // extract starting page
              $endPage = extractDetailsFromField("pages", $pages, "/\D+/", "[-1]"); // extract ending page (function 'extractDetailsFromField()' is defined in 'include.inc.php')
              // NOTE: To extract the ending page, we'll use function 'extractDetailsFromField()'
              //       instead of just grabbing a matched regex pattern since it'll also work
              //       when just a number but no range is given (e.g. when startPage = endPage)

              // - 'prism:startingPage':
              //   (only written if there's a page range; a single page number ends up in 'prism:endingPage' only)
              if (preg_match("/\d+ *[$dash]+ *\d+/i$patternModifiers", $row['pages'])) // if there's a page range
                  addMetaElement($record, "prism", "startingPage", array(), $startPage);

              // - 'prism:endingPage':
              addMetaElement($record, "prism", "endingPage", array(), $endPage);
          }
      }

      return $record;
  }
  404. // --------------------------------------------------------------------
  405. ?>