You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

987 lines
44 KiB

<?php
// Project: Web Reference Database (refbase) <http://www.refbase.net>
// Copyright: Matthias Steffens <mailto:refbase@extracts.de> and the file's
// original author(s).
//
// This code is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY. Please see the GNU General Public
// License for more details.
//
// File: ./includes/modsxml.inc.php
// Repository: $HeadURL: file:///svn/p/refbase/code/branches/bleeding-edge/includes/modsxml.inc.php $
// Author(s): Richard Karnesky <mailto:karnesky@gmail.com>
//
// Created: 02-Oct-04, 12:00
// Modified: $Date: 2017-04-13 02:00:18 +0000 (Thu, 13 Apr 2017) $
// $Author: karnesky $
// $Revision: 1416 $
// This include file contains functions that'll export records to MODS XML.
// Requires ActiveLink PHP XML Package, which is available under the GPL from:
// <http://www.active-link.com/software/>
// Incorporate some include files:
include_once 'includes/transtab_refbase_unicode.inc.php'; // include refbase markup -> Unicode search & replace patterns
// Import the ActiveLink Packages
require_once("classes/include.php");
import("org.active-link.xml.XML");
import("org.active-link.xml.XMLDocument");
// For more on MODS, see:
// <http://www.loc.gov/standards/mods/>
// <http://www.scripps.edu/~cdputnam/software/bibutils/>
// TODO:
// Stuff in '// NOTE' comments
// There's a lot of overlap in the portions that depend on types. I plan
// on refactoring this, so that they can make calls to the same function.
// I don't know what to do with some fields
// See <http://www.loc.gov/standards/mods/v3/mods-3-0-outline.html>
// - Require clever parsing
// - address (?name->affiliation?)
// - medium (?typeOfResource?)
// - Don't know how refbase users use these
// - area (could be either topic or geographic, so we do nothing)
// - expedition
// --------------------------------------------------------------------
// Builds the MODS <relatedItem type="series"> branch for a record's series data.
// Only non-empty arguments contribute; they are added in this order:
//   - $series_title        -> relatedItem/titleInfo/title
//   - $abbrev_series_title -> an extra <titleInfo type="abbreviated"> branch
//   - $series_editor       -> one <name> branch per person (role "editor"),
//                             generated by separateNames()
//   - $series_volume /
//     $series_issue        -> a <part> branch with one <detail> element each
// Returns the assembled XMLBranch (it is returned even if nothing was added).
function serialBranch($series_editor, $series_title, $abbrev_series_title,
                      $series_volume, $series_issue) {
  // defined in 'transtab_unicode_charset.inc.php' and 'transtab_latin1_charset.inc.php'
  global $alnum, $alpha, $cntrl, $dash, $digit, $graph, $lower, $print, $punct,
         $space, $upper, $word, $patternModifiers;

  $seriesItem = new XMLBranch("relatedItem");
  $seriesItem->setTagAttribute("type", "series");

  // title
  if (!empty($series_title)) {
    $seriesItem->setTagContent(encodeXMLField('series_title', $series_title), "relatedItem/titleInfo/title");
  }

  // abbrev. title
  if (!empty($abbrev_series_title)) {
    $abbrevTitleInfo = new XMLBranch("titleInfo");
    $abbrevTitleInfo->setTagAttribute("type", "abbreviated");
    $abbrevTitleInfo->setTagContent(encodeXMLField('abbrev_series_title', $abbrev_series_title), "titleInfo/title");
    $seriesItem->addXMLBranch($abbrevTitleInfo);
  }

  // editor
  if (!empty($series_editor)) {
    // drop the "(ed)"/"(eds)" marker, but only if the string ends with one
    if (preg_match("/ *\(eds?\)$/", $series_editor)) {
      $series_editor = preg_replace("/[ \r\n]*\(eds?\)/i", "", $series_editor);
    }

    // pattern that separates the individual given names/initials of a person
    $betweenGivensPattern = "/(?<=^|[$word])[^-$word]+|(?<=^|[$upper])(?=$|[$upper])/$patternModifiers";

    $editorBranches = separateNames("series_editor", "/\s*;\s*/", "/\s*,\s*/",
                                    $betweenGivensPattern,
                                    $series_editor, "personal", "editor");

    foreach ($editorBranches as $editorBranch) {
      $seriesItem->addXMLBranch($editorBranch);
    }
  }

  // volume, issue
  if (!empty($series_volume) || !empty($series_issue)) {
    $partBranch = new XMLBranch("part");

    // detail type => (refbase field name, field value); volume is emitted before issue
    $partDetails = array(
      'volume' => array('series_volume', $series_volume),
      'issue'  => array('series_issue', $series_issue),
    );

    foreach ($partDetails as $detailType => $detailSpec) {
      list($detailField, $detailValue) = $detailSpec;

      if (empty($detailValue)) {
        continue;
      }

      $detailBranch = new XMLBranch("detail");
      $detailBranch->setTagContent(encodeXMLField($detailField, $detailValue), "detail/number");
      $detailBranch->setTagAttribute("type", $detailType);
      $partBranch->addXMLBranch($detailBranch);
    }

    $seriesItem->addXMLBranch($partBranch);
  }

  return $seriesItem;
}
// --------------------------------------------------------------------
// Separates people's names and then those names into their functional parts:
//   {{Family1,{Given1-1,Given1-2}},{Family2,{Given2}}}
// and builds one MODS <name> XMLBranch per person.
//
// Parameters:
//   $rowFieldName       - refbase field name that is handed to encodeXMLField()
//                         for every family/given name part
//   $betweenNamesDelim  - regex (incl. delimiters) that separates persons
//   $nameGivenDelim     - regex that separates the family name from the given names
//   $betweenGivensDelim - regex that separates the individual given names/initials
//   $names              - the raw names string
//   $type               - value of the 'type' attribute of each <name> element
//   $role               - text written to name/role/roleTerm
//
// Returns an array that contains ONLY XMLBranch objects (one per non-blank name,
// in input order). Callers pass every element straight to addXMLBranch(), so the
// raw name strings must not be part of the result.
function separateNames($rowFieldName, $betweenNamesDelim, $nameGivenDelim,
                       $betweenGivensDelim, $names, $type, $role) {
  // defined in 'transtab_unicode_charset.inc.php' and 'transtab_latin1_charset.inc.php'
  global $dash, $upper, $word, $patternModifiers;

  // The generated branches are collected in their own array. (Pushing them onto
  // the array returned by preg_split() would hand the raw name strings back to
  // the caller together with the branches.)
  $nameBranches = array();

  $singleNames = preg_split($betweenNamesDelim, $names); // get a list of all authors

  if ($singleNames === false) // the delimiter pattern failed to execute
    return $nameBranches;

  foreach ($singleNames as $singleName) {
    // a leading/trailing/doubled delimiter yields blank entries; these would
    // otherwise produce a <name> element with an empty family name
    if (trim($singleName) === '')
      continue;

    $nameBranch = new XMLBranch("name");
    $nameBranch->setTagAttribute("type", $type);

    if (preg_match($nameGivenDelim, $singleName)) {
      // only the first two parts are used, any further parts are ignored
      list($singleNameFamily, $singleNameGivens) = preg_split($nameGivenDelim,
                                                              $singleName);
    }
    else {
      $singleNameFamily = $singleName;
      $singleNameGivens = "";
    }

    $nameFamilyBranch = new XMLBranch("namePart");
    $nameFamilyBranch->setTagAttribute("type", "family");
    $nameFamilyBranch->setTagContent(encodeXMLField($rowFieldName, $singleNameFamily));
    $nameBranch->addXMLBranch($nameFamilyBranch);

    if (!empty($singleNameGivens)) {
      // before splitting given names into their parts, we remove any non-word chars
      // between initials/forenames that are connected with a hyphen (which ensures
      // that they are kept together and that the hyphen is maintained):
      $singleNameGivens = preg_replace("/(?<=[$word])[^-$word]*([$dash])[^-$word]*(?=[$upper])/$patternModifiers",
                                       "\\1", $singleNameGivens);

      $singleNameGivenArray = preg_split($betweenGivensDelim, $singleNameGivens,
                                         -1, PREG_SPLIT_NO_EMPTY);

      foreach ($singleNameGivenArray as $singleNameGiven) {
        $nameGivenBranch = new XMLBranch("namePart");
        $nameGivenBranch->setTagAttribute("type", "given");
        $nameGivenBranch->setTagContent(encodeXMLField($rowFieldName, $singleNameGiven));
        $nameBranch->addXMLBranch($nameGivenBranch);
      }
    }

    // role
    $nameBranch->setTagContent(encodeXMLField('name_role', $role), "name/role/roleTerm");
    $nameBranch->setTagAttribute("authority", "marcrelator",
                                 "name/role/roleTerm");
    $nameBranch->setTagAttribute("type", "text", "name/role/roleTerm");

    $nameBranches[] = $nameBranch;
  }

  return $nameBranches;
}
// --------------------------------------------------------------------
// Exports all rows of the given MySQL result set as one MODS XML document.
// Every row fetched from '$result' is converted by modsRecord(); the non-empty
// records are wrapped in a <modsCollection> root element and the serialized
// document is returned as a string.
// Side effects: (re)initializes the globals '$citeKeysArray' and
// '$fieldSpecificSearchReplaceActionsArray'.
function modsCollection($result) {
  global $contentTypeCharset; // these variables are defined in 'ini.inc.php'
  global $convertExportDataToUTF8;

  global $citeKeysArray; // '$citeKeysArray' is made globally available from
                         // within this function

  // The array '$transtab_refbase_unicode' contains search & replace patterns
  // for conversion from refbase markup to Unicode entities.
  global $transtab_refbase_unicode; // defined in 'transtab_refbase_unicode.inc.php'

  global $fieldSpecificSearchReplaceActionsArray;

  // array of cite keys (used to ensure uniqueness of cite keys among all
  // exported records)
  $citeKeysArray = array();

  // Field-specific search & replace 'actions' that get applied to all refbase
  // fields listed in the corresponding 'fields' element. An empty array means
  // that no search & replace actions are performed.
  // Note that search patterns MUST include the leading & trailing slashes --
  // which allows for mode modifiers such as 'imsxU'.
  $fieldSpecificSearchReplaceActionsArray = array();

  if ($convertExportDataToUTF8 == "yes") {
    $fieldSpecificSearchReplaceActionsArray[] = array(
      'fields'  => array("title", "publication", "abbrev_journal", "address", "keywords", "abstract", "orig_title", "series_title", "abbrev_series_title", "notes"),
      'actions' => $transtab_refbase_unicode
    );
  }

  // Individual records are objects, the finished collection is a string.
  // Convert each row to a MODS record and keep the non-empty ones:
  $modsRecords = array();

  while ($row = @ mysqli_fetch_array($result)) {
    $modsRecord = modsRecord($row);

    if (!empty($modsRecord)) {
      $modsRecords[] = $modsRecord;
    }
  }

  // Data that get converted to UTF-8 on export must be declared as UTF-8,
  // otherwise the document keeps the charset of the database content:
  $documentEncoding = ($convertExportDataToUTF8 == "yes" && $contentTypeCharset != "UTF-8")
                    ? "UTF-8"
                    : $contentTypeCharset;

  $collectionDocument = new XMLDocument();
  $collectionDocument->setEncoding($documentEncoding);

  $collectionRoot = new XML("modsCollection");
  $collectionRoot->setTagAttribute("xmlns", "http://www.loc.gov/mods/v3");

  foreach ($modsRecords as $modsRecord) {
    $collectionRoot->addXMLasBranch($modsRecord);
  }

  $collectionDocument->setXML($collectionRoot);

  return $collectionDocument->getXMLString();
}
// --------------------------------------------------------------------
// Returns an XML object (mods) of a single record
function modsRecord($row) {
global $databaseBaseURL; // these variables are defined in 'ini.inc.php'
global $contentTypeCharset;
global $fileVisibility;
global $fileVisibilityException;
global $filesBaseURL;
global $convertExportDataToUTF8;
// defined in 'transtab_unicode_charset.inc.php' and 'transtab_latin1_charset.inc.php'
global $alnum, $alpha, $cntrl, $dash, $digit, $graph, $lower, $print, $punct,
$space, $upper, $word, $patternModifiers;
$exportPrivate = True; // This will be a global variable or will be used
// when modsRow is called and will determine if we
// export user-specific data
$exportRecordURL = True; // Specifies whether an attribution string containing
// the URL to the refbase database record (and the last
// modification date) shall be written to the notes branch.
// Note that this string is required by the "-A|--append"
// feature of the 'refbase' command line client
// convert this record's modified date/time info to UNIX time stamp format:
// => "date('D, j M Y H:i:s O')", e.g. "Sat, 15 Jul 2006 22:24:16 +0200"
// function 'generateRFC2822TimeStamp()' is defined in 'include.inc.php'
$currentDateTimeStamp = generateRFC2822TimeStamp($row['modified_date'], $row['modified_time']);
// --- BEGIN TYPE * ---
// |
// | These apply to everything
// this is a stupid hack that maps the names of the '$row' array keys to those used
// by the '$formVars' array (which is required by function 'generateCiteKey()')
// (eventually, the '$formVars' array should use the MySQL field names as names for its array keys)
$formVars = buildFormVarsArray($row); // function 'buildFormVarsArray()' is defined in 'include.inc.php'
// generate or extract the cite key for this record
// (note that charset conversion can only be done *after* the cite key has been generated,
// otherwise cite key generation will produce garbled text!)
$citeKey = generateCiteKey($formVars); // function 'generateCiteKey()' is defined in 'include.inc.php'
// Create an XML object for a single record.
$record = new XML("mods");
$record->setTagAttribute("version", "3.2");
if (!empty($citeKey))
$record->setTagAttribute("ID", $citeKey);
// titleInfo
// Regular Title
if (!empty($row['title']))
$record->setTagContent(encodeXMLField('title', $row['title']), "mods/titleInfo/title");
// Translated Title
// NOTE: This field is excluded by the default cite SELECT method
if (!empty($row['orig_title'])) {
$orig_title = new XMLBranch("titleInfo");
$orig_title->setTagAttribute("type", "translated");
$orig_title->setTagContent(encodeXMLField('orig_title', $row['orig_title']), "titleInfo/title");
$record->addXMLBranch($orig_title);
}
// name
// author
if (!empty($row['author'])) {
if (preg_match("/ *\(eds?\)$/", $row['author'])) {
$author = preg_replace("/[ \r\n]*\(eds?\)/i", "", $row['author']);
$nameArray = separateNames("author", "/\s*;\s*/", "/\s*,\s*/",
"/(?<=^|[$word])[^-$word]+|(?<=^|[$upper])(?=$|[$upper])/$patternModifiers",
$author, "personal", "editor");
}
else if ($row['type'] == "Map") {
$nameArray = separateNames("author", "/\s*;\s*/", "/\s*,\s*/",
"/(?<=^|[$word])[^-$word]+|(?<=^|[$upper])(?=$|[$upper])/$patternModifiers",
$row['author'], "personal", "cartographer");
}
else {
$nameArray = separateNames("author", "/\s*;\s*/", "/\s*,\s*/",
"/(?<=^|[$word])[^-$word]+|(?<=^|[$upper])(?=$|[$upper])/$patternModifiers",
$row['author'], "personal", "author");
}
foreach ($nameArray as $singleName) {
$record->addXMLBranch($singleName);
}
}
// originInfo
if ((!empty($row['year'])) || (!empty($row['publisher'])) ||
(!empty($row['place']))) {
$origin = new XMLBranch("originInfo");
// dateIssued
if (!empty($row['year']))
$origin->setTagContent(encodeXMLField('year', $row['year']), "originInfo/dateIssued");
// Book Chapters and Journal Articles only have a dateIssued
// (editions, places, and publishers are associated with the host)
if (!preg_match("/^(Book Chapter|Journal Article)$/", $row['type'])) {
// publisher
if (!empty($row['publisher']))
$origin->setTagContent(encodeXMLField('publisher', $row['publisher']), "originInfo/publisher");
// place
if (!empty($row['place'])) {
$origin->setTagContent(encodeXMLField('place', $row['place']), "originInfo/place/placeTerm");
$origin->setTagAttribute("type", "text",
"originInfo/place/placeTerm");
}
// edition
if (!empty($row['edition']))
$origin->setTagContent(encodeXMLField('edition', $row['edition']), "originInfo/edition");
}
if ($origin->hasBranch())
$record->addXMLBranch($origin);
}
// language
if (!empty($row['language']))
$record->setTagContent(encodeXMLField('language', $row['language']), "mods/language");
// abstract
// NOTE: This field is excluded by the default cite SELECT method
if (!empty($row['abstract'])) {
$abstract = new XMLBranch("abstract");
$abstract->setTagContent(encodeXMLField('abstract', $row['abstract']));
if (!empty($row['summary_language'])) {
$abstract->setTagAttribute("lang", encodeXMLField('summary_language', $row['summary_language']));
}
$record->addXMLBranch($abstract);
}
// subject
// keywords
if (!empty($row['keywords'])) {
$subjectArray = array();
$subjectArray = preg_split("/\s*;\s*/", $row['keywords']); // "unrelated" keywords
foreach ($subjectArray as $singleSubject) {
$subjectBranch = new XMLBranch("subject");
$topicArray = array();
$topicArray = preg_split("/\s*,\s*/", $singleSubject); // "related" keywords
foreach ($topicArray as $singleTopic) {
$topicBranch = new XMLBranch("topic");
$topicBranch->setTagContent(encodeXMLField('keywords', $singleTopic));
$subjectBranch->addXMLBranch($topicBranch);
}
$record->addXMLBranch($subjectBranch);
}
}
// user_keys
// NOTE: a copy of the above. Needs to be a separate function later.
if ((!empty($row['user_keys'])) && $exportPrivate) {
$subjectArray = array();
$subjectArray = preg_split("/\s*;\s*/", $row['user_keys']); // "unrelated" user_keys
foreach ($subjectArray as $singleSubject) {
$subjectBranch = new XMLBranch("subject");
$topicArray = array();
$topicArray = preg_split("/\s*,\s*/", $singleSubject); // "related" user_keys
foreach ($topicArray as $singleTopic) {
$topicBranch = new XMLBranch("topic");
$topicBranch->setTagContent(encodeXMLField('user_keys', $singleTopic));
$subjectBranch->addXMLBranch($topicBranch);
}
$record->addXMLBranch($subjectBranch);
}
}
// user_groups
// NOTE: a copy of the above. Needs to be a separate function later.
if ((!empty($row['user_groups'])) && $exportPrivate) {
$subjectArray = array();
$subjectArray = preg_split("/\s*;\s*/", $row['user_groups']); // "unrelated" user_groups
foreach ($subjectArray as $singleSubject) {
$subjectBranch = new XMLBranch("subject");
$topicArray = array();
$topicArray = preg_split("/\s*,\s*/", $singleSubject); // "related" user_groups
foreach ($topicArray as $singleTopic) {
$topicBranch = new XMLBranch("topic");
$topicBranch->setTagContent(encodeXMLField('user_groups', $singleTopic));
$subjectBranch->addXMLBranch($topicBranch);
}
$record->addXMLBranch($subjectBranch);
}
}
// notes
if (!empty($row['notes']))
$record->setTagContent(encodeXMLField('notes', $row['notes']), "mods/note");
// user_notes
if ((!empty($row['user_notes'])) && $exportPrivate) // replaces any generic notes
$record->setTagContent(encodeXMLField('user_notes', $row['user_notes']), "mods/note");
// refbase attribution string
if ($exportRecordURL) {
$attributionBranch = new XMLBranch("note");
$attributionBranch->setTagContent("exported from refbase ("
. $databaseBaseURL . "show.php?record=" . $row['serial']
. "), last updated on " . $currentDateTimeStamp);
$record->addXMLBranch($attributionBranch);
}
// typeOfResource
// maps are 'cartographic', software is 'software, multimedia',
// and everything else is 'text'
$type = new XMLBranch("typeOfResource");
if ($row['type'] == "Map") {
$type->setTagContent("cartographic");
}
else if ($row['type'] == "Software") {
$type->setTagContent("software, multimedia");
}
else {
$type->setTagContent("text");
}
if ($row['type'] == "Manuscript") {
$type->setTagAttribute("manuscript", "yes");
}
$record->addXMLBranch($type);
// location
// Physical Location
// NOTE: This field is excluded by the default cite SELECT method
// This should also be parsed later
if (!empty($row['location'])) {
$location = new XMLBranch("location");
$locationArray = array();
$locationArray = preg_split("/\s*;\s*/", $row['location']);
foreach ($locationArray as $singleLocation) {
$locationBranch = new XMLBranch("physicalLocation");
$locationBranch->setTagContent(encodeXMLField('location', $singleLocation));
$location->addXMLBranch($locationBranch);
}
$record->addXMLBranch($location);
}
// URL (also an identifier, see below)
// NOTE: This field is excluded by the default cite SELECT method
if (!empty($row['url'])) {
$location = new XMLBranch("location");
$location->setTagContent(encodeXMLField('url', $row['url']), "location/url");
$record->addXMLBranch($location);
}
// Include a link to any corresponding FILE if one of the following conditions is met:
// - the variable '$fileVisibility' (defined in 'ini.inc.php') is set to 'everyone'
// - the variable '$fileVisibility' is set to 'login' AND the user is logged in
// - the variable '$fileVisibility' is set to 'user-specific' AND the 'user_permissions' session variable contains 'allow_download'
// - the array variable '$fileVisibilityException' (defined in 'ini.inc.php') contains a pattern (in array element 1) that matches the contents of the field given (in array element 0)
if ($fileVisibility == "everyone" OR ($fileVisibility == "login" AND isset($_SESSION['loginEmail'])) OR ($fileVisibility == "user-specific" AND (isset($_SESSION['user_permissions']) AND preg_match("/allow_download/", $_SESSION['user_permissions']))) OR (!empty($fileVisibilityException) AND preg_match($fileVisibilityException[1], $row[$fileVisibilityException[0]])))
{
// file
// Note that when converting MODS to Endnote or RIS, Bibutils will include the above
// URL (if given), otherwise it'll take the URL from the 'file' field. I.e. for
// Endnote or RIS, the URL to the PDF is only included if no regular URL is available.
if (!empty($row['file'])) {
$location = new XMLBranch("location");
if (preg_match('#^(https?|ftp|file)://#i', $row['file'])) { // if the 'file' field contains a full URL (starting with "http://", "https://", "ftp://", or "file://")
$URLprefix = ""; // we don't alter the URL given in the 'file' field
}
else { // if the 'file' field contains only a partial path (like 'polarbiol/10240001.pdf') or just a file name (like '10240001.pdf')
// use the base URL of the standard files directory as prefix:
if (preg_match('#^/#', $filesBaseURL)) // absolute path -> file dir is located outside of refbase root dir
$URLprefix = 'http://' . $_SERVER['HTTP_HOST'] . $filesBaseURL;
else // relative path -> file dir is located within refbase root dir
$URLprefix = $databaseBaseURL . $filesBaseURL;
}
$location->setTagContent(encodeXMLField('file', $URLprefix . $row['file']), "location/url");
$location->setTagAttribute("displayLabel", "Electronic full text", "location/url");
// the 'access' attribute requires MODS v3.2 or greater:
$location->setTagAttribute("access", "raw object", "location/url");
$record->addXMLBranch($location);
}
}
// identifier
// url
if (!empty($row['url'])) {
$identifier = new XMLBranch("identifier");
$identifier->setTagContent(encodeXMLField('url', $row['url']));
$identifier->setTagAttribute("type", "uri");
$record->addXMLBranch($identifier);
}
// doi
if (!empty($row['doi'])) {
$identifier = new XMLBranch("identifier");
$identifier->setTagContent(encodeXMLField('doi', $row['doi']));
$identifier->setTagAttribute("type", "doi");
$record->addXMLBranch($identifier);
}
// pubmed
// NOTE: Until refbase stores PubMed & arXiv IDs in a better way,
// we extract them from the 'notes' field
if (preg_match("/PMID *: *\d+/i", $row['notes'])) {
$identifier = new XMLBranch("identifier");
$identifier->setTagContent(preg_replace("/.*?PMID *: *(\d+).*/i", "\\1", $row['notes']));
$identifier->setTagAttribute("type", "pubmed");
$record->addXMLBranch($identifier);
}
// arxiv
// NOTE: see note for pubmed
if (preg_match("/arXiv *: *[^ ;]+/i", $row['notes'])) {
$identifier = new XMLBranch("identifier");
$identifier->setTagContent(preg_replace("/.*?arXiv *: *([^ ;]+).*/i", "\\1", $row['notes']));
$identifier->setTagAttribute("type", "arxiv");
$record->addXMLBranch($identifier);
}
// cite_key
if (!empty($citeKey)) {
$identifier = new XMLBranch("identifier");
$identifier->setTagContent(encodeXMLField('cite_key', $citeKey));
$identifier->setTagAttribute("type", "citekey");
$record->addXMLBranch($identifier);
}
// local--CALL NUMBER
// NOTE: This should really be parsed!
if (!empty($row['call_number'])) {
$identifierArray = array();
$identifierArray = preg_split("/\s*;\s*/", $row['call_number']);
foreach ($identifierArray as $singleIdentifier) {
if (!preg_match("/@\s*$/", $singleIdentifier)) {
$identifierBranch = new XMLBranch("identifier");
$identifierBranch->setTagContent(encodeXMLField('call_number', $singleIdentifier));
$identifierBranch->setTagAttribute("type", "local");
$record->addXMLBranch($identifierBranch);
}
}
}
// --- END TYPE * ---
// -----------------------------------------
// --- BEGIN TYPE != ABSTRACT || BOOK CHAPTER || CONFERENCE ARTICLE || JOURNAL ARTICLE || MAGAZINE ARTICLE || NEWSPAPER ARTICLE ---
// |
// | BOOK WHOLE, CONFERENCE VOLUME, JOURNAL, MANUAL, MANUSCRIPT, MAP, MISCELLANEOUS, PATENT,
// | REPORT, and SOFTWARE have some info as a branch off the root, whereas ABSTRACT, BOOK CHAPTER,
// | CONFERENCE ARTICLE, JOURNAL ARTICLE, MAGAZINE ARTICLE and NEWSPAPER ARTICLE place it in the relatedItem branch.
if (!preg_match("/^(Abstract|Book Chapter|Conference Article|Journal Article|Magazine Article|Newspaper Article)$/", $row['type'])) {
// name
// editor
if (!empty($row['editor'])) {
$editor=$row['editor'];
$author=$row['author'];
if (preg_match("/ *\(eds?\)$/", $editor))
$editor = preg_replace("/[ \r\n]*\(eds?\)/i", "", $editor);
if (preg_match("/ *\(eds?\)$/", $author))
$author = preg_replace("/[ \r\n]*\(eds?\)/i", "", $author);
if ($editor != $author) {
$nameArray = separateNames("editor", "/\s*;\s*/", "/\s*,\s*/",
"/(?<=^|[$word])[^-$word]+|(?<=^|[$upper])(?=$|[$upper])/$patternModifiers",
$editor, "personal", "editor");
foreach ($nameArray as $singleName)
$record->addXMLBranch($singleName);
}
}
// corporate
// (we treat a 'corporate_author' similar to how Bibutils converts the BibTeX
// 'organization' field to MODS XML, i.e., we add a separate name element with
// a 'type="corporate"' attribute and an 'author' role (or a 'degree grantor'
// role in case of theses))
if (!empty($row['corporate_author'])) {
$nameBranch = new XMLBranch("name");
$nameBranch->setTagAttribute("type", "corporate");
$nameBranch->setTagContent(encodeXMLField('corporate_author', $row['corporate_author']), "name/namePart");
if (empty($row['thesis']))
$nameBranch->setTagContent("author", "name/role/roleTerm");
else // thesis
$nameBranch->setTagContent("degree grantor", "name/role/roleTerm");
$nameBranch->setTagAttribute("authority", "marcrelator", "name/role/roleTerm");
$nameBranch->setTagAttribute("type", "text", "name/role/roleTerm");
$record->addXMLBranch($nameBranch);
}
// conference
if (!empty($row['conference'])) {
$nameBranch = new XMLBranch("name");
$nameBranch->setTagAttribute("type", "conference");
$nameBranch->setTagContent(encodeXMLField('conference', $row['conference']), "name/namePart");
$record->addXMLBranch($nameBranch);
}
// genre
// type
// NOTE: Is there a better MARC genre[1] for 'manuscript?'
// [1]<http://www.loc.gov/marc/sourcecode/genre/genrelist.html>
$genremarc = new XMLBranch("genre");
$genre = new XMLBranch("genre");
// NOTE: According to the MARC "Source Codes for Genre"[1]
// the MARC authority should be 'marcgt', not 'marc'.
// [1]<http://www.loc.gov/marc/sourcecode/genre/genresource.html>
$genremarc->setTagAttribute("authority", "marcgt");
if (empty($row['thesis'])) { // theses will get their own genre (see below)
if ($row['type'] == "Book Whole") {
$record->setTagContent("monographic",
"mods/originInfo/issuance");
$genremarc->setTagContent("book");
}
else if ($row['type'] == "Conference Volume") {
$genremarc->setTagContent("conference publication");
}
else if ($row['type'] == "Journal") {
$genremarc->setTagContent("periodical");
$genre->setTagContent("academic journal");
}
else if ($row['type'] == "Manual") { // should we set '<issuance>monographic' here (and for the ones below)?
$genremarc->setTagContent("instruction");
$genre->setTagContent("manual");
}
else if ($row['type'] == "Manuscript") {
$genremarc->setTagContent("loose-leaf");
$genre->setTagContent("manuscript");
}
else if ($row['type'] == "Map") {
$genremarc->setTagContent("map");
}
else if ($row['type'] == "Miscellaneous") {
$genre->setTagContent("miscellaneous");
}
else if ($row['type'] == "Patent") {
$genremarc->setTagContent("patent");
}
else if ($row['type'] == "Report") {
$genremarc->setTagContent("technical report");
$genre->setTagContent("report");
}
else if ($row['type'] == "Software") {
// $genremarc->setTagContent("programmed text"); // would this be correct?
$genre->setTagContent("software");
}
else if (!empty($row['type'])) { // catch-all: don't use a MARC genre
$genre->setTagContent(encodeXMLField('type', $row['type']));
}
if ($genremarc->hasLeaf())
$record->addXMLBranch($genremarc);
if ($genre->hasLeaf())
$record->addXMLBranch($genre);
}
// thesis
else { // if (!empty($row['thesis']))
$record->setTagContent("monographic",
"mods/originInfo/issuance");
$thesismarc = new XMLBranch("genre");
$thesis = new XMLBranch("genre");
$thesismarc->setTagContent("thesis");
$thesismarc->setTagAttribute("authority", "marcgt");
// tweak thesis names so that Bibutils will recognize them:
if ($row['thesis'] == "Master's thesis")
$row['thesis'] = "Masters thesis";
$thesis->setTagContent(encodeXMLField('thesis', $row['thesis']));
$record->addXMLBranch($thesismarc);
$record->addXMLBranch($thesis);
}
// physicalDescription
// pages
if (!empty($row['pages'])) {
$description = new XMLBranch("physicalDescription");
$pages = new XMLBranch("extent");
$pages->setTagAttribute("unit", "pages");
if (preg_match("/[0-9] *- *[0-9]/", $row['pages'])) { // if a page range
// split the page range into start and end pages
list($pagestart, $pageend) = preg_split('/\s*[-]\s*/', $row['pages']);
if ($pagestart < $pageend) { // extents MUST span multiple pages
$pages->setTagContent(encodeXMLField('pages', $pagestart), "extent/start");
$pages->setTagContent(encodeXMLField('pages', $pageend), "extent/end");
}
else {
$pages->setTagContent(encodeXMLField('pages', $row['pages']));
}
}
else if (preg_match("/^\d\d*\s*pp?.?$/", $row['pages'])) {
list($pagetotal) = preg_split('/\s*pp?/', $row['pages']);
$pages->setTagContent(encodeXMLField('pages', $pagetotal), "extent/total");
}
else {
$pages->setTagContent(encodeXMLField('pages', $row['pages']));
}
$description->addXMLBranch($pages);
$record->addXMLBranch($description);
}
// identifier
// isbn
if (!empty($row['isbn'])) {
$identifier = new XMLBranch("identifier");
$identifier->setTagContent(encodeXMLField('isbn', $row['isbn']));
$identifier->setTagAttribute("type", "isbn");
$record->addXMLBranch($identifier);
}
// issn
if (!empty($row['issn'])) {
$identifier = new XMLBranch("identifier");
$identifier->setTagContent(encodeXMLField('issn', $row['issn']));
$identifier->setTagAttribute("type", "issn");
$record->addXMLBranch($identifier);
}
// series
if ((!empty($row['series_editor'])) || (!empty($row['series_title'])) ||
(!empty($row['abbrev_series_title'])) ||
(!empty($row['series_volume'])) || (!empty($row['series_issue']))) {
$record->addXMLBranch(serialBranch($row['series_editor'],
$row['series_title'],
$row['abbrev_series_title'],
$row['series_volume'],
$row['series_issue']));
}
}
// --- END TYPE != ABSTRACT || BOOK CHAPTER || CONFERENCE ARTICLE || JOURNAL ARTICLE || MAGAZINE ARTICLE || NEWSPAPER ARTICLE ---
// -----------------------------------------
// --- BEGIN TYPE == ABSTRACT || BOOK CHAPTER || CONFERENCE ARTICLE || JOURNAL ARTICLE || MAGAZINE ARTICLE || NEWSPAPER ARTICLE ---
// |
// | NOTE: These are currently the only types that have publication,
// | abbrev_journal, volume, and issue added.
// | A lot of info goes into the relatedItem branch.
else { // if (preg_match("/^(Abstract|Book Chapter|Conference Article|Journal Article|Magazine Article|Newspaper Article)$/", $row['type']))
// relatedItem
$related = new XMLBranch("relatedItem");
$related->setTagAttribute("type", "host");
// title (Publication)
if (!empty($row['publication']))
$related->setTagContent(encodeXMLField('publication', $row['publication']),
"relatedItem/titleInfo/title");
// title (Abbreviated Journal)
if (!empty($row['abbrev_journal'])) {
$titleabbrev = NEW XMLBranch("titleInfo");
$titleabbrev->setTagAttribute("type", "abbreviated");
$titleabbrev->setTagContent(encodeXMLField('abbrev_journal', $row['abbrev_journal']), "titleInfo/title");
$related->addXMLBranch($titleabbrev);
}
// name
// editor
if (!empty($row['editor'])) {
$editor=$row['editor'];
if (preg_match("/ *\(eds?\)$/", $editor))
$editor = preg_replace("/[ \r\n]*\(eds?\)/i", "", $editor);
$nameArray = separateNames("editor", "/\s*;\s*/", "/\s*,\s*/",
"/(?<=^|[$word])[^-$word]+|(?<=^|[$upper])(?=$|[$upper])/$patternModifiers",
$editor, "personal", "editor");
foreach ($nameArray as $singleName)
$related->addXMLBranch($singleName);
}
// corporate
// NOTE: a copy of the code for 'corporate_author' above.
// Needs to be a separate function later.
if (!empty($row['corporate_author'])) {
$nameBranch = new XMLBranch("name");
$nameBranch->setTagAttribute("type", "corporate");
$nameBranch->setTagContent(encodeXMLField('corporate_author', $row['corporate_author']), "name/namePart");
if (empty($row['thesis']))
$nameBranch->setTagContent("author", "name/role/roleTerm");
else // thesis
$nameBranch->setTagContent("degree grantor", "name/role/roleTerm");
$nameBranch->setTagAttribute("authority", "marcrelator", "name/role/roleTerm");
$nameBranch->setTagAttribute("type", "text", "name/role/roleTerm");
$related->addXMLBranch($nameBranch);
}
// conference
// NOTE: a copy of the code for 'conference' above.
// Needs to be a separate function later.
if (!empty($row['conference'])) {
$nameBranch = new XMLBranch("name");
$nameBranch->setTagAttribute("type", "conference");
$nameBranch->setTagContent(encodeXMLField('conference', $row['conference']), "name/namePart");
$related->addXMLBranch($nameBranch);
}
// originInfo
$relorigin = new XMLBranch("originInfo");
// dateIssued
if (!empty($row['year']))
$relorigin->setTagContent(encodeXMLField('year', $row['year']), "originInfo/dateIssued");
// publisher
if (!empty($row['publisher']))
$relorigin->setTagContent(encodeXMLField('publisher', $row['publisher']), "originInfo/publisher");
// place
if (!empty($row['place'])) {
$relorigin->setTagContent(encodeXMLField('place', $row['place']), "originInfo/place/placeTerm");
$relorigin->setTagAttribute("type", "text",
"originInfo/place/placeTerm");
}
// edition
if (!empty($row['edition']))
$relorigin->setTagContent(encodeXMLField('edition', $row['edition']), "originInfo/edition");
if ($relorigin->hasBranch())
$related->addXMLBranch($relorigin);
// genre (and originInfo/issuance)
if (empty($row['thesis'])) { // theses will get their own genre (see below)
if (preg_match("/^(Journal Article|Magazine Article)$/", $row['type'])) {
$related->setTagContent("continuing",
"relatedItem/originInfo/issuance");
$genremarc = new XMLBranch("genre");
$genre = new XMLBranch("genre");
$genremarc->setTagContent("periodical");
$genremarc->setTagAttribute("authority", "marcgt");
if ($row['type'] == "Magazine Article")
$genre->setTagContent("magazine");
else
$genre->setTagContent("academic journal");
$related->addXMLBranch($genremarc);
$related->addXMLBranch($genre);
}
else if ($row['type'] == "Abstract") {
$record->setTagContent("abstract or summary", "mods/genre");
$record->setTagAttribute("authority", "marcgt", "mods/genre");
}
else if ($row['type'] == "Conference Article") {
$related->setTagContent("conference publication", "relatedItem/genre");
$related->setTagAttribute("authority", "marcgt", "relatedItem/genre");
}
else if ($row['type'] == "Newspaper Article") {
$related->setTagContent("continuing",
"relatedItem/originInfo/issuance");
$related->setTagContent("newspaper", "relatedItem/genre");
$related->setTagAttribute("authority", "marcgt", "relatedItem/genre");
}
else { // if ($row['type'] == "Book Chapter")
$related->setTagContent("monographic",
"relatedItem/originInfo/issuance");
$related->setTagContent("book", "relatedItem/genre");
$related->setTagAttribute("authority", "marcgt", "relatedItem/genre");
}
}
// thesis
else { // if (!empty($row['thesis']))
$thesismarc = new XMLBranch("genre");
$thesis = new XMLBranch("genre");
$thesismarc->setTagContent("thesis");
$thesismarc->setTagAttribute("authority", "marcgt");
// tweak thesis names so that Bibutils will recognize them:
if ($row['thesis'] == "Master's thesis")
$row['thesis'] = "Masters thesis";
$thesis->setTagContent(encodeXMLField('thesis', $row['thesis']));
$related->addXMLBranch($thesismarc);
$related->addXMLBranch($thesis);
}
if ((!empty($row['year'])) || (!empty($row['volume'])) ||
(!empty($row['issue'])) || (!empty($row['pages']))) {
$part = new XMLBranch("part");
if (!empty($row['year']))
$part->setTagContent(encodeXMLField('year', $row['year']), "date");
if (!empty($row['volume'])) {
$detailvolume = new XMLBranch("detail");
$detailvolume->setTagContent(encodeXMLField('volume', $row['volume']), "detail/number");
$detailvolume->setTagAttribute("type", "volume");
$part->addXMLBranch($detailvolume);
}
if (!empty($row['issue'])) {
$detailnumber = new XMLBranch("detail");
$detailnumber->setTagContent(encodeXMLField('issue', $row['issue']), "detail/number");
$detailnumber->setTagAttribute("type", "issue");
$part->addXMLBranch($detailnumber);
}
if (!empty($row['pages'])) {
if (preg_match("/[0-9] *- *[0-9]/", $row['pages'])) { // if a page range
// split the page range into start and end pages
list($pagestart, $pageend) = preg_split('/\s*[-]\s*/', $row['pages']);
if ($pagestart < $pageend) { // extents MUST span multiple pages
$pages = new XMLBranch("extent");
$pages->setTagContent(encodeXMLField('pages', $pagestart), "extent/start");
$pages->setTagContent(encodeXMLField('pages', $pageend), "extent/end");
$pages->setTagAttribute("unit", "page");
}
else {
$pages = new XMLBranch("detail");
if ($pagestart == $pageend) // single-page item
$pages->setTagContent(encodeXMLField('pages', $pagestart), "detail/number");
else
$pages->setTagContent(encodeXMLField('pages', $row['pages']), "detail/number");
$pages->setTagAttribute("type", "page");
}
}
else {
$pages = new XMLBranch("detail");
$pages->setTagContent(encodeXMLField('pages', $row['pages']), "detail/number");
$pages->setTagAttribute("type", "page");
}
$part->addXMLBranch($pages);
}
$related->addXMLBranch($part);
}
// identifier
// isbn
if (!empty($row['isbn'])) {
$identifier = new XMLBranch("identifier");
$identifier->setTagContent(encodeXMLField('isbn', $row['isbn']));
$identifier->setTagAttribute("type", "isbn");
$related->addXMLBranch($identifier);
}
// issn
if (!empty($row['issn'])) {
$identifier = new XMLBranch("identifier");
$identifier->setTagContent(encodeXMLField('issn', $row['issn']));
$identifier->setTagAttribute("type", "issn");
$related->addXMLBranch($identifier);
}
// series
if ((!empty($row['series_editor'])) || (!empty($row['series_title'])) ||
(!empty($row['abbrev_series_title'])) ||
(!empty($row['series_volume'])) || (!empty($row['series_issue']))) {
$related->addXMLBranch(serialBranch($row['series_editor'],
$row['series_title'],
$row['abbrev_series_title'],
$row['series_volume'],
$row['series_issue']));
}
$record->addXMLBranch($related);
}
// --- END TYPE == ABSTRACT || BOOK CHAPTER || CONFERENCE ARTICLE || JOURNAL ARTICLE || MAGAZINE ARTICLE || NEWSPAPER ARTICLE ---
return $record;
}
// --------------------------------------------------------------------
// Encode special chars, perform charset conversions and apply any
// field-specific search & replace actions:
function encodeXMLField($fieldName, $fieldValue)
{
	// The field-specific search & replace actions are shared through a
	// global variable (set up in function 'modsCollection()'):
	global $fieldSpecificSearchReplaceActionsArray;

	// Hand the actual work over to 'encodeField()' (from 'include.inc.php')
	// and pass its result straight back to the caller:
	return encodeField(
		$fieldName,
		$fieldValue,
		$fieldSpecificSearchReplaceActionsArray,
		array(),
		true,
		"XML"
	);
}
?>